summaryrefslogtreecommitdiff
path: root/tools/perf/builtin-stat.c
diff options
context:
space:
mode:
authorYicong Yang <yangyicong@hisilicon.com>2024-02-08 05:40:26 +0300
committerNamhyung Kim <namhyung@kernel.org>2024-02-10 01:59:53 +0300
commitcbc917a1b03bce85f385c1e640c9dcb02ffb9ab0 (patch)
tree85bf911a310bfd4c9e3804d1959afc8441ac3a8f /tools/perf/builtin-stat.c
parent9a440bb2e2e9a4af3a7857af42a825f61b27a18c (diff)
downloadlinux-cbc917a1b03bce85f385c1e640c9dcb02ffb9ab0.tar.xz
perf stat: Support per-cluster aggregation
Some platforms have 'cluster' topology and CPUs in the cluster will share resources like L3 Cache Tag (for HiSilicon Kunpeng SoC) or L2 cache (for Intel Jacobsville). Currently parsing and building cluster topology have been supported since [1]. perf stat has already supported aggregation for other topologies like die or socket, etc. It'll be useful to aggregate per-cluster to find problems like L3T bandwidth contention. This patch add support for "--per-cluster" option for per-cluster aggregation. Also update the docs and related test. The output will be like: [root@localhost tmp]# perf stat -a -e LLC-load --per-cluster -- sleep 5 Performance counter stats for 'system wide': S56-D0-CLS158 4 1,321,521,570 LLC-load S56-D0-CLS594 4 794,211,453 LLC-load S56-D0-CLS1030 4 41,623 LLC-load S56-D0-CLS1466 4 41,646 LLC-load S56-D0-CLS1902 4 16,863 LLC-load S56-D0-CLS2338 4 15,721 LLC-load S56-D0-CLS2774 4 22,671 LLC-load [...] On a legacy system without cluster or cluster support, the output will be look like: [root@localhost perf]# perf stat -a -e cycles --per-cluster -- sleep 1 Performance counter stats for 'system wide': S56-D0-CLS0 64 18,011,485 cycles S7182-D0-CLS0 64 16,548,835 cycles Note that this patch doesn't mix the cluster information in the outputs of --per-core to avoid breaking any tools/scripts using it. Note that perf recently supports "--per-cache" aggregation, but it's not the same with the cluster although cluster CPUs may share some cache resources. For example on my machine all clusters within a die share the same L3 cache: $ cat /sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_list 0-31 $ cat /sys/devices/system/cpu/cpu0/topology/cluster_cpus_list 0-3 [1] commit c5e22feffdd7 ("topology: Represent clusters of CPUs within a die") Tested-by: Jie Zhan <zhanjie9@hisilicon.com> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> Reviewed-by: Ian Rogers <irogers@google.com> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com> Cc: james.clark@arm.com Cc: 21cnbao@gmail.com Cc: prime.zeng@hisilicon.com Cc: Jonathan.Cameron@huawei.com Cc: fanghao11@huawei.com Cc: linuxarm@huawei.com Cc: tim.c.chen@intel.com Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Namhyung Kim <namhyung@kernel.org> Link: https://lore.kernel.org/r/20240208024026.2691-1-yangyicong@huawei.com
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r--tools/perf/builtin-stat.c52
1 files changed, 49 insertions, 3 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5fe9abc6a524..6bba1a89d030 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1238,6 +1238,8 @@ static struct option stat_options[] = {
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
"aggregate counts per processor die", AGGR_DIE),
+ OPT_SET_UINT(0, "per-cluster", &stat_config.aggr_mode,
+ "aggregate counts per processor cluster", AGGR_CLUSTER),
OPT_CALLBACK_OPTARG(0, "per-cache", &stat_config.aggr_mode, &stat_config.aggr_level,
"cache level", "aggregate count at this cache level (Default: LLC)",
parse_cache_level),
@@ -1428,6 +1430,7 @@ static struct aggr_cpu_id aggr_cpu_id__cache(struct perf_cpu cpu, void *data)
static const char *const aggr_mode__string[] = {
[AGGR_CORE] = "core",
[AGGR_CACHE] = "cache",
+ [AGGR_CLUSTER] = "cluster",
[AGGR_DIE] = "die",
[AGGR_GLOBAL] = "global",
[AGGR_NODE] = "node",
@@ -1455,6 +1458,12 @@ static struct aggr_cpu_id perf_stat__get_cache_id(struct perf_stat_config *confi
return aggr_cpu_id__cache(cpu, /*data=*/NULL);
}
+static struct aggr_cpu_id perf_stat__get_cluster(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return aggr_cpu_id__cluster(cpu, /*data=*/NULL);
+}
+
static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
struct perf_cpu cpu)
{
@@ -1507,6 +1516,12 @@ static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *con
return perf_stat__get_aggr(config, perf_stat__get_die, cpu);
}
+static struct aggr_cpu_id perf_stat__get_cluster_cached(struct perf_stat_config *config,
+ struct perf_cpu cpu)
+{
+ return perf_stat__get_aggr(config, perf_stat__get_cluster, cpu);
+}
+
static struct aggr_cpu_id perf_stat__get_cache_id_cached(struct perf_stat_config *config,
struct perf_cpu cpu)
{
@@ -1544,6 +1559,8 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
return aggr_cpu_id__socket;
case AGGR_DIE:
return aggr_cpu_id__die;
+ case AGGR_CLUSTER:
+ return aggr_cpu_id__cluster;
case AGGR_CACHE:
return aggr_cpu_id__cache;
case AGGR_CORE:
@@ -1569,6 +1586,8 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
return perf_stat__get_socket_cached;
case AGGR_DIE:
return perf_stat__get_die_cached;
+ case AGGR_CLUSTER:
+ return perf_stat__get_cluster_cached;
case AGGR_CACHE:
return perf_stat__get_cache_id_cached;
case AGGR_CORE:
@@ -1737,6 +1756,21 @@ static struct aggr_cpu_id perf_env__get_cache_aggr_by_cpu(struct perf_cpu cpu,
return id;
}
+static struct aggr_cpu_id perf_env__get_cluster_aggr_by_cpu(struct perf_cpu cpu,
+ void *data)
+{
+ struct perf_env *env = data;
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+ if (cpu.cpu != -1) {
+ id.socket = env->cpu[cpu.cpu].socket_id;
+ id.die = env->cpu[cpu.cpu].die_id;
+ id.cluster = env->cpu[cpu.cpu].cluster_id;
+ }
+
+ return id;
+}
+
static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
@@ -1744,12 +1778,12 @@ static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, vo
if (cpu.cpu != -1) {
/*
- * core_id is relative to socket and die,
- * we need a global id. So we set
- * socket, die id and core id
+ * core_id is relative to socket, die and cluster, we need a
+ * global id. So we set socket, die id, cluster id and core id.
*/
id.socket = env->cpu[cpu.cpu].socket_id;
id.die = env->cpu[cpu.cpu].die_id;
+ id.cluster = env->cpu[cpu.cpu].cluster_id;
id.core = env->cpu[cpu.cpu].core_id;
}
@@ -1805,6 +1839,12 @@ static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *confi
return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
+static struct aggr_cpu_id perf_stat__get_cluster_file(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return perf_env__get_cluster_aggr_by_cpu(cpu, &perf_stat.session->header.env);
+}
+
static struct aggr_cpu_id perf_stat__get_cache_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu cpu)
{
@@ -1842,6 +1882,8 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
return perf_env__get_socket_aggr_by_cpu;
case AGGR_DIE:
return perf_env__get_die_aggr_by_cpu;
+ case AGGR_CLUSTER:
+ return perf_env__get_cluster_aggr_by_cpu;
case AGGR_CACHE:
return perf_env__get_cache_aggr_by_cpu;
case AGGR_CORE:
@@ -1867,6 +1909,8 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
return perf_stat__get_socket_file;
case AGGR_DIE:
return perf_stat__get_die_file;
+ case AGGR_CLUSTER:
+ return perf_stat__get_cluster_file;
case AGGR_CACHE:
return perf_stat__get_cache_file;
case AGGR_CORE:
@@ -2398,6 +2442,8 @@ static int __cmd_report(int argc, const char **argv)
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
"aggregate counts per processor die", AGGR_DIE),
+ OPT_SET_UINT(0, "per-cluster", &perf_stat.aggr_mode,
+ "aggregate counts perf processor cluster", AGGR_CLUSTER),
OPT_CALLBACK_OPTARG(0, "per-cache", &perf_stat.aggr_mode, &perf_stat.aggr_level,
"cache level",
"aggregate count at this cache level (Default: LLC)",