perf kwork top: Implements BPF-based cpu usage statistics

Use BPF to collect statistics on the CPU usage based on perf BPF skeletons. Example usage: # perf kwork top -h Usage: perf kwork top [<options>] -b, --use-bpf Use BPF to measure task cpu usage -C, --cpu <cpu> list of cpus to profile -i, --input <file> input file name -n, --name <name> event name to profile -s, --sort <key[,key2...]> sort by key(s): rate, runtime, tid --time <str> Time span for analysis (start,stop) # # perf kwork -k sched top -b Starting trace, Hit <Ctrl+C> to stop and report ^C Total : 160702.425 ms, 8 cpus %Cpu(s): 36.00% id, 0.00% hi, 0.00% si %Cpu0 [|||||||||||||||||| 61.66%] %Cpu1 [|||||||||||||||||| 61.27%] %Cpu2 [||||||||||||||||||| 66.40%] %Cpu3 [|||||||||||||||||| 61.28%] %Cpu4 [|||||||||||||||||| 61.82%] %Cpu5 [||||||||||||||||||||||| 77.41%] %Cpu6 [|||||||||||||||||| 61.73%] %Cpu7 [|||||||||||||||||| 63.25%] PID SPID %CPU RUNTIME COMMMAND ------------------------------------------------------------- 0 0 38.72 8089.463 ms [swapper/1] 0 0 38.71 8084.547 ms [swapper/3] 0 0 38.33 8007.532 ms [swapper/0] 0 0 38.26 7992.985 ms [swapper/6] 0 0 38.17 7971.865 ms [swapper/4] 0 0 36.74 7447.765 ms [swapper/7] 0 0 33.59 6486.942 ms [swapper/2] 0 0 22.58 3771.268 ms [swapper/5] 9545 9351 2.48 447.136 ms sched-messaging 9574 9351 2.09 418.583 ms sched-messaging 9724 9351 2.05 372.407 ms sched-messaging 9531 9351 2.01 368.804 ms sched-messaging 9512 9351 2.00 362.250 ms sched-messaging 9514 9351 1.95 357.767 ms sched-messaging 9538 9351 1.86 384.476 ms sched-messaging 9712 9351 1.84 386.490 ms sched-messaging 9723 9351 1.83 380.021 ms sched-messaging 9722 9351 1.82 382.738 ms sched-messaging 9517 9351 1.81 354.794 ms sched-messaging 9559 9351 1.79 344.305 ms sched-messaging 9725 9351 1.77 365.315 ms sched-messaging <SNIP> # perf kwork -k sched top -b -n perf Starting trace, Hit <Ctrl+C> to stop and report ^C Total : 151563.332 ms, 8 cpus %Cpu(s): 26.49% id, 0.00% hi, 0.00% si %Cpu0 [ 0.01%] %Cpu1 [ 0.00%] %Cpu2 [ 0.00%] %Cpu3 [ 0.00%] %Cpu4 [ 0.00%] %Cpu5 [ 0.00%] %Cpu6 [ 0.00%] %Cpu7 [ 0.00%] PID SPID %CPU RUNTIME COMMMAND ------------------------------------------------------------- 9754 9754 0.01 2.303 ms perf # # perf kwork -k sched top -b -C 2,3,4 Starting trace, Hit <Ctrl+C> to stop and report ^C Total : 48016.721 ms, 3 cpus %Cpu(s): 27.82% id, 0.00% hi, 0.00% si %Cpu2 [|||||||||||||||||||||| 74.68%] %Cpu3 [||||||||||||||||||||| 71.06%] %Cpu4 [||||||||||||||||||||| 70.91%] PID SPID %CPU RUNTIME COMMMAND ------------------------------------------------------------- 0 0 29.08 4734.998 ms [swapper/4] 0 0 28.93 4710.029 ms [swapper/3] 0 0 25.31 3912.363 ms [swapper/2] 10248 10158 1.62 264.931 ms sched-messaging 10253 10158 1.62 265.136 ms sched-messaging 10158 10158 1.60 263.013 ms bash 10360 10158 1.49 243.639 ms sched-messaging 10413 10158 1.48 238.604 ms sched-messaging 10531 10158 1.47 234.067 ms sched-messaging 10400 10158 1.47 240.631 ms sched-messaging 10355 10158 1.47 230.586 ms sched-messaging 10377 10158 1.43 234.835 ms sched-messaging 10526 10158 1.42 232.045 ms sched-messaging 10298 10158 1.41 222.396 ms sched-messaging 10410 10158 1.38 221.853 ms sched-messaging 10364 10158 1.38 226.042 ms sched-messaging 10480 10158 1.36 213.633 ms sched-messaging 10370 10158 1.36 223.620 ms sched-messaging 10553 10158 1.34 217.169 ms sched-messaging 10291 10158 1.34 211.516 ms sched-messaging 10251 10158 1.34 218.813 ms sched-messaging 10522 10158 1.33 218.498 ms sched-messaging 10288 10158 1.33 216.787 ms sched-messaging <SNIP> Reviewed-by: Ian Rogers <irogers@google.com> Signed-off-by: Yang Jihong <yangjihong1@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@amd.com> Cc: Sandipan Das <sandipan.das@amd.com> Link: https://lore.kernel.org/r/20230812084917.169338-15-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
author: Yang Jihong <yangjihong1@huawei.com> 2023-08-12 11:49:15 +0300
committer: Arnaldo Carvalho de Melo <acme@redhat.com> 2023-09-12 23:31:59 +0300
commit: 8c98420987cda501924b7a34f2413b982517d244 (patch)
tree: 360d0b53b70bdc157166573566a4e8a3de55de57 /tools/perf/util/bpf_skel
parent: aa172a5ad3159c1cc97da37253c972683681adaa (diff)
download: linux-8c98420987cda501924b7a34f2413b982517d244.tar.xz
1 files changed, 187 insertions, 0 deletions
diff --git a/tools/perf/util/bpf_skel/kwork_top.bpf.c b/tools/perf/util/bpf_skel/kwork_top.bpf.c
new file mode 100644
index 000000000000..47ad61608ec7
--- /dev/null
+++ b/tools/perf/util/bpf_skel/kwork_top.bpf.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2022, Huawei
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+/*
+ * This should be in sync with "util/kwork.h"
+ */
+enum kwork_class_type {
+	KWORK_CLASS_IRQ,
+	KWORK_CLASS_SOFTIRQ,
+	KWORK_CLASS_WORKQUEUE,
+	KWORK_CLASS_SCHED,
+	KWORK_CLASS_MAX,
+};
+
+#define MAX_ENTRIES     102400
+#define MAX_NR_CPUS     2048
+#define PF_KTHREAD      0x00200000
+#define MAX_COMMAND_LEN 16
+
+struct time_data {
+	__u64 timestamp;
+};
+
+struct work_data {
+	__u64 runtime;
+};
+
+struct task_data {
+	__u32 tgid;
+	__u32 is_kthread;
+	char comm[MAX_COMMAND_LEN];
+};
+
+struct work_key {
+	__u32 type;
+	__u32 pid;
+	__u64 task_p;
+};
+
+struct task_key {
+	__u32 pid;
+	__u32 cpu;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct time_data);
+} kwork_top_task_time SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(struct task_key));
+	__uint(value_size, sizeof(struct task_data));
+	__uint(max_entries, MAX_ENTRIES);
+} kwork_top_tasks SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(key_size, sizeof(struct work_key));
+	__uint(value_size, sizeof(struct work_data));
+	__uint(max_entries, MAX_ENTRIES);
+} kwork_top_works SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(u32));
+	__uint(value_size, sizeof(u8));
+	__uint(max_entries, MAX_NR_CPUS);
+} kwork_top_cpu_filter SEC(".maps");
+
+int enabled = 0;
+
+int has_cpu_filter = 0;
+
+__u64 from_timestamp = 0;
+__u64 to_timestamp = 0;
+
+static __always_inline int cpu_is_filtered(__u32 cpu)
+{
+	__u8 *cpu_val;
+
+	if (has_cpu_filter) {
+		cpu_val = bpf_map_lookup_elem(&kwork_top_cpu_filter, &cpu);
+		if (!cpu_val)
+			return 1;
+	}
+
+	return 0;
+}
+
+static __always_inline void update_task_info(struct task_struct *task, __u32 cpu)
+{
+	struct task_key key = {
+		.pid = task->pid,
+		.cpu = cpu,
+	};
+
+	if (!bpf_map_lookup_elem(&kwork_top_tasks, &key)) {
+		struct task_data data = {
+			.tgid = task->tgid,
+			.is_kthread = task->flags & PF_KTHREAD ? 1 : 0,
+		};
+		BPF_CORE_READ_STR_INTO(&data.comm, task, comm);
+
+		bpf_map_update_elem(&kwork_top_tasks, &key, &data, BPF_ANY);
+	}
+}
+
+static __always_inline void update_work(struct work_key *key, __u64 delta)
+{
+	struct work_data *data;
+
+	data = bpf_map_lookup_elem(&kwork_top_works, key);
+	if (data) {
+		data->runtime += delta;
+	} else {
+		struct work_data new_data = {
+			.runtime = delta,
+		};
+
+		bpf_map_update_elem(&kwork_top_works, key, &new_data, BPF_ANY);
+	}
+}
+
+static void on_sched_out(struct task_struct *task, __u64 ts, __u32 cpu)
+{
+	__u64 delta;
+	struct time_data *pelem;
+
+	pelem = bpf_task_storage_get(&kwork_top_task_time, task, NULL, 0);
+	if (pelem)
+		delta = ts - pelem->timestamp;
+	else
+		delta = ts - from_timestamp;
+
+	struct work_key key = {
+		.type = KWORK_CLASS_SCHED,
+		.pid = task->pid,
+		.task_p = (__u64)task,
+	};
+
+	update_work(&key, delta);
+	update_task_info(task, cpu);
+}
+
+static void on_sched_in(struct task_struct *task, __u64 ts)
+{
+	struct time_data *pelem;
+
+	pelem = bpf_task_storage_get(&kwork_top_task_time, task, NULL,
+				     BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (pelem)
+		pelem->timestamp = ts;
+}
+
+SEC("tp_btf/sched_switch")
+int on_switch(u64 *ctx)
+{
+	struct task_struct *prev, *next;
+
+	prev = (struct task_struct *)ctx[1];
+	next = (struct task_struct *)ctx[2];
+
+	if (!enabled)
+		return 0;
+
+	__u32 cpu = bpf_get_smp_processor_id();
+
+	if (cpu_is_filtered(cpu))
+		return 0;
+
+	__u64 ts = bpf_ktime_get_ns();
+
+	on_sched_out(prev, ts, cpu);
+	on_sched_in(next, ts);
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
author	Yang Jihong <yangjihong1@huawei.com>	2023-08-12 11:49:15 +0300
committer	Arnaldo Carvalho de Melo <acme@redhat.com>	2023-09-12 23:31:59 +0300
commit	8c98420987cda501924b7a34f2413b982517d244 (patch)
tree	360d0b53b70bdc157166573566a4e8a3de55de57 /tools/perf/util/bpf_skel
parent	aa172a5ad3159c1cc97da37253c972683681adaa (diff)
download	linux-8c98420987cda501924b7a34f2413b982517d244.tar.xz