summaryrefslogtreecommitdiff
path: root/tools/perf/util/bpf_skel
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util/bpf_skel')
-rw-r--r--tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c16
-rw-r--r--tools/perf/util/bpf_skel/kwork_top.bpf.c338
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c145
-rw-r--r--tools/perf/util/bpf_skel/lock_data.h3
-rw-r--r--tools/perf/util/bpf_skel/vmlinux/.gitignore1
5 files changed, 476 insertions, 27 deletions
diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index 939ec769bf4a..52c270330ae0 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -153,7 +153,7 @@ static inline
unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
{
unsigned int augmented_len = sizeof(*augmented_arg);
- int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg);
+ int string_len = bpf_probe_read_user_str(&augmented_arg->value, arg_len, arg);
augmented_arg->size = augmented_arg->err = 0;
/*
@@ -203,7 +203,7 @@ int sys_enter_connect(struct syscall_enter_args *args)
_Static_assert(is_power_of_2(sizeof(augmented_args->saddr)), "sizeof(augmented_args->saddr) needs to be a power of two");
socklen &= sizeof(augmented_args->saddr) - 1;
- bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
+ bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg);
return augmented__output(args, augmented_args, len + socklen);
}
@@ -221,7 +221,7 @@ int sys_enter_sendto(struct syscall_enter_args *args)
socklen &= sizeof(augmented_args->saddr) - 1;
- bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
+ bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg);
return augmented__output(args, augmented_args, len + socklen);
}
@@ -311,7 +311,7 @@ int sys_enter_perf_event_open(struct syscall_enter_args *args)
if (augmented_args == NULL)
goto failure;
- if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0)
+ if (bpf_probe_read_user(&augmented_args->__data, sizeof(*attr), attr) < 0)
goto failure;
attr_read = (const struct perf_event_attr_size *)augmented_args->__data;
@@ -325,7 +325,7 @@ int sys_enter_perf_event_open(struct syscall_enter_args *args)
goto failure;
// Now that we read attr->size and tested it against the size limits, read it completely
- if (bpf_probe_read(&augmented_args->__data, size, attr) < 0)
+ if (bpf_probe_read_user(&augmented_args->__data, size, attr) < 0)
goto failure;
return augmented__output(args, augmented_args, len + size);
@@ -347,7 +347,7 @@ int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
if (size > sizeof(augmented_args->__data))
goto failure;
- bpf_probe_read(&augmented_args->__data, size, rqtp_arg);
+ bpf_probe_read_user(&augmented_args->__data, size, rqtp_arg);
return augmented__output(args, augmented_args, len + size);
failure:
@@ -385,7 +385,7 @@ int sys_enter(struct syscall_enter_args *args)
if (augmented_args == NULL)
return 1;
- bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
+ bpf_probe_read_kernel(&augmented_args->args, sizeof(augmented_args->args), args);
/*
* Jump to syscall specific augmenter, even if the default one,
@@ -406,7 +406,7 @@ int sys_exit(struct syscall_exit_args *args)
if (pid_filter__has(&pids_filtered, getpid()))
return 0;
- bpf_probe_read(&exit_args, sizeof(exit_args), args);
+ bpf_probe_read_kernel(&exit_args, sizeof(exit_args), args);
/*
* Jump to syscall specific return augmenter, even if the default one,
* "!raw_syscalls:unaugmented" that will just return 1 to return the
diff --git a/tools/perf/util/bpf_skel/kwork_top.bpf.c b/tools/perf/util/bpf_skel/kwork_top.bpf.c
new file mode 100644
index 000000000000..84c15ccbab44
--- /dev/null
+++ b/tools/perf/util/bpf_skel/kwork_top.bpf.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2022, Huawei
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+/*
+ * This should be in sync with "util/kwork.h"
+ */
+enum kwork_class_type {
+ KWORK_CLASS_IRQ,
+ KWORK_CLASS_SOFTIRQ,
+ KWORK_CLASS_WORKQUEUE,
+ KWORK_CLASS_SCHED,
+ KWORK_CLASS_MAX,
+};
+
+#define MAX_ENTRIES 102400
+#define MAX_NR_CPUS 2048
+#define PF_KTHREAD 0x00200000
+#define MAX_COMMAND_LEN 16
+
+struct time_data {
+ __u64 timestamp;
+};
+
+struct work_data {
+ __u64 runtime;
+};
+
+struct task_data {
+ __u32 tgid;
+ __u32 is_kthread;
+ char comm[MAX_COMMAND_LEN];
+};
+
+struct work_key {
+ __u32 type;
+ __u32 pid;
+ __u64 task_p;
+};
+
+struct task_key {
+ __u32 pid;
+ __u32 cpu;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct time_data);
+} kwork_top_task_time SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(struct work_key));
+ __uint(value_size, sizeof(struct time_data));
+ __uint(max_entries, MAX_ENTRIES);
+} kwork_top_irq_time SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct task_key));
+ __uint(value_size, sizeof(struct task_data));
+ __uint(max_entries, MAX_ENTRIES);
+} kwork_top_tasks SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(struct work_key));
+ __uint(value_size, sizeof(struct work_data));
+ __uint(max_entries, MAX_ENTRIES);
+} kwork_top_works SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u8));
+ __uint(max_entries, MAX_NR_CPUS);
+} kwork_top_cpu_filter SEC(".maps");
+
+int enabled = 0;
+
+int has_cpu_filter = 0;
+
+__u64 from_timestamp = 0;
+__u64 to_timestamp = 0;
+
+static __always_inline int cpu_is_filtered(__u32 cpu)
+{
+ __u8 *cpu_val;
+
+ if (has_cpu_filter) {
+ cpu_val = bpf_map_lookup_elem(&kwork_top_cpu_filter, &cpu);
+ if (!cpu_val)
+ return 1;
+ }
+
+ return 0;
+}
+
+static __always_inline void update_task_info(struct task_struct *task, __u32 cpu)
+{
+ struct task_key key = {
+ .pid = task->pid,
+ .cpu = cpu,
+ };
+
+ if (!bpf_map_lookup_elem(&kwork_top_tasks, &key)) {
+ struct task_data data = {
+ .tgid = task->tgid,
+ .is_kthread = task->flags & PF_KTHREAD ? 1 : 0,
+ };
+ BPF_CORE_READ_STR_INTO(&data.comm, task, comm);
+
+ bpf_map_update_elem(&kwork_top_tasks, &key, &data, BPF_ANY);
+ }
+}
+
+static __always_inline void update_work(struct work_key *key, __u64 delta)
+{
+ struct work_data *data;
+
+ data = bpf_map_lookup_elem(&kwork_top_works, key);
+ if (data) {
+ data->runtime += delta;
+ } else {
+ struct work_data new_data = {
+ .runtime = delta,
+ };
+
+ bpf_map_update_elem(&kwork_top_works, key, &new_data, BPF_ANY);
+ }
+}
+
+static void on_sched_out(struct task_struct *task, __u64 ts, __u32 cpu)
+{
+ __u64 delta;
+ struct time_data *pelem;
+
+ pelem = bpf_task_storage_get(&kwork_top_task_time, task, NULL, 0);
+ if (pelem)
+ delta = ts - pelem->timestamp;
+ else
+ delta = ts - from_timestamp;
+
+ struct work_key key = {
+ .type = KWORK_CLASS_SCHED,
+ .pid = task->pid,
+ .task_p = (__u64)task,
+ };
+
+ update_work(&key, delta);
+ update_task_info(task, cpu);
+}
+
+static void on_sched_in(struct task_struct *task, __u64 ts)
+{
+ struct time_data *pelem;
+
+ pelem = bpf_task_storage_get(&kwork_top_task_time, task, NULL,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (pelem)
+ pelem->timestamp = ts;
+}
+
+SEC("tp_btf/sched_switch")
+int on_switch(u64 *ctx)
+{
+ struct task_struct *prev, *next;
+
+ prev = (struct task_struct *)ctx[1];
+ next = (struct task_struct *)ctx[2];
+
+ if (!enabled)
+ return 0;
+
+ __u32 cpu = bpf_get_smp_processor_id();
+
+ if (cpu_is_filtered(cpu))
+ return 0;
+
+ __u64 ts = bpf_ktime_get_ns();
+
+ on_sched_out(prev, ts, cpu);
+ on_sched_in(next, ts);
+
+ return 0;
+}
+
+SEC("tp_btf/irq_handler_entry")
+int on_irq_handler_entry(u64 *cxt)
+{
+ struct task_struct *task;
+
+ if (!enabled)
+ return 0;
+
+ __u32 cpu = bpf_get_smp_processor_id();
+
+ if (cpu_is_filtered(cpu))
+ return 0;
+
+ __u64 ts = bpf_ktime_get_ns();
+
+ task = (struct task_struct *)bpf_get_current_task();
+ if (!task)
+ return 0;
+
+ struct work_key key = {
+ .type = KWORK_CLASS_IRQ,
+ .pid = BPF_CORE_READ(task, pid),
+ .task_p = (__u64)task,
+ };
+
+ struct time_data data = {
+ .timestamp = ts,
+ };
+
+ bpf_map_update_elem(&kwork_top_irq_time, &key, &data, BPF_ANY);
+
+ return 0;
+}
+
+SEC("tp_btf/irq_handler_exit")
+int on_irq_handler_exit(u64 *cxt)
+{
+ __u64 delta;
+ struct task_struct *task;
+ struct time_data *pelem;
+
+ if (!enabled)
+ return 0;
+
+ __u32 cpu = bpf_get_smp_processor_id();
+
+ if (cpu_is_filtered(cpu))
+ return 0;
+
+ __u64 ts = bpf_ktime_get_ns();
+
+ task = (struct task_struct *)bpf_get_current_task();
+ if (!task)
+ return 0;
+
+ struct work_key key = {
+ .type = KWORK_CLASS_IRQ,
+ .pid = BPF_CORE_READ(task, pid),
+ .task_p = (__u64)task,
+ };
+
+ pelem = bpf_map_lookup_elem(&kwork_top_irq_time, &key);
+ if (pelem && pelem->timestamp != 0)
+ delta = ts - pelem->timestamp;
+ else
+ delta = ts - from_timestamp;
+
+ update_work(&key, delta);
+
+ return 0;
+}
+
+SEC("tp_btf/softirq_entry")
+int on_softirq_entry(u64 *cxt)
+{
+ struct task_struct *task;
+
+ if (!enabled)
+ return 0;
+
+ __u32 cpu = bpf_get_smp_processor_id();
+
+ if (cpu_is_filtered(cpu))
+ return 0;
+
+ __u64 ts = bpf_ktime_get_ns();
+
+ task = (struct task_struct *)bpf_get_current_task();
+ if (!task)
+ return 0;
+
+ struct work_key key = {
+ .type = KWORK_CLASS_SOFTIRQ,
+ .pid = BPF_CORE_READ(task, pid),
+ .task_p = (__u64)task,
+ };
+
+ struct time_data data = {
+ .timestamp = ts,
+ };
+
+ bpf_map_update_elem(&kwork_top_irq_time, &key, &data, BPF_ANY);
+
+ return 0;
+}
+
+SEC("tp_btf/softirq_exit")
+int on_softirq_exit(u64 *cxt)
+{
+ __u64 delta;
+ struct task_struct *task;
+ struct time_data *pelem;
+
+ if (!enabled)
+ return 0;
+
+ __u32 cpu = bpf_get_smp_processor_id();
+
+ if (cpu_is_filtered(cpu))
+ return 0;
+
+ __u64 ts = bpf_ktime_get_ns();
+
+ task = (struct task_struct *)bpf_get_current_task();
+ if (!task)
+ return 0;
+
+ struct work_key key = {
+ .type = KWORK_CLASS_SOFTIRQ,
+ .pid = BPF_CORE_READ(task, pid),
+ .task_p = (__u64)task,
+ };
+
+ pelem = bpf_map_lookup_elem(&kwork_top_irq_time, &key);
+ if (pelem)
+ delta = ts - pelem->timestamp;
+ else
+ delta = ts - from_timestamp;
+
+ update_work(&key, delta);
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 8d3cfbb3cc65..95cd8414f6ef 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -42,6 +42,14 @@ struct {
__uint(max_entries, MAX_ENTRIES);
} tstamp SEC(".maps");
+/* maintain per-CPU timestamp at the beginning of contention */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct tstamp_data));
+ __uint(max_entries, 1);
+} tstamp_cpu SEC(".maps");
+
/* actual lock contention statistics */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
@@ -92,6 +100,13 @@ struct {
__uint(max_entries, 1);
} addr_filter SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cgroup_filter SEC(".maps");
+
struct rw_semaphore___old {
struct task_struct *owner;
} __attribute__((preserve_access_index));
@@ -114,10 +129,14 @@ int has_cpu;
int has_task;
int has_type;
int has_addr;
+int has_cgroup;
int needs_callstack;
int stack_skip;
int lock_owner;
+int use_cgroup_v2;
+int perf_subsys_id = -1;
+
/* determine the key of lock stat */
int aggr_mode;
@@ -130,6 +149,29 @@ int data_fail;
int task_map_full;
int data_map_full;
+static inline __u64 get_current_cgroup_id(void)
+{
+ struct task_struct *task;
+ struct cgroup *cgrp;
+
+ if (use_cgroup_v2)
+ return bpf_get_current_cgroup_id();
+
+ task = bpf_get_current_task_btf();
+
+ if (perf_subsys_id == -1) {
+#if __has_builtin(__builtin_preserve_enum_value)
+ perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
+ perf_event_cgrp_id);
+#else
+ perf_subsys_id = perf_event_cgrp_id;
+#endif
+ }
+
+ cgrp = BPF_CORE_READ(task, cgroups, subsys[perf_subsys_id], cgroup);
+ return BPF_CORE_READ(cgrp, kn, id);
+}
+
static inline int can_record(u64 *ctx)
{
if (has_cpu) {
@@ -168,6 +210,15 @@ static inline int can_record(u64 *ctx)
return 0;
}
+ if (has_cgroup) {
+ __u8 *ok;
+ __u64 cgrp = get_current_cgroup_id();
+
+ ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp);
+ if (!ok)
+ return 0;
+ }
+
return 1;
}
@@ -268,30 +319,57 @@ static inline __u32 check_lock_type(__u64 lock, __u32 flags)
return 0;
}
-SEC("tp_btf/contention_begin")
-int contention_begin(u64 *ctx)
+static inline struct tstamp_data *get_tstamp_elem(__u32 flags)
{
__u32 pid;
struct tstamp_data *pelem;
- if (!enabled || !can_record(ctx))
- return 0;
+ /* Use per-cpu array map for spinlock and rwlock */
+ if (flags == (LCB_F_SPIN | LCB_F_READ) || flags == LCB_F_SPIN ||
+ flags == (LCB_F_SPIN | LCB_F_WRITE)) {
+ __u32 idx = 0;
+
+ pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx);
+ /* Do not update the element for nested locks */
+ if (pelem && pelem->lock)
+ pelem = NULL;
+ return pelem;
+ }
pid = bpf_get_current_pid_tgid();
pelem = bpf_map_lookup_elem(&tstamp, &pid);
+ /* Do not update the element for nested locks */
if (pelem && pelem->lock)
- return 0;
+ return NULL;
if (pelem == NULL) {
struct tstamp_data zero = {};
- bpf_map_update_elem(&tstamp, &pid, &zero, BPF_ANY);
+ if (bpf_map_update_elem(&tstamp, &pid, &zero, BPF_NOEXIST) < 0) {
+ __sync_fetch_and_add(&task_fail, 1);
+ return NULL;
+ }
+
pelem = bpf_map_lookup_elem(&tstamp, &pid);
if (pelem == NULL) {
__sync_fetch_and_add(&task_fail, 1);
- return 0;
+ return NULL;
}
}
+ return pelem;
+}
+
+SEC("tp_btf/contention_begin")
+int contention_begin(u64 *ctx)
+{
+ struct tstamp_data *pelem;
+
+ if (!enabled || !can_record(ctx))
+ return 0;
+
+ pelem = get_tstamp_elem(ctx[1]);
+ if (pelem == NULL)
+ return 0;
pelem->timestamp = bpf_ktime_get_ns();
pelem->lock = (__u64)ctx[0];
@@ -330,23 +408,42 @@ int contention_begin(u64 *ctx)
SEC("tp_btf/contention_end")
int contention_end(u64 *ctx)
{
- __u32 pid;
+ __u32 pid = 0, idx = 0;
struct tstamp_data *pelem;
struct contention_key key = {};
struct contention_data *data;
__u64 duration;
+ bool need_delete = false;
if (!enabled)
return 0;
- pid = bpf_get_current_pid_tgid();
- pelem = bpf_map_lookup_elem(&tstamp, &pid);
- if (!pelem || pelem->lock != ctx[0])
- return 0;
+ /*
+ * For spinlock and rwlock, it needs to get the timestamp for the
+ * per-cpu map. However, contention_end does not have the flags
+ * so it cannot know whether it reads percpu or hash map.
+ *
+ * Try per-cpu map first and check if there's active contention.
+ * If it is, do not read hash map because it cannot go to sleeping
+ * locks before releasing the spinning locks.
+ */
+ pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx);
+ if (pelem && pelem->lock) {
+ if (pelem->lock != ctx[0])
+ return 0;
+ } else {
+ pid = bpf_get_current_pid_tgid();
+ pelem = bpf_map_lookup_elem(&tstamp, &pid);
+ if (!pelem || pelem->lock != ctx[0])
+ return 0;
+ need_delete = true;
+ }
duration = bpf_ktime_get_ns() - pelem->timestamp;
if ((__s64)duration < 0) {
- bpf_map_delete_elem(&tstamp, &pid);
+ pelem->lock = 0;
+ if (need_delete)
+ bpf_map_delete_elem(&tstamp, &pid);
__sync_fetch_and_add(&time_fail, 1);
return 0;
}
@@ -358,16 +455,22 @@ int contention_end(u64 *ctx)
case LOCK_AGGR_TASK:
if (lock_owner)
key.pid = pelem->flags;
- else
+ else {
+ if (!need_delete)
+ pid = bpf_get_current_pid_tgid();
key.pid = pid;
+ }
if (needs_callstack)
key.stack_id = pelem->stack_id;
break;
case LOCK_AGGR_ADDR:
- key.lock_addr = pelem->lock;
+ key.lock_addr_or_cgroup = pelem->lock;
if (needs_callstack)
key.stack_id = pelem->stack_id;
break;
+ case LOCK_AGGR_CGROUP:
+ key.lock_addr_or_cgroup = get_current_cgroup_id();
+ break;
default:
/* should not happen */
return 0;
@@ -376,7 +479,9 @@ int contention_end(u64 *ctx)
data = bpf_map_lookup_elem(&lock_stat, &key);
if (!data) {
if (data_map_full) {
- bpf_map_delete_elem(&tstamp, &pid);
+ pelem->lock = 0;
+ if (need_delete)
+ bpf_map_delete_elem(&tstamp, &pid);
__sync_fetch_and_add(&data_fail, 1);
return 0;
}
@@ -399,7 +504,9 @@ int contention_end(u64 *ctx)
data_map_full = 1;
__sync_fetch_and_add(&data_fail, 1);
}
- bpf_map_delete_elem(&tstamp, &pid);
+ pelem->lock = 0;
+ if (need_delete)
+ bpf_map_delete_elem(&tstamp, &pid);
return 0;
}
@@ -412,7 +519,9 @@ int contention_end(u64 *ctx)
if (data->min_time > duration)
data->min_time = duration;
- bpf_map_delete_elem(&tstamp, &pid);
+ pelem->lock = 0;
+ if (need_delete)
+ bpf_map_delete_elem(&tstamp, &pid);
return 0;
}
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
index 260062a9f2ab..08482daf61be 100644
--- a/tools/perf/util/bpf_skel/lock_data.h
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -6,7 +6,7 @@
struct contention_key {
u32 stack_id;
u32 pid;
- u64 lock_addr;
+ u64 lock_addr_or_cgroup;
};
#define TASK_COMM_LEN 16
@@ -39,6 +39,7 @@ enum lock_aggr_mode {
LOCK_AGGR_ADDR = 0,
LOCK_AGGR_TASK,
LOCK_AGGR_CALLER,
+ LOCK_AGGR_CGROUP,
};
enum lock_class_sym {
diff --git a/tools/perf/util/bpf_skel/vmlinux/.gitignore b/tools/perf/util/bpf_skel/vmlinux/.gitignore
new file mode 100644
index 000000000000..49502c04183a
--- /dev/null
+++ b/tools/perf/util/bpf_skel/vmlinux/.gitignore
@@ -0,0 +1 @@
+!vmlinux.h