diff options
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/bpf_trace.c | 27 | ||||
-rw-r--r-- | kernel/trace/fprobe.c | 14 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 10 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 245 | ||||
-rw-r--r-- | kernel/trace/trace.c | 155 | ||||
-rw-r--r-- | kernel/trace/trace_btf.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_eprobe.c | 8 | ||||
-rw-r--r-- | kernel/trace/trace_events_synth.c | 3 | ||||
-rw-r--r-- | kernel/trace/trace_events_trigger.c | 6 | ||||
-rw-r--r-- | kernel/trace/trace_fprobe.c | 59 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 58 | ||||
-rw-r--r-- | kernel/trace/trace_osnoise.c | 6 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 6 | ||||
-rw-r--r-- | kernel/trace/trace_probe.c | 419 | ||||
-rw-r--r-- | kernel/trace/trace_probe.h | 33 | ||||
-rw-r--r-- | kernel/trace/trace_probe_tmpl.h | 10 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 14 | ||||
-rw-r--r-- | kernel/trace/tracing_map.c | 7 |
18 files changed, 730 insertions, 354 deletions
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 7ac6c52b25eb..0a5c4efc73c3 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1412,14 +1412,14 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, __bpf_kfunc_end_defs(); -BTF_SET8_START(key_sig_kfunc_set) +BTF_KFUNCS_START(key_sig_kfunc_set) BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE) #ifdef CONFIG_SYSTEM_DATA_VERIFICATION BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE) #endif -BTF_SET8_END(key_sig_kfunc_set) +BTF_KFUNCS_END(key_sig_kfunc_set) static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = { .owner = THIS_MODULE, @@ -1475,9 +1475,9 @@ __bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str, __bpf_kfunc_end_defs(); -BTF_SET8_START(fs_kfunc_set_ids) +BTF_KFUNCS_START(fs_kfunc_set_ids) BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS) -BTF_SET8_END(fs_kfunc_set_ids) +BTF_KFUNCS_END(fs_kfunc_set_ids) static int bpf_get_file_xattr_filter(const struct bpf_prog *prog, u32 kfunc_id) { @@ -1629,7 +1629,7 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_trace_vprintk: return bpf_get_trace_vprintk_proto(); default: - return bpf_base_func_proto(func_id); + return bpf_base_func_proto(func_id, prog); } } @@ -2679,6 +2679,7 @@ static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link) static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info) { + u64 __user *ucookies = u64_to_user_ptr(info->kprobe_multi.cookies); u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs); struct bpf_kprobe_multi_link *kmulti_link; u32 ucount = info->kprobe_multi.count; @@ -2686,6 +2687,8 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, if (!uaddrs ^ !ucount) return -EINVAL; + if (ucookies && !ucount) + return -EINVAL; kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); info->kprobe_multi.count = kmulti_link->cnt; @@ -2699,6 +2702,18 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, else ucount = kmulti_link->cnt; + if (ucookies) { + if (kmulti_link->cookies) { + if (copy_to_user(ucookies, kmulti_link->cookies, ucount * sizeof(u64))) + return -EFAULT; + } else { + for (i = 0; i < ucount; i++) { + if (put_user(0, ucookies + i)) + return -EFAULT; + } + } + } + if (kallsyms_show_value(current_cred())) { if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64))) return -EFAULT; @@ -3241,7 +3256,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe, .uprobe = uprobe, }; struct bpf_prog *prog = link->link.prog; - bool sleepable = prog->aux->sleepable; + bool sleepable = prog->sleepable; struct bpf_run_ctx *old_run_ctx; int err = 0; diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 6cd2a4e3afb8..9ff018245840 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -189,9 +189,6 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) { int size; - if (num <= 0) - return -EINVAL; - if (!fp->exit_handler) { fp->rethook = NULL; return 0; @@ -199,15 +196,16 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) /* Initialize rethook if needed */ if (fp->nr_maxactive) - size = fp->nr_maxactive; + num = fp->nr_maxactive; else - size = num * num_possible_cpus() * 2; - if (size <= 0) + num *= num_possible_cpus() * 2; + if (num <= 0) return -EINVAL; + size = sizeof(struct fprobe_rethook_node) + fp->entry_data_size; + /* Initialize rethook */ - fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler, - sizeof(struct fprobe_rethook_node), size); + fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler, size, num); if (IS_ERR(fp->rethook)) return PTR_ERR(fp->rethook); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b01ae7d36021..83ba342aef31 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5325,7 +5325,17 @@ static LIST_HEAD(ftrace_direct_funcs); static int register_ftrace_function_nolock(struct ftrace_ops *ops); +/* + * If there are multiple ftrace_ops, use SAVE_REGS by default, so that direct + * call will be jumped from ftrace_regs_caller. Only if the architecture does + * not support ftrace_regs_caller but direct_call, use SAVE_ARGS so that it + * jumps from ftrace_caller for multiple ftrace_ops. + */ +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS #define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_ARGS) +#else +#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS) +#endif static int check_direct_multi(struct ftrace_ops *ops) { diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 13aaf5e85b81..09df645ab9c7 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -384,7 +384,6 @@ struct rb_irq_work { struct irq_work work; wait_queue_head_t waiters; wait_queue_head_t full_waiters; - long wait_index; bool waiters_pending; bool full_waiters_pending; bool wakeup_full; @@ -756,8 +755,19 @@ static void rb_wake_up_waiters(struct irq_work *work) wake_up_all(&rbwork->waiters); if (rbwork->full_waiters_pending || rbwork->wakeup_full) { + /* Only cpu_buffer sets the above flags */ + struct ring_buffer_per_cpu *cpu_buffer = + container_of(rbwork, struct ring_buffer_per_cpu, irq_work); + + /* Called from interrupt context */ + raw_spin_lock(&cpu_buffer->reader_lock); rbwork->wakeup_full = false; rbwork->full_waiters_pending = false; + + /* Waking up all waiters, they will reset the shortest full */ + cpu_buffer->shortest_full = 0; + raw_spin_unlock(&cpu_buffer->reader_lock); + wake_up_all(&rbwork->full_waiters); } } @@ -798,122 +808,144 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) rbwork = &cpu_buffer->irq_work; } - rbwork->wait_index++; - /* make sure the waiters see the new index */ - smp_wmb(); - /* This can be called in any context */ irq_work_queue(&rbwork->work); } +static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full) +{ + struct ring_buffer_per_cpu *cpu_buffer; + bool ret = false; + + /* Reads of all CPUs always waits for any data */ + if (cpu == RING_BUFFER_ALL_CPUS) + return !ring_buffer_empty(buffer); + + cpu_buffer = buffer->buffers[cpu]; + + if (!ring_buffer_empty_cpu(buffer, cpu)) { + unsigned long flags; + bool pagebusy; + + if (!full) + return true; + + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; + ret = !pagebusy && full_hit(buffer, cpu, full); + + if (!ret && (!cpu_buffer->shortest_full || + cpu_buffer->shortest_full > full)) { + cpu_buffer->shortest_full = full; + } + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + } + return ret; +} + +static inline bool +rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer, + int cpu, int full, ring_buffer_cond_fn cond, void *data) +{ + if (rb_watermark_hit(buffer, cpu, full)) + return true; + + if (cond(data)) + return true; + + /* + * The events can happen in critical sections where + * checking a work queue can cause deadlocks. + * After adding a task to the queue, this flag is set + * only to notify events to try to wake up the queue + * using irq_work. + * + * We don't clear it even if the buffer is no longer + * empty. The flag only causes the next event to run + * irq_work to do the work queue wake up. The worse + * that can happen if we race with !trace_empty() is that + * an event will cause an irq_work to try to wake up + * an empty queue. + * + * There's no reason to protect this flag either, as + * the work queue and irq_work logic will do the necessary + * synchronization for the wake ups. The only thing + * that is necessary is that the wake up happens after + * a task has been queued. It's OK for spurious wake ups. + */ + if (full) + rbwork->full_waiters_pending = true; + else + rbwork->waiters_pending = true; + + return false; +} + +/* + * The default wait condition for ring_buffer_wait() is to just to exit the + * wait loop the first time it is woken up. + */ +static bool rb_wait_once(void *data) +{ + long *once = data; + + /* wait_event() actually calls this twice before scheduling*/ + if (*once > 1) + return true; + + (*once)++; + return false; +} + /** * ring_buffer_wait - wait for input to the ring buffer * @buffer: buffer to wait on * @cpu: the cpu buffer to wait on * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS + * @cond: condition function to break out of wait (NULL to run once) + * @data: the data to pass to @cond. * * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. */ -int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) +int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full, + ring_buffer_cond_fn cond, void *data) { struct ring_buffer_per_cpu *cpu_buffer; - DEFINE_WAIT(wait); - struct rb_irq_work *work; - long wait_index; + struct wait_queue_head *waitq; + struct rb_irq_work *rbwork; + long once = 0; int ret = 0; + if (!cond) { + cond = rb_wait_once; + data = &once; + } + /* * Depending on what the caller is waiting for, either any * data in any cpu buffer, or a specific buffer, put the * caller on the appropriate wait queue. */ if (cpu == RING_BUFFER_ALL_CPUS) { - work = &buffer->irq_work; + rbwork = &buffer->irq_work; /* Full only makes sense on per cpu reads */ full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return -ENODEV; cpu_buffer = buffer->buffers[cpu]; - work = &cpu_buffer->irq_work; - } - - wait_index = READ_ONCE(work->wait_index); - - while (true) { - if (full) - prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE); - else - prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); - - /* - * The events can happen in critical sections where - * checking a work queue can cause deadlocks. - * After adding a task to the queue, this flag is set - * only to notify events to try to wake up the queue - * using irq_work. - * - * We don't clear it even if the buffer is no longer - * empty. The flag only causes the next event to run - * irq_work to do the work queue wake up. The worse - * that can happen if we race with !trace_empty() is that - * an event will cause an irq_work to try to wake up - * an empty queue. - * - * There's no reason to protect this flag either, as - * the work queue and irq_work logic will do the necessary - * synchronization for the wake ups. The only thing - * that is necessary is that the wake up happens after - * a task has been queued. It's OK for spurious wake ups. - */ - if (full) - work->full_waiters_pending = true; - else - work->waiters_pending = true; - - if (signal_pending(current)) { - ret = -EINTR; - break; - } - - if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) - break; - - if (cpu != RING_BUFFER_ALL_CPUS && - !ring_buffer_empty_cpu(buffer, cpu)) { - unsigned long flags; - bool pagebusy; - bool done; - - if (!full) - break; - - raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; - done = !pagebusy && full_hit(buffer, cpu, full); - - if (!cpu_buffer->shortest_full || - cpu_buffer->shortest_full > full) - cpu_buffer->shortest_full = full; - raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (done) - break; - } - - schedule(); - - /* Make sure to see the new wait index */ - smp_rmb(); - if (wait_index != work->wait_index) - break; + rbwork = &cpu_buffer->irq_work; } if (full) - finish_wait(&work->full_waiters, &wait); + waitq = &rbwork->full_waiters; else - finish_wait(&work->waiters, &wait); + waitq = &rbwork->waiters; + + ret = wait_event_interruptible((*waitq), + rb_wait_cond(rbwork, buffer, cpu, full, cond, data)); return ret; } @@ -937,30 +969,44 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full) { struct ring_buffer_per_cpu *cpu_buffer; - struct rb_irq_work *work; + struct rb_irq_work *rbwork; if (cpu == RING_BUFFER_ALL_CPUS) { - work = &buffer->irq_work; + rbwork = &buffer->irq_work; full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) - return -EINVAL; + return EPOLLERR; cpu_buffer = buffer->buffers[cpu]; - work = &cpu_buffer->irq_work; + rbwork = &cpu_buffer->irq_work; } if (full) { - poll_wait(filp, &work->full_waiters, poll_table); - work->full_waiters_pending = true; - if (!cpu_buffer->shortest_full || - cpu_buffer->shortest_full > full) - cpu_buffer->shortest_full = full; - } else { - poll_wait(filp, &work->waiters, poll_table); - work->waiters_pending = true; + poll_wait(filp, &rbwork->full_waiters, poll_table); + + if (rb_watermark_hit(buffer, cpu, full)) + return EPOLLIN | EPOLLRDNORM; + /* + * Only allow full_waiters_pending update to be seen after + * the shortest_full is set (in rb_watermark_hit). If the + * writer sees the full_waiters_pending flag set, it will + * compare the amount in the ring buffer to shortest_full. + * If the amount in the ring buffer is greater than the + * shortest_full percent, it will call the irq_work handler + * to wake up this list. The irq_handler will reset shortest_full + * back to zero. That's done under the reader_lock, but + * the below smp_mb() makes sure that the update to + * full_waiters_pending doesn't leak up into the above. + */ + smp_mb(); + rbwork->full_waiters_pending = true; + return 0; } + poll_wait(filp, &rbwork->waiters, poll_table); + rbwork->waiters_pending = true; + /* * There's a tight race between setting the waiters_pending and * checking if the ring buffer is empty. Once the waiters_pending bit @@ -976,9 +1022,6 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, */ smp_mb(); - if (full) - return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0; - if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) return EPOLLIN | EPOLLRDNORM; @@ -1009,7 +1052,7 @@ static inline u64 rb_time_stamp(struct trace_buffer *buffer) u64 ts; /* Skip retpolines :-( */ - if (IS_ENABLED(CONFIG_RETPOLINE) && likely(buffer->clock == trace_clock_local)) + if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && likely(buffer->clock == trace_clock_local)) ts = trace_clock_local(); else ts = buffer->clock(); @@ -5877,6 +5920,10 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order) if (psize <= BUF_PAGE_HDR_SIZE) return -EINVAL; + /* Size of a subbuf cannot be greater than the write counter */ + if (psize > RB_WRITE_MASK + 1) + return -EINVAL; + old_order = buffer->subbuf_order; old_size = buffer->subbuf_size; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2a7c6fd934e9..ab4c1a1fbda8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -39,6 +39,7 @@ #include <linux/ctype.h> #include <linux/init.h> #include <linux/panic_notifier.h> +#include <linux/kmemleak.h> #include <linux/poll.h> #include <linux/nmi.h> #include <linux/fs.h> @@ -1532,7 +1533,7 @@ void disable_trace_on_warning(void) bool tracer_tracing_is_on(struct trace_array *tr) { if (tr->array_buffer.buffer) - return ring_buffer_record_is_on(tr->array_buffer.buffer); + return ring_buffer_record_is_set_on(tr->array_buffer.buffer); return !tr->buffer_disabled; } @@ -1954,15 +1955,36 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) #endif /* CONFIG_TRACER_MAX_TRACE */ +struct pipe_wait { + struct trace_iterator *iter; + int wait_index; +}; + +static bool wait_pipe_cond(void *data) +{ + struct pipe_wait *pwait = data; + struct trace_iterator *iter = pwait->iter; + + if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index) + return true; + + return iter->closed; +} + static int wait_on_pipe(struct trace_iterator *iter, int full) { + struct pipe_wait pwait; int ret; /* Iterators are static, they should be filled or empty */ if (trace_buffer_iter(iter, iter->cpu_file)) return 0; - ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full); + pwait.wait_index = atomic_read_acquire(&iter->wait_index); + pwait.iter = iter; + + ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full, + wait_pipe_cond, &pwait); #ifdef CONFIG_TRACER_MAX_TRACE /* @@ -2320,7 +2342,7 @@ struct saved_cmdlines_buffer { unsigned *map_cmdline_to_pid; unsigned cmdline_num; int cmdline_idx; - char *saved_cmdlines; + char saved_cmdlines[]; }; static struct saved_cmdlines_buffer *savedcmd; @@ -2334,47 +2356,60 @@ static inline void set_cmdline(int idx, const char *cmdline) strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN); } -static int allocate_cmdlines_buffer(unsigned int val, - struct saved_cmdlines_buffer *s) +static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) +{ + int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN); + + kfree(s->map_cmdline_to_pid); + kmemleak_free(s); + free_pages((unsigned long)s, order); +} + +static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val) { + struct saved_cmdlines_buffer *s; + struct page *page; + int orig_size, size; + int order; + + /* Figure out how much is needed to hold the given number of cmdlines */ + orig_size = sizeof(*s) + val * TASK_COMM_LEN; + order = get_order(orig_size); + size = 1 << (order + PAGE_SHIFT); + page = alloc_pages(GFP_KERNEL, order); + if (!page) + return NULL; + + s = page_address(page); + kmemleak_alloc(s, size, 1, GFP_KERNEL); + memset(s, 0, sizeof(*s)); + + /* Round up to actual allocation */ + val = (size - sizeof(*s)) / TASK_COMM_LEN; + s->cmdline_num = val; + s->map_cmdline_to_pid = kmalloc_array(val, sizeof(*s->map_cmdline_to_pid), GFP_KERNEL); - if (!s->map_cmdline_to_pid) - return -ENOMEM; - - s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); - if (!s->saved_cmdlines) { - kfree(s->map_cmdline_to_pid); - return -ENOMEM; + if (!s->map_cmdline_to_pid) { + free_saved_cmdlines_buffer(s); + return NULL; } s->cmdline_idx = 0; - s->cmdline_num = val; memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(s->map_pid_to_cmdline)); memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP, val * sizeof(*s->map_cmdline_to_pid)); - return 0; + return s; } static int trace_create_savedcmd(void) { - int ret; + savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT); - savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL); - if (!savedcmd) - return -ENOMEM; - - ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd); - if (ret < 0) { - kfree(savedcmd); - savedcmd = NULL; - return -ENOMEM; - } - - return 0; + return savedcmd ? 0 : -ENOMEM; } int is_tracing_stopped(void) @@ -5733,16 +5768,15 @@ static const char readme_msg[] = "\t args: <name>=fetcharg[:type]\n" "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n" #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API -#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n" +#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS "\t <argname>[->field[->field|.field...]],\n" -#else - "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n" #endif #else "\t $stack<index>, $stack, $retval, $comm,\n" #endif "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n" + "\t kernel return probes support: $retval, $arg<N>, $comm\n" "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n" "\t symstr, <type>\\[<array-size>\\]\n" @@ -6056,26 +6090,14 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) -{ - kfree(s->saved_cmdlines); - kfree(s->map_cmdline_to_pid); - kfree(s); -} - static int tracing_resize_saved_cmdlines(unsigned int val) { struct saved_cmdlines_buffer *s, *savedcmd_temp; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = allocate_cmdlines_buffer(val); if (!s) return -ENOMEM; - if (allocate_cmdlines_buffer(val, s) < 0) { - kfree(s); - return -ENOMEM; - } - preempt_disable(); arch_spin_lock(&trace_cmdline_lock); savedcmd_temp = savedcmd; @@ -7291,6 +7313,8 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) return 0; } +#define TRACE_MARKER_MAX_SIZE 4096 + static ssize_t tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) @@ -7318,6 +7342,9 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if ((ssize_t)cnt < 0) return -EINVAL; + if (cnt > TRACE_MARKER_MAX_SIZE) + cnt = TRACE_MARKER_MAX_SIZE; + meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */ again: size = cnt + meta_size; @@ -7326,11 +7353,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (cnt < FAULTED_SIZE) size += FAULTED_SIZE - cnt; - if (size > TRACE_SEQ_BUFFER_SIZE) { - cnt -= size - TRACE_SEQ_BUFFER_SIZE; - goto again; - } - buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, tracing_gen_ctx()); @@ -8391,6 +8413,20 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, return size; } +static int tracing_buffers_flush(struct file *file, fl_owner_t id) +{ + struct ftrace_buffer_info *info = file->private_data; + struct trace_iterator *iter = &info->iter; + + iter->closed = true; + /* Make sure the waiters see the new wait_index */ + (void)atomic_fetch_inc_release(&iter->wait_index); + + ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); + + return 0; +} + static int tracing_buffers_release(struct inode *inode, struct file *file) { struct ftrace_buffer_info *info = file->private_data; @@ -8402,12 +8438,6 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) __trace_array_put(iter->tr); - iter->wait_index++; - /* Make sure the waiters see the new wait_index */ - smp_wmb(); - - ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); - if (info->spare) ring_buffer_free_read_page(iter->array_buffer->buffer, info->spare_cpu, info->spare); @@ -8490,6 +8520,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, .spd_release = buffer_spd_release, }; struct buffer_ref *ref; + bool woken = false; int page_size; int entries, i; ssize_t ret = 0; @@ -8563,17 +8594,17 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, /* did we read anything? */ if (!spd.nr_pages) { - long wait_index; if (ret) goto out; + if (woken) + goto out; + ret = -EAGAIN; if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) goto out; - wait_index = READ_ONCE(iter->wait_index); - ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent); if (ret) goto out; @@ -8582,10 +8613,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, if (!tracer_tracing_is_on(iter->tr)) goto out; - /* Make sure we see the new wait_index */ - smp_rmb(); - if (wait_index != iter->wait_index) - goto out; + /* Iterate one more time to collect any new data then exit */ + woken = true; goto again; } @@ -8608,9 +8637,8 @@ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned mutex_lock(&trace_types_lock); - iter->wait_index++; /* Make sure the waiters see the new wait_index */ - smp_wmb(); + (void)atomic_fetch_inc_release(&iter->wait_index); ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); @@ -8623,6 +8651,7 @@ static const struct file_operations tracing_buffers_fops = { .read = tracing_buffers_read, .poll = tracing_buffers_poll, .release = tracing_buffers_release, + .flush = tracing_buffers_flush, .splice_read = tracing_buffers_splice_read, .unlocked_ioctl = tracing_buffers_ioctl, .llseek = no_llseek, diff --git a/kernel/trace/trace_btf.c b/kernel/trace/trace_btf.c index ca224d53bfdc..5bbdbcbbde3c 100644 --- a/kernel/trace/trace_btf.c +++ b/kernel/trace/trace_btf.c @@ -91,8 +91,8 @@ retry: for_each_member(i, type, member) { if (!member->name_off) { /* Anonymous union/struct: push it for later use */ - type = btf_type_skip_modifiers(btf, member->type, &tid); - if (type && top < BTF_ANON_STACK_MAX) { + if (btf_type_skip_modifiers(btf, member->type, &tid) && + top < BTF_ANON_STACK_MAX) { anon_stack[top].tid = tid; anon_stack[top++].offset = cur_offset + member->offset; diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 03c851f57969..b0e0ec85912e 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -220,7 +220,7 @@ static struct trace_eprobe *alloc_event_probe(const char *group, if (!ep->event_system) goto error; - ret = trace_probe_init(&ep->tp, this_event, group, false); + ret = trace_probe_init(&ep->tp, this_event, group, false, nargs); if (ret < 0) goto error; @@ -390,8 +390,8 @@ static int get_eprobe_size(struct trace_probe *tp, void *rec) /* Note that we don't verify it, since the code does not come from user space */ static int -process_fetch_insn(struct fetch_insn *code, void *rec, void *dest, - void *base) +process_fetch_insn(struct fetch_insn *code, void *rec, void *edata, + void *dest, void *base) { unsigned long val; int ret; @@ -438,7 +438,7 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec) return; entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); - store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize); + store_trace_args(&entry[1], &edata->ep->tp, rec, NULL, sizeof(*entry), dsize); trace_event_buffer_commit(&fbuffer); } diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index e7af286af4f1..c82b401a294d 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -441,8 +441,9 @@ static unsigned int trace_string(struct synth_trace_event *entry, if (is_dynamic) { union trace_synth_field *data = &entry->fields[*n_u64]; + len = fetch_store_strlen((unsigned long)str_val); data->as_dynamic.offset = struct_size(entry, fields, event->n_u64) + data_size; - data->as_dynamic.len = fetch_store_strlen((unsigned long)str_val); + data->as_dynamic.len = len; ret = fetch_store_string((unsigned long)str_val, &entry->fields[*n_u64], entry); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 46439e3bcec4..b33c3861fbbb 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1470,8 +1470,10 @@ register_snapshot_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { - if (tracing_alloc_snapshot_instance(file->tr) != 0) - return 0; + int ret = tracing_alloc_snapshot_instance(file->tr); + + if (ret < 0) + return ret; return register_trigger(glob, data, file); } diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 7d2ddbcfa377..4f4280815522 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -4,6 +4,7 @@ * Copyright (C) 2022 Google LLC. */ #define pr_fmt(fmt) "trace_fprobe: " fmt +#include <asm/ptrace.h> #include <linux/fprobe.h> #include <linux/module.h> @@ -129,8 +130,8 @@ static bool trace_fprobe_is_registered(struct trace_fprobe *tf) * from user space. */ static int -process_fetch_insn(struct fetch_insn *code, void *rec, void *dest, - void *base) +process_fetch_insn(struct fetch_insn *code, void *rec, void *edata, + void *dest, void *base) { struct pt_regs *regs = rec; unsigned long val; @@ -152,6 +153,9 @@ retry: case FETCH_OP_ARG: val = regs_get_kernel_argument(regs, code->param); break; + case FETCH_OP_EDATA: + val = *(unsigned long *)((unsigned long)edata + code->offset); + break; #endif case FETCH_NOP_SYMBOL: /* Ignore a place holder */ code++; @@ -184,7 +188,7 @@ __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, if (trace_trigger_soft_disabled(trace_file)) return; - dsize = __get_data_size(&tf->tp, regs); + dsize = __get_data_size(&tf->tp, regs, NULL); entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry) + tf->tp.size + dsize); @@ -194,7 +198,7 @@ __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, fbuffer.regs = regs; entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); entry->ip = entry_ip; - store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tf->tp, regs, NULL, sizeof(*entry), dsize); trace_event_buffer_commit(&fbuffer); } @@ -210,11 +214,24 @@ fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, } NOKPROBE_SYMBOL(fentry_trace_func); -/* Kretprobe handler */ +/* function exit handler */ +static int trace_fprobe_entry_handler(struct fprobe *fp, unsigned long entry_ip, + unsigned long ret_ip, struct pt_regs *regs, + void *entry_data) +{ + struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp); + + if (tf->tp.entry_arg) + store_trace_entry_data(entry_data, &tf->tp, regs); + + return 0; +} +NOKPROBE_SYMBOL(trace_fprobe_entry_handler) + static nokprobe_inline void __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, unsigned long ret_ip, struct pt_regs *regs, - struct trace_event_file *trace_file) + void *entry_data, struct trace_event_file *trace_file) { struct fexit_trace_entry_head *entry; struct trace_event_buffer fbuffer; @@ -227,7 +244,7 @@ __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, if (trace_trigger_soft_disabled(trace_file)) return; - dsize = __get_data_size(&tf->tp, regs); + dsize = __get_data_size(&tf->tp, regs, entry_data); entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry) + tf->tp.size + dsize); @@ -238,19 +255,19 @@ __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); entry->func = entry_ip; entry->ret_ip = ret_ip; - store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tf->tp, regs, entry_data, sizeof(*entry), dsize); trace_event_buffer_commit(&fbuffer); } static void fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, - unsigned long ret_ip, struct pt_regs *regs) + unsigned long ret_ip, struct pt_regs *regs, void *entry_data) { struct event_file_link *link; trace_probe_for_each_link_rcu(link, &tf->tp) - __fexit_trace_func(tf, entry_ip, ret_ip, regs, link->file); + __fexit_trace_func(tf, entry_ip, ret_ip, regs, entry_data, link->file); } NOKPROBE_SYMBOL(fexit_trace_func); @@ -269,7 +286,7 @@ static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip, if (hlist_empty(head)) return 0; - dsize = __get_data_size(&tf->tp, regs); + dsize = __get_data_size(&tf->tp, regs, NULL); __size = sizeof(*entry) + tf->tp.size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -280,7 +297,7 @@ static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip, entry->ip = entry_ip; memset(&entry[1], 0, dsize); - store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tf->tp, regs, NULL, sizeof(*entry), dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); return 0; @@ -289,7 +306,8 @@ NOKPROBE_SYMBOL(fentry_perf_func); static void fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip, - unsigned long ret_ip, struct pt_regs *regs) + unsigned long ret_ip, struct pt_regs *regs, + void *entry_data) { struct trace_event_call *call = trace_probe_event_call(&tf->tp); struct fexit_trace_entry_head *entry; @@ -301,7 +319,7 @@ fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip, if (hlist_empty(head)) return; - dsize = __get_data_size(&tf->tp, regs); + dsize = __get_data_size(&tf->tp, regs, entry_data); __size = sizeof(*entry) + tf->tp.size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -312,7 +330,7 @@ fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip, entry->func = entry_ip; entry->ret_ip = ret_ip; - store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tf->tp, regs, entry_data, sizeof(*entry), dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); } @@ -343,10 +361,10 @@ static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip, struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp); if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE)) - fexit_trace_func(tf, entry_ip, ret_ip, regs); + fexit_trace_func(tf, entry_ip, ret_ip, regs, entry_data); #ifdef CONFIG_PERF_EVENTS if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE)) - fexit_perf_func(tf, entry_ip, ret_ip, regs); + fexit_perf_func(tf, entry_ip, ret_ip, regs, entry_data); #endif } NOKPROBE_SYMBOL(fexit_dispatcher); @@ -389,7 +407,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group, tf->tpoint = tpoint; tf->fp.nr_maxactive = maxactive; - ret = trace_probe_init(&tf->tp, event, group, false); + ret = trace_probe_init(&tf->tp, event, group, false, nargs); if (ret < 0) goto error; @@ -1109,6 +1127,11 @@ static int __trace_fprobe_create(int argc, const char *argv[]) goto error; /* This can be -ENOMEM */ } + if (is_return && tf->tp.entry_arg) { + tf->fp.entry_handler = trace_fprobe_entry_handler; + tf->fp.entry_data_size = traceprobe_get_entry_data_size(&tf->tp); + } + ret = traceprobe_set_print_fmt(&tf->tp, is_return ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL); if (ret < 0) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c4c6e0e0068b..14099cc17fc9 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -290,7 +290,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group, INIT_HLIST_NODE(&tk->rp.kp.hlist); INIT_LIST_HEAD(&tk->rp.kp.list); - ret = trace_probe_init(&tk->tp, event, group, false); + ret = trace_probe_init(&tk->tp, event, group, false, nargs); if (ret < 0) goto error; @@ -740,6 +740,9 @@ static unsigned int number_of_same_symbols(char *func_name) return ctx.count; } +static int trace_kprobe_entry_handler(struct kretprobe_instance *ri, + struct pt_regs *regs); + static int __trace_kprobe_create(int argc, const char *argv[]) { /* @@ -948,6 +951,11 @@ static int __trace_kprobe_create(int argc, const char *argv[]) if (ret) goto error; /* This can be -ENOMEM */ } + /* entry handler for kretprobe */ + if (is_return && tk->tp.entry_arg) { + tk->rp.entry_handler = trace_kprobe_entry_handler; + tk->rp.data_size = traceprobe_get_entry_data_size(&tk->tp); + } ptype = is_return ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL; ret = traceprobe_set_print_fmt(&tk->tp, ptype); @@ -1303,8 +1311,8 @@ static const struct file_operations kprobe_profile_ops = { /* Note that we don't verify it, since the code does not come from user space */ static int -process_fetch_insn(struct fetch_insn *code, void *rec, void *dest, - void *base) +process_fetch_insn(struct fetch_insn *code, void *rec, void *edata, + void *dest, void *base) { struct pt_regs *regs = rec; unsigned long val; @@ -1329,6 +1337,9 @@ retry: case FETCH_OP_ARG: val = regs_get_kernel_argument(regs, code->param); break; + case FETCH_OP_EDATA: + val = *(unsigned long *)((unsigned long)edata + code->offset); + break; #endif case FETCH_NOP_SYMBOL: /* Ignore a place holder */ code++; @@ -1359,7 +1370,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, if (trace_trigger_soft_disabled(trace_file)) return; - dsize = __get_data_size(&tk->tp, regs); + dsize = __get_data_size(&tk->tp, regs, NULL); entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry) + tk->tp.size + dsize); @@ -1368,7 +1379,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, fbuffer.regs = regs; entry->ip = (unsigned long)tk->rp.kp.addr; - store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tk->tp, regs, NULL, sizeof(*entry), dsize); trace_event_buffer_commit(&fbuffer); } @@ -1384,6 +1395,31 @@ kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs) NOKPROBE_SYMBOL(kprobe_trace_func); /* Kretprobe handler */ + +static int trace_kprobe_entry_handler(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + struct kretprobe *rp = get_kretprobe(ri); + struct trace_kprobe *tk; + + /* + * There is a small chance that get_kretprobe(ri) returns NULL when + * the kretprobe is unregister on another CPU between kretprobe's + * trampoline_handler and this function. + */ + if (unlikely(!rp)) + return -ENOENT; + + tk = container_of(rp, struct trace_kprobe, rp); + + /* store argument values into ri->data as entry data */ + if (tk->tp.entry_arg) + store_trace_entry_data(ri->data, &tk->tp, regs); + + return 0; +} + + static nokprobe_inline void __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs, @@ -1399,7 +1435,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, if (trace_trigger_soft_disabled(trace_file)) return; - dsize = __get_data_size(&tk->tp, regs); + dsize = __get_data_size(&tk->tp, regs, ri->data); entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry) + tk->tp.size + dsize); @@ -1409,7 +1445,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, fbuffer.regs = regs; entry->func = (unsigned long)tk->rp.kp.addr; entry->ret_ip = get_kretprobe_retaddr(ri); - store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tk->tp, regs, ri->data, sizeof(*entry), dsize); trace_event_buffer_commit(&fbuffer); } @@ -1557,7 +1593,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) if (hlist_empty(head)) return 0; - dsize = __get_data_size(&tk->tp, regs); + dsize = __get_data_size(&tk->tp, regs, NULL); __size = sizeof(*entry) + tk->tp.size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -1568,7 +1604,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); - store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tk->tp, regs, NULL, sizeof(*entry), dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); return 0; @@ -1593,7 +1629,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, if (hlist_empty(head)) return; - dsize = __get_data_size(&tk->tp, regs); + dsize = __get_data_size(&tk->tp, regs, ri->data); __size = sizeof(*entry) + tk->tp.size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -1604,7 +1640,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, entry->func = (unsigned long)tk->rp.kp.addr; entry->ret_ip = get_kretprobe_retaddr(ri); - store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tk->tp, regs, ri->data, sizeof(*entry), dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); } diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index bd0d01d00fb9..a8e28f9b9271 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -2444,6 +2444,9 @@ static int timerlat_fd_open(struct inode *inode, struct file *file) tlat = this_cpu_tmr_var(); tlat->count = 0; + hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); + tlat->timer.function = timerlat_irq; + migrate_enable(); return 0; }; @@ -2526,9 +2529,6 @@ timerlat_fd_read(struct file *file, char __user *ubuf, size_t count, tlat->tracing_thread = false; tlat->kthread = current; - hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); - tlat->timer.function = timerlat_irq; - /* Annotate now to drift new period */ tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 3e7fa44dc2b2..d8b302d01083 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1587,12 +1587,11 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter, { struct print_entry *field; struct trace_seq *s = &iter->seq; - int max = iter->ent_size - offsetof(struct print_entry, buf); trace_assign_type(field, iter->ent); seq_print_ip_sym(s, field->ip, flags); - trace_seq_printf(s, ": %.*s", max, field->buf); + trace_seq_printf(s, ": %s", field->buf); return trace_handle_return(s); } @@ -1601,11 +1600,10 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags, struct trace_event *event) { struct print_entry *field; - int max = iter->ent_size - offsetof(struct print_entry, buf); trace_assign_type(field, iter->ent); - trace_seq_printf(&iter->seq, "# %lx %.*s", field->ip, max, field->buf); + trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf); return trace_handle_return(&iter->seq); } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 4dc74d73fc1d..217169de0920 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -594,6 +594,8 @@ static int parse_btf_field(char *fieldname, const struct btf_type *type, return 0; } +static int __store_entry_arg(struct trace_probe *tp, int argnum); + static int parse_btf_arg(char *varname, struct fetch_insn **pcode, struct fetch_insn *end, struct traceprobe_parse_context *ctx) @@ -618,11 +620,7 @@ static int parse_btf_arg(char *varname, return -EOPNOTSUPP; } - if (ctx->flags & TPARG_FL_RETURN) { - if (strcmp(varname, "$retval") != 0) { - trace_probe_log_err(ctx->offset, NO_BTFARG); - return -ENOENT; - } + if (ctx->flags & TPARG_FL_RETURN && !strcmp(varname, "$retval")) { code->op = FETCH_OP_RETVAL; /* Check whether the function return type is not void */ if (query_btf_context(ctx) == 0) { @@ -654,11 +652,21 @@ static int parse_btf_arg(char *varname, const char *name = btf_name_by_offset(ctx->btf, params[i].name_off); if (name && !strcmp(name, varname)) { - code->op = FETCH_OP_ARG; - if (ctx->flags & TPARG_FL_TPOINT) - code->param = i + 1; - else - code->param = i; + if (tparg_is_function_entry(ctx->flags)) { + code->op = FETCH_OP_ARG; + if (ctx->flags & TPARG_FL_TPOINT) + code->param = i + 1; + else + code->param = i; + } else if (tparg_is_function_return(ctx->flags)) { + code->op = FETCH_OP_EDATA; + ret = __store_entry_arg(ctx->tp, i); + if (ret < 0) { + /* internal error */ + return ret; + } + code->offset = ret; + } tid = params[i].type; goto found; } @@ -755,6 +763,110 @@ static int check_prepare_btf_string_fetch(char *typename, #endif +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API + +static int __store_entry_arg(struct trace_probe *tp, int argnum) +{ + struct probe_entry_arg *earg = tp->entry_arg; + bool match = false; + int i, offset; + + if (!earg) { + earg = kzalloc(sizeof(*tp->entry_arg), GFP_KERNEL); + if (!earg) + return -ENOMEM; + earg->size = 2 * tp->nr_args + 1; + earg->code = kcalloc(earg->size, sizeof(struct fetch_insn), + GFP_KERNEL); + if (!earg->code) { + kfree(earg); + return -ENOMEM; + } + /* Fill the code buffer with 'end' to simplify it */ + for (i = 0; i < earg->size; i++) + earg->code[i].op = FETCH_OP_END; + tp->entry_arg = earg; + } + + offset = 0; + for (i = 0; i < earg->size - 1; i++) { + switch (earg->code[i].op) { + case FETCH_OP_END: + earg->code[i].op = FETCH_OP_ARG; + earg->code[i].param = argnum; + earg->code[i + 1].op = FETCH_OP_ST_EDATA; + earg->code[i + 1].offset = offset; + return offset; + case FETCH_OP_ARG: + match = (earg->code[i].param == argnum); + break; + case FETCH_OP_ST_EDATA: + offset = earg->code[i].offset; + if (match) + return offset; + offset += sizeof(unsigned long); + break; + default: + break; + } + } + return -ENOSPC; +} + +int traceprobe_get_entry_data_size(struct trace_probe *tp) +{ + struct probe_entry_arg *earg = tp->entry_arg; + int i, size = 0; + + if (!earg) + return 0; + + for (i = 0; i < earg->size; i++) { + switch (earg->code[i].op) { + case FETCH_OP_END: + goto out; + case FETCH_OP_ST_EDATA: + size = earg->code[i].offset + sizeof(unsigned long); + break; + default: + break; + } + } +out: + return size; +} + +void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs) +{ + struct probe_entry_arg *earg = tp->entry_arg; + unsigned long val; + int i; + + if (!earg) + return; + + for (i = 0; i < earg->size; i++) { + struct fetch_insn *code = &earg->code[i]; + + switch (code->op) { + case FETCH_OP_ARG: + val = regs_get_kernel_argument(regs, code->param); + break; + case FETCH_OP_ST_EDATA: + *(unsigned long *)((unsigned long)edata + code->offset) = val; + break; + case FETCH_OP_END: + goto end; + default: + break; + } + } +end: + return; +} +NOKPROBE_SYMBOL(store_trace_entry_data) +#endif + #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) /* Parse $vars. @orig_arg points '$', which syncs to @ctx->offset */ @@ -830,7 +942,7 @@ static int parse_probe_vars(char *orig_arg, const struct fetch_type *t, #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API len = str_has_prefix(arg, "arg"); - if (len && tparg_is_function_entry(ctx->flags)) { + if (len) { ret = kstrtoul(arg + len, 10, ¶m); if (ret) goto inval; @@ -839,15 +951,29 @@ static int parse_probe_vars(char *orig_arg, const struct fetch_type *t, err = TP_ERR_BAD_ARG_NUM; goto inval; } + param--; /* argN starts from 1, but internal arg[N] starts from 0 */ - code->op = FETCH_OP_ARG; - code->param = (unsigned int)param - 1; - /* - * The tracepoint probe will probe a stub function, and the - * first parameter of the stub is a dummy and should be ignored. - */ - if (ctx->flags & TPARG_FL_TPOINT) - code->param++; + if (tparg_is_function_entry(ctx->flags)) { + code->op = FETCH_OP_ARG; + code->param = (unsigned int)param; + /* + * The tracepoint probe will probe a stub function, and the + * first parameter of the stub is a dummy and should be ignored. + */ + if (ctx->flags & TPARG_FL_TPOINT) + code->param++; + } else if (tparg_is_function_return(ctx->flags)) { + /* function entry argument access from return probe */ + ret = __store_entry_arg(ctx->tp, param); + if (ret < 0) /* This error should be an internal error */ + return ret; + + code->op = FETCH_OP_EDATA; + code->offset = ret; + } else { + err = TP_ERR_NOFENTRY_ARGS; + goto inval; + } return 0; } #endif @@ -1037,7 +1163,8 @@ parse_probe_arg(char *arg, const struct fetch_type *type, break; default: if (isalpha(arg[0]) || arg[0] == '_') { /* BTF variable */ - if (!tparg_is_function_entry(ctx->flags)) { + if (!tparg_is_function_entry(ctx->flags) && + !tparg_is_function_return(ctx->flags)) { trace_probe_log_err(ctx->offset, NOSUP_BTFARG); return -EINVAL; } @@ -1090,67 +1217,45 @@ static int __parse_bitfield_probe_arg(const char *bf, return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0; } -/* String length checking wrapper */ -static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, - struct probe_arg *parg, - struct traceprobe_parse_context *ctx) +/* Split type part from @arg and return it. */ +static char *parse_probe_arg_type(char *arg, struct probe_arg *parg, + struct traceprobe_parse_context *ctx) { - struct fetch_insn *code, *scode, *tmp = NULL; - char *t, *t2, *t3; - int ret, len; - char *arg; - - arg = kstrdup(argv, GFP_KERNEL); - if (!arg) - return -ENOMEM; - - ret = -EINVAL; - len = strlen(arg); - if (len > MAX_ARGSTR_LEN) { - trace_probe_log_err(ctx->offset, ARG_TOO_LONG); - goto out; - } else if (len == 0) { - trace_probe_log_err(ctx->offset, NO_ARG_BODY); - goto out; - } - - ret = -ENOMEM; - parg->comm = kstrdup(arg, GFP_KERNEL); - if (!parg->comm) - goto out; + char *t = NULL, *t2, *t3; + int offs; - ret = -EINVAL; t = strchr(arg, ':'); if (t) { - *t = '\0'; - t2 = strchr(++t, '['); + *t++ = '\0'; + t2 = strchr(t, '['); if (t2) { *t2++ = '\0'; t3 = strchr(t2, ']'); if (!t3) { - int offs = t2 + strlen(t2) - arg; + offs = t2 + strlen(t2) - arg; trace_probe_log_err(ctx->offset + offs, ARRAY_NO_CLOSE); - goto out; + return ERR_PTR(-EINVAL); } else if (t3[1] != '\0') { trace_probe_log_err(ctx->offset + t3 + 1 - arg, BAD_ARRAY_SUFFIX); - goto out; + return ERR_PTR(-EINVAL); } *t3 = '\0'; if (kstrtouint(t2, 0, &parg->count) || !parg->count) { trace_probe_log_err(ctx->offset + t2 - arg, BAD_ARRAY_NUM); - goto out; + return ERR_PTR(-EINVAL); } if (parg->count > MAX_ARRAY_LEN) { trace_probe_log_err(ctx->offset + t2 - arg, ARRAY_TOO_BIG); - goto out; + return ERR_PTR(-EINVAL); } } } + offs = t ? t - arg : 0; /* * Since $comm and immediate string can not be dereferenced, @@ -1159,72 +1264,54 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, if (!(ctx->flags & TPARG_FL_TEVENT) && (strcmp(arg, "$comm") == 0 || strcmp(arg, "$COMM") == 0 || strncmp(arg, "\\\"", 2) == 0)) { - /* The type of $comm must be "string", and not an array. */ - if (parg->count || (t && strcmp(t, "string"))) - goto out; + /* The type of $comm must be "string", and not an array type. */ + if (parg->count || (t && strcmp(t, "string"))) { + trace_probe_log_err(ctx->offset + offs, NEED_STRING_TYPE); + return ERR_PTR(-EINVAL); + } parg->type = find_fetch_type("string", ctx->flags); } else parg->type = find_fetch_type(t, ctx->flags); - if (!parg->type) { - trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0), BAD_TYPE); - goto out; - } - parg->offset = *size; - *size += parg->type->size * (parg->count ?: 1); - ret = -ENOMEM; - if (parg->count) { - len = strlen(parg->type->fmttype) + 6; - parg->fmt = kmalloc(len, GFP_KERNEL); - if (!parg->fmt) - goto out; - snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype, - parg->count); + if (!parg->type) { + trace_probe_log_err(ctx->offset + offs, BAD_TYPE); + return ERR_PTR(-EINVAL); } - code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL); - if (!code) - goto out; - code[FETCH_INSN_MAX - 1].op = FETCH_OP_END; - - ctx->last_type = NULL; - ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1], - ctx); - if (ret) - goto fail; + return t; +} - /* Update storing type if BTF is available */ - if (IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS) && - ctx->last_type) { - if (!t) { - parg->type = find_fetch_type_from_btf_type(ctx); - } else if (strstr(t, "string")) { - ret = check_prepare_btf_string_fetch(t, &code, ctx); - if (ret) - goto fail; - } - } +/* After parsing, adjust the fetch_insn according to the probe_arg */ +static int finalize_fetch_insn(struct fetch_insn *code, + struct probe_arg *parg, + char *type, + int type_offset, + struct traceprobe_parse_context *ctx) +{ + struct fetch_insn *scode; + int ret; - ret = -EINVAL; /* Store operation */ if (parg->type->is_string) { + /* Check bad combination of the type and the last fetch_insn. */ if (!strcmp(parg->type->name, "symstr")) { if (code->op != FETCH_OP_REG && code->op != FETCH_OP_STACK && code->op != FETCH_OP_RETVAL && code->op != FETCH_OP_ARG && code->op != FETCH_OP_DEREF && code->op != FETCH_OP_TP_ARG) { - trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0), + trace_probe_log_err(ctx->offset + type_offset, BAD_SYMSTRING); - goto fail; + return -EINVAL; } } else { if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF && code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM && code->op != FETCH_OP_DATA && code->op != FETCH_OP_TP_ARG) { - trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0), + trace_probe_log_err(ctx->offset + type_offset, BAD_STRING); - goto fail; + return -EINVAL; } } + if (!strcmp(parg->type->name, "symstr") || (code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM || code->op == FETCH_OP_DATA) || code->op == FETCH_OP_TP_ARG || @@ -1240,9 +1327,10 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, code++; if (code->op != FETCH_OP_NOP) { trace_probe_log_err(ctx->offset, TOO_MANY_OPS); - goto fail; + return -EINVAL; } } + /* If op == DEREF, replace it with STRING */ if (!strcmp(parg->type->name, "ustring") || code->op == FETCH_OP_UDEREF) @@ -1263,47 +1351,134 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, code++; if (code->op != FETCH_OP_NOP) { trace_probe_log_err(ctx->offset, TOO_MANY_OPS); - goto fail; + return -E2BIG; } code->op = FETCH_OP_ST_RAW; code->size = parg->type->size; } + + /* Save storing fetch_insn. */ scode = code; + /* Modify operation */ - if (t != NULL) { - ret = __parse_bitfield_probe_arg(t, parg->type, &code); + if (type != NULL) { + /* Bitfield needs a special fetch_insn. */ + ret = __parse_bitfield_probe_arg(type, parg->type, &code); if (ret) { - trace_probe_log_err(ctx->offset + t - arg, BAD_BITFIELD); - goto fail; + trace_probe_log_err(ctx->offset + type_offset, BAD_BITFIELD); + return ret; } } else if (IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS) && ctx->last_type) { + /* If user not specified the type, try parsing BTF bitfield. */ ret = parse_btf_bitfield(&code, ctx); if (ret) - goto fail; + return ret; } - ret = -EINVAL; + /* Loop(Array) operation */ if (parg->count) { if (scode->op != FETCH_OP_ST_MEM && scode->op != FETCH_OP_ST_STRING && scode->op != FETCH_OP_ST_USTRING) { - trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0), - BAD_STRING); - goto fail; + trace_probe_log_err(ctx->offset + type_offset, BAD_STRING); + return -EINVAL; } code++; if (code->op != FETCH_OP_NOP) { trace_probe_log_err(ctx->offset, TOO_MANY_OPS); - goto fail; + return -E2BIG; } code->op = FETCH_OP_LP_ARRAY; code->param = parg->count; } + + /* Finalize the fetch_insn array. */ code++; code->op = FETCH_OP_END; - ret = 0; + return 0; +} + +/* String length checking wrapper */ +static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, + struct probe_arg *parg, + struct traceprobe_parse_context *ctx) +{ + struct fetch_insn *code, *tmp = NULL; + char *type, *arg; + int ret, len; + + len = strlen(argv); + if (len > MAX_ARGSTR_LEN) { + trace_probe_log_err(ctx->offset, ARG_TOO_LONG); + return -E2BIG; + } else if (len == 0) { + trace_probe_log_err(ctx->offset, NO_ARG_BODY); + return -EINVAL; + } + + arg = kstrdup(argv, GFP_KERNEL); + if (!arg) + return -ENOMEM; + + parg->comm = kstrdup(arg, GFP_KERNEL); + if (!parg->comm) { + ret = -ENOMEM; + goto out; + } + + type = parse_probe_arg_type(arg, parg, ctx); + if (IS_ERR(type)) { + ret = PTR_ERR(type); + goto out; + } + + code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL); + if (!code) { + ret = -ENOMEM; + goto out; + } + code[FETCH_INSN_MAX - 1].op = FETCH_OP_END; + + ctx->last_type = NULL; + ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1], + ctx); + if (ret < 0) + goto fail; + + /* Update storing type if BTF is available */ + if (IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS) && + ctx->last_type) { + if (!type) { + parg->type = find_fetch_type_from_btf_type(ctx); + } else if (strstr(type, "string")) { + ret = check_prepare_btf_string_fetch(type, &code, ctx); + if (ret) + goto fail; + } + } + parg->offset = *size; + *size += parg->type->size * (parg->count ?: 1); + + if (parg->count) { + len = strlen(parg->type->fmttype) + 6; + parg->fmt = kmalloc(len, GFP_KERNEL); + if (!parg->fmt) { + ret = -ENOMEM; + goto out; + } + snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype, + parg->count); + } + + ret = finalize_fetch_insn(code, parg, type, type ? type - arg : 0, ctx); + if (ret < 0) + goto fail; + + for (; code < tmp + FETCH_INSN_MAX; code++) + if (code->op == FETCH_OP_END) + break; /* Shrink down the code buffer */ parg->code = kcalloc(code - tmp + 1, sizeof(*code), GFP_KERNEL); if (!parg->code) @@ -1312,7 +1487,7 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, memcpy(parg->code, tmp, sizeof(*code) * (code - tmp + 1)); fail: - if (ret) { + if (ret < 0) { for (code = tmp; code < tmp + FETCH_INSN_MAX; code++) if (code->op == FETCH_NOP_SYMBOL || code->op == FETCH_OP_DATA) @@ -1375,9 +1550,7 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, const char *arg, struct probe_arg *parg = &tp->args[i]; const char *body; - /* Increment count for freeing args in error case */ - tp->nr_args++; - + ctx->tp = tp; body = strchr(arg, '='); if (body) { if (body - arg > MAX_ARG_NAME_LEN) { @@ -1434,7 +1607,8 @@ static int argv_has_var_arg(int argc, const char *argv[], int *args_idx, if (str_has_prefix(argv[i], "$arg")) { trace_probe_log_set_index(i + 2); - if (!tparg_is_function_entry(ctx->flags)) { + if (!tparg_is_function_entry(ctx->flags) && + !tparg_is_function_return(ctx->flags)) { trace_probe_log_err(0, NOFENTRY_ARGS); return -EINVAL; } @@ -1757,12 +1931,18 @@ void trace_probe_cleanup(struct trace_probe *tp) for (i = 0; i < tp->nr_args; i++) traceprobe_free_probe_arg(&tp->args[i]); + if (tp->entry_arg) { + kfree(tp->entry_arg->code); + kfree(tp->entry_arg); + tp->entry_arg = NULL; + } + if (tp->event) trace_probe_unlink(tp); } int trace_probe_init(struct trace_probe *tp, const char *event, - const char *group, bool alloc_filter) + const char *group, bool alloc_filter, int nargs) { struct trace_event_call *call; size_t size = sizeof(struct trace_probe_event); @@ -1798,6 +1978,11 @@ int trace_probe_init(struct trace_probe *tp, const char *event, goto error; } + tp->nr_args = nargs; + /* Make sure pointers in args[] are NULL */ + if (nargs) + memset(tp->args, 0, sizeof(tp->args[0]) * nargs); + return 0; error: diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 850d9ecb6765..cef3a50628a3 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -92,6 +92,7 @@ enum fetch_op { FETCH_OP_ARG, /* Function argument : .param */ FETCH_OP_FOFFS, /* File offset: .immediate */ FETCH_OP_DATA, /* Allocated data: .data */ + FETCH_OP_EDATA, /* Entry data: .offset */ // Stage 2 (dereference) op FETCH_OP_DEREF, /* Dereference: .offset */ FETCH_OP_UDEREF, /* User-space Dereference: .offset */ @@ -102,6 +103,7 @@ enum fetch_op { FETCH_OP_ST_STRING, /* String: .offset, .size */ FETCH_OP_ST_USTRING, /* User String: .offset, .size */ FETCH_OP_ST_SYMSTR, /* Kernel Symbol String: .offset, .size */ + FETCH_OP_ST_EDATA, /* Store Entry Data: .offset */ // Stage 4 (modify) op FETCH_OP_MOD_BF, /* Bitfield: .basesize, .lshift, .rshift */ // Stage 5 (loop) op @@ -232,6 +234,11 @@ struct probe_arg { const struct fetch_type *type; /* Type of this argument */ }; +struct probe_entry_arg { + struct fetch_insn *code; + unsigned int size; /* The entry data size */ +}; + struct trace_uprobe_filter { rwlock_t rwlock; int nr_systemwide; @@ -253,6 +260,7 @@ struct trace_probe { struct trace_probe_event *event; ssize_t size; /* trace entry size */ unsigned int nr_args; + struct probe_entry_arg *entry_arg; /* This is only for return probe */ struct probe_arg args[]; }; @@ -338,7 +346,7 @@ static inline bool trace_probe_has_single_file(struct trace_probe *tp) } int trace_probe_init(struct trace_probe *tp, const char *event, - const char *group, bool alloc_filter); + const char *group, bool alloc_filter, int nargs); void trace_probe_cleanup(struct trace_probe *tp); int trace_probe_append(struct trace_probe *tp, struct trace_probe *to); void trace_probe_unlink(struct trace_probe *tp); @@ -355,6 +363,18 @@ int trace_probe_create(const char *raw_command, int (*createfn)(int, const char int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args, u8 *data, void *field); +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API +int traceprobe_get_entry_data_size(struct trace_probe *tp); +/* This is a runtime function to store entry data */ +void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs); +#else /* !CONFIG_HAVE_FUNCTION_ARG_ACCESS_API */ +static inline int traceprobe_get_entry_data_size(struct trace_probe *tp) +{ + return 0; +} +#define store_trace_entry_data(edata, tp, regs) do { } while (0) +#endif + #define trace_probe_for_each_link(pos, tp) \ list_for_each_entry(pos, &(tp)->event->files, list) #define trace_probe_for_each_link_rcu(pos, tp) \ @@ -381,6 +401,11 @@ static inline bool tparg_is_function_entry(unsigned int flags) return (flags & TPARG_FL_LOC_MASK) == (TPARG_FL_KERNEL | TPARG_FL_FENTRY); } +static inline bool tparg_is_function_return(unsigned int flags) +{ + return (flags & TPARG_FL_LOC_MASK) == (TPARG_FL_KERNEL | TPARG_FL_RETURN); +} + struct traceprobe_parse_context { struct trace_event_call *event; /* BTF related parameters */ @@ -392,6 +417,7 @@ struct traceprobe_parse_context { const struct btf_type *last_type; /* Saved type */ u32 last_bitoffs; /* Saved bitoffs */ u32 last_bitsize; /* Saved bitsize */ + struct trace_probe *tp; unsigned int flags; int offset; }; @@ -506,7 +532,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(NO_BTFARG, "This variable is not found at this probe point"),\ C(NO_BTF_ENTRY, "No BTF entry for this probe point"), \ C(BAD_VAR_ARGS, "$arg* must be an independent parameter without name etc."),\ - C(NOFENTRY_ARGS, "$arg* can be used only on function entry"), \ + C(NOFENTRY_ARGS, "$arg* can be used only on function entry or exit"), \ C(DOUBLE_ARGS, "$arg* can be used only once in the parameters"), \ C(ARGS_2LONG, "$arg* failed because the argument list is too long"), \ C(ARGIDX_2BIG, "$argN index is too big"), \ @@ -515,7 +541,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(BAD_HYPHEN, "Failed to parse single hyphen. Forgot '>'?"), \ C(NO_BTF_FIELD, "This field is not found."), \ C(BAD_BTF_TID, "Failed to get BTF type info."),\ - C(BAD_TYPE4STR, "This type does not fit for string."), + C(BAD_TYPE4STR, "This type does not fit for string."),\ + C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"), #undef C #define C(a, b) TP_ERR_##a diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index 3935b347f874..2caf0d2afb32 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -54,7 +54,7 @@ fetch_apply_bitfield(struct fetch_insn *code, void *buf) * If dest is NULL, don't store result and return required dynamic data size. */ static int -process_fetch_insn(struct fetch_insn *code, void *rec, +process_fetch_insn(struct fetch_insn *code, void *rec, void *edata, void *dest, void *base); static nokprobe_inline int fetch_store_strlen(unsigned long addr); static nokprobe_inline int @@ -232,7 +232,7 @@ array: /* Sum up total data length for dynamic arrays (strings) */ static nokprobe_inline int -__get_data_size(struct trace_probe *tp, struct pt_regs *regs) +__get_data_size(struct trace_probe *tp, struct pt_regs *regs, void *edata) { struct probe_arg *arg; int i, len, ret = 0; @@ -240,7 +240,7 @@ __get_data_size(struct trace_probe *tp, struct pt_regs *regs) for (i = 0; i < tp->nr_args; i++) { arg = tp->args + i; if (unlikely(arg->dynamic)) { - len = process_fetch_insn(arg->code, regs, NULL, NULL); + len = process_fetch_insn(arg->code, regs, edata, NULL, NULL); if (len > 0) ret += len; } @@ -251,7 +251,7 @@ __get_data_size(struct trace_probe *tp, struct pt_regs *regs) /* Store the value of each argument */ static nokprobe_inline void -store_trace_args(void *data, struct trace_probe *tp, void *rec, +store_trace_args(void *data, struct trace_probe *tp, void *rec, void *edata, int header_size, int maxlen) { struct probe_arg *arg; @@ -266,7 +266,7 @@ store_trace_args(void *data, struct trace_probe *tp, void *rec, /* Point the dynamic data area if needed */ if (unlikely(arg->dynamic)) *dl = make_data_loc(maxlen, dyndata - base); - ret = process_fetch_insn(arg->code, rec, dl, base); + ret = process_fetch_insn(arg->code, rec, edata, dl, base); if (arg->dynamic && likely(ret > 0)) { dyndata += ret; maxlen -= ret; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index a84b85d8aac1..9e461362450a 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -211,8 +211,8 @@ static unsigned long translate_user_vaddr(unsigned long file_offset) /* Note that we don't verify it, since the code does not come from user space */ static int -process_fetch_insn(struct fetch_insn *code, void *rec, void *dest, - void *base) +process_fetch_insn(struct fetch_insn *code, void *rec, void *edata, + void *dest, void *base) { struct pt_regs *regs = rec; unsigned long val; @@ -337,7 +337,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) if (!tu) return ERR_PTR(-ENOMEM); - ret = trace_probe_init(&tu->tp, event, group, true); + ret = trace_probe_init(&tu->tp, event, group, true, nargs); if (ret < 0) goto error; @@ -1490,11 +1490,11 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; - dsize = __get_data_size(&tu->tp, regs); + dsize = __get_data_size(&tu->tp, regs, NULL); esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); ucb = uprobe_buffer_get(); - store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize); + store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize); if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) ret |= uprobe_trace_func(tu, regs, ucb, dsize); @@ -1525,11 +1525,11 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; - dsize = __get_data_size(&tu->tp, regs); + dsize = __get_data_size(&tu->tp, regs, NULL); esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); ucb = uprobe_buffer_get(); - store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize); + store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize); if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) uretprobe_trace_func(tu, func, regs, ucb, dsize); diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index c774e560f2f9..a4dcf0f24352 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -574,7 +574,12 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) } memcpy(elt->key, key, map->key_size); - entry->val = elt; + /* + * Ensure the initialization is visible and + * publish the elt. + */ + smp_wmb(); + WRITE_ONCE(entry->val, elt); atomic64_inc(&map->hits); return entry->val; |