From f3bea49115b21e0995abf41402ad2f4d9c69eda4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 2 Jul 2014 23:23:31 -0400 Subject: ftrace/x86: Add dynamic allocated trampoline for ftrace_ops The current method of handling multiple function callbacks is to register a list function callback that calls all the other callbacks based on their hash tables, comparing each hash to the function that the callback was called on. But this is very inefficient. For example, if you are tracing all functions in the kernel and then add a kprobe to a function such that the kprobe uses ftrace, the mcount trampoline will switch from calling the function trace callback to calling the list callback that will iterate over all registered ftrace_ops (in this case, the function tracer and the kprobes callback). That means for every function being traced it checks the hash of the ftrace_ops for function tracing and kprobes, even though the kprobe is only set on a single function. The kprobes ftrace_ops is checked for every function being traced! Instead of calling the list function for functions that are only being traced by a single callback, we can call a dynamically allocated trampoline that calls the callback directly. The function graph tracer already uses a direct call trampoline when it is being traced by itself, but it is not dynamically allocated. Its trampoline is static in the kernel core. The infrastructure that called the function graph trampoline can also be used to call a dynamically allocated one. For now, only ftrace_ops that are not dynamically allocated can have a trampoline. That is, users such as function tracer or stack tracer. kprobes and perf allocate their ftrace_ops, and until there's a safe way to free the trampoline, it cannot be used. Dynamically allocated ftrace_ops may, however, use the trampoline if the kernel is not compiled with CONFIG_PREEMPT. But that will come later.
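To make the dispatch cost concrete, here is a minimal C sketch of the two paths. The struct, the list head, and the op_wants_ip() hash test are simplified stand-ins for illustration, not the kernel's actual definitions:

	#include <stdbool.h>

	/* Simplified stand-in for struct ftrace_ops; the real one carries hashes. */
	struct ops_sketch {
		void (*func)(unsigned long ip, unsigned long parent_ip,
			     struct ops_sketch *op);
		struct ops_sketch *next;
	};

	/* Hypothetical hash test: does this op trace the function at ip? */
	static bool op_wants_ip(struct ops_sketch *op, unsigned long ip)
	{
		(void)op;
		(void)ip;
		return true;	/* stub for the sketch */
	}

	/*
	 * List dispatch: every registered op is consulted on every traced
	 * call, even an op (like a kprobe) attached to a single function.
	 */
	static void list_dispatch(struct ops_sketch *list,
				  unsigned long ip, unsigned long parent_ip)
	{
		struct ops_sketch *op;

		for (op = list; op; op = op->next)
			if (op_wants_ip(op, ip))
				op->func(ip, parent_ip, op);
	}

	/*
	 * Trampoline dispatch: a per-ops trampoline hard-codes the one op it
	 * serves, so the list walk and the hash tests disappear entirely.
	 */
	static void direct_dispatch(struct ops_sketch *op,
				    unsigned long ip, unsigned long parent_ip)
	{
		op->func(ip, parent_ip, op);
	}

List dispatch pays the per-op hash check on every traced call; the per-ops trampoline collapses that to a single direct callback invocation.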
Tested-by: Masami Hiramatsu Tested-by: Jiri Kosina Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 195 ++++++++++++++++++++++++++++++++++++++++++-- arch/x86/kernel/mcount_64.S | 25 +++++- 2 files changed, 210 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3386dc9aa333..e4d48f6cad86 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -644,13 +645,8 @@ int __init ftrace_dyn_arch_init(void) { return 0; } -#endif - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - -#ifdef CONFIG_DYNAMIC_FTRACE -extern void ftrace_graph_call(void); +#if defined(CONFIG_X86_64) || defined(CONFIG_FUNCTION_GRAPH_TRACER) static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) { static union ftrace_code_union calc; @@ -664,6 +660,193 @@ static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) */ return calc.code; } +#endif + +/* Currently only x86_64 supports dynamic trampolines */ +#ifdef CONFIG_X86_64 + +#ifdef CONFIG_MODULES +#include +/* Module allocation simplifies allocating memory for code */ +static inline void *alloc_tramp(unsigned long size) +{ + return module_alloc(size); +} +static inline void tramp_free(void *tramp) +{ + module_free(NULL, tramp); +} +#else +/* Trampolines can only be created if modules are supported */ +static inline void *alloc_tramp(unsigned long size) +{ + return NULL; +} +static inline void tramp_free(void *tramp) { } +#endif + +/* Defined as markers to the end of the ftrace default trampolines */ +extern void ftrace_caller_end(void); +extern void ftrace_regs_caller_end(void); +extern void ftrace_return(void); +extern void ftrace_caller_op_ptr(void); +extern void ftrace_regs_caller_op_ptr(void); + +/* movq function_trace_op(%rip), %rdx */ +/* 0x48 0x8b 0x15 */ +#define OP_REF_SIZE 7 + +/* + * The ftrace_ops is passed to the function callback. Since the + * trampoline only services a single ftrace_ops, we can pass in + * that ops directly. + * + * The ftrace_op_code_union is used to create a pointer to the + * ftrace_ops that will be passed to the callback function. + */ +union ftrace_op_code_union { + char code[OP_REF_SIZE]; + struct { + char op[3]; + int offset; + } __attribute__((packed)); +}; + +static unsigned long create_trampoline(struct ftrace_ops *ops) +{ + unsigned const char *jmp; + unsigned long start_offset; + unsigned long end_offset; + unsigned long op_offset; + unsigned long offset; + unsigned long size; + unsigned long ip; + unsigned long *ptr; + void *trampoline; + /* 48 8b 15 is movq (%rip), %rdx */ + unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 }; + union ftrace_op_code_union op_ptr; + int ret; + + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { + start_offset = (unsigned long)ftrace_regs_caller; + end_offset = (unsigned long)ftrace_regs_caller_end; + op_offset = (unsigned long)ftrace_regs_caller_op_ptr; + } else { + start_offset = (unsigned long)ftrace_caller; + end_offset = (unsigned long)ftrace_caller_end; + op_offset = (unsigned long)ftrace_caller_op_ptr; + } + + size = end_offset - start_offset; + + /* + * Allocate enough size to store the ftrace_caller code, + * the jmp to ftrace_return, as well as the address of + * the ftrace_ops this trampoline is used for. 
+ */ + trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *)); + if (!trampoline) + return 0; + + /* Copy ftrace_caller onto the trampoline memory */ + ret = probe_kernel_read(trampoline, (void *)start_offset, size); + if (WARN_ON(ret < 0)) { + tramp_free(trampoline); + return 0; + } + + ip = (unsigned long)trampoline + size; + + /* The trampoline ends with a jmp to ftrace_return */ + jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return); + memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE); + + /* + * The address of the ftrace_ops that is used for this trampoline + * is stored at the end of the trampoline. This will be used to + * load the third parameter for the callback. Basically, that + * location at the end of the trampoline takes the place of + * the global function_trace_op variable. + */ + + ptr = (unsigned long *)(trampoline + size + MCOUNT_INSN_SIZE); + *ptr = (unsigned long)ops; + + op_offset -= start_offset; + memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE); + + /* Are we pointing to the reference? */ + if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0)) { + tramp_free(trampoline); + return 0; + } + + /* Load the contents of ptr into the callback parameter */ + offset = (unsigned long)ptr; + offset -= (unsigned long)trampoline + op_offset + OP_REF_SIZE; + + op_ptr.offset = offset; + + /* put in the new offset to the ftrace_ops */ + memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE); + + /* ALLOC_TRAMP flags lets us know we created it */ + ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP; + + return (unsigned long)trampoline; +} + +void arch_ftrace_update_trampoline(struct ftrace_ops *ops) +{ + ftrace_func_t func; + unsigned char *new; + unsigned long start_offset; + unsigned long call_offset; + unsigned long offset; + unsigned long ip; + int ret; + + if (ops->trampoline) { + /* + * The ftrace_ops caller may set up its own trampoline. + * In such a case, this code must not modify it. 
+ */ + if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) + return; + } else { + ops->trampoline = create_trampoline(ops); + if (!ops->trampoline) + return; + } + + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { + start_offset = (unsigned long)ftrace_regs_caller; + call_offset = (unsigned long)ftrace_regs_call; + } else { + start_offset = (unsigned long)ftrace_caller; + call_offset = (unsigned long)ftrace_call; + } + + offset = call_offset - start_offset; + ip = ops->trampoline + offset; + + func = ftrace_ops_get_func(ops); + + /* Do a safe modify in case the trampoline is executing */ + new = ftrace_call_replace(ip, (unsigned long)func); + ret = update_ftrace_func(ip, new); + + /* The update should never fail */ + WARN_ON(ret); +} +#endif /* CONFIG_X86_64 */ +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +#ifdef CONFIG_DYNAMIC_FTRACE +extern void ftrace_graph_call(void); static int ftrace_mod_jmp(unsigned long ip, void *func) { diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index c73aecf10d34..42f0cdd20baf 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -28,9 +28,11 @@ ENTRY(function_hook) END(function_hook) /* skip is set if stack has been adjusted */ -.macro ftrace_caller_setup skip=0 +.macro ftrace_caller_setup trace_label skip=0 MCOUNT_SAVE_FRAME \skip + /* Save this location */ +GLOBAL(\trace_label) /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx @@ -46,7 +48,7 @@ END(function_hook) .endm ENTRY(ftrace_caller) - ftrace_caller_setup + ftrace_caller_setup ftrace_caller_op_ptr /* regs go into 4th parameter (but make it NULL) */ movq $0, %rcx @@ -54,7 +56,14 @@ GLOBAL(ftrace_call) call ftrace_stub MCOUNT_RESTORE_FRAME -ftrace_return: + + /* + * The copied trampoline must call ftrace_return as it + * still may need to call the function graph tracer. + */ +GLOBAL(ftrace_caller_end) + +GLOBAL(ftrace_return) #ifdef CONFIG_FUNCTION_GRAPH_TRACER GLOBAL(ftrace_graph_call) @@ -70,7 +79,7 @@ ENTRY(ftrace_regs_caller) pushfq /* skip=8 to skip flags saved in SS */ - ftrace_caller_setup 8 + ftrace_caller_setup ftrace_regs_caller_op_ptr 8 /* Save the rest of pt_regs */ movq %r15, R15(%rsp) @@ -122,6 +131,14 @@ GLOBAL(ftrace_regs_call) /* Restore flags */ popfq + /* + * As this jmp to ftrace_return can be a short jump + * it must not be copied into the trampoline. + * The trampoline will add the code to jump + * to the return. + */ +GLOBAL(ftrace_regs_caller_end) + jmp ftrace_return popfq -- cgit v1.2.3 From 15d5b02cc575e5b20ddfa1645fc1242f0b0ba1c8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 3 Jul 2014 14:51:36 -0400 Subject: ftrace/x86: Show trampoline call function in enabled_functions The file /sys/kernel/debug/tracing/enabled_functions is used to debug ftrace function hooks. Add to the output what function is being called by the trampoline if the arch supports it. Add support for this feature in x86_64. Cc: H.
Peter Anvin Tested-by: Masami Hiramatsu Tested-by: Jiri Kosina Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 98 ++++++++++++++++++++++++++++++++++++++++++------ kernel/trace/ftrace.c | 22 ++++++++++- 2 files changed, 106 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index e4d48f6cad86..ca17c20a1010 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -48,7 +48,7 @@ int ftrace_arch_code_modify_post_process(void) union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; struct { - char e8; + unsigned char e8; int offset; } __attribute__((packed)); }; @@ -797,12 +797,26 @@ static unsigned long create_trampoline(struct ftrace_ops *ops) return (unsigned long)trampoline; } +static unsigned long calc_trampoline_call_offset(bool save_regs) +{ + unsigned long start_offset; + unsigned long call_offset; + + if (save_regs) { + start_offset = (unsigned long)ftrace_regs_caller; + call_offset = (unsigned long)ftrace_regs_call; + } else { + start_offset = (unsigned long)ftrace_caller; + call_offset = (unsigned long)ftrace_call; + } + + return call_offset - start_offset; +} + void arch_ftrace_update_trampoline(struct ftrace_ops *ops) { ftrace_func_t func; unsigned char *new; - unsigned long start_offset; - unsigned long call_offset; unsigned long offset; unsigned long ip; int ret; @@ -820,15 +834,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) return; } - if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { - start_offset = (unsigned long)ftrace_regs_caller; - call_offset = (unsigned long)ftrace_regs_call; - } else { - start_offset = (unsigned long)ftrace_caller; - call_offset = (unsigned long)ftrace_call; - } - - offset = call_offset - start_offset; + offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS); ip = ops->trampoline + offset; func = ftrace_ops_get_func(ops); @@ -840,6 +846,74 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) /* The update should never fail */ WARN_ON(ret); } + +/* Return the address of the function the trampoline calls */ +static void *addr_from_call(void *ptr) +{ + union ftrace_code_union calc; + int ret; + + ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE); + if (WARN_ON_ONCE(ret < 0)) + return NULL; + + /* Make sure this is a call */ + if (WARN_ON_ONCE(calc.e8 != 0xe8)) { + pr_warn("Expected e8, got %x\n", calc.e8); + return NULL; + } + + return ptr + MCOUNT_INSN_SIZE + calc.offset; +} + +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer); + +/* + * If the ops->trampoline was not allocated, then it probably + * has a static trampoline func, or is the ftrace caller itself. + */ +static void *static_tramp_func(struct ftrace_ops *ops, struct dyn_ftrace *rec) +{ + unsigned long offset; + bool save_regs = rec->flags & FTRACE_FL_REGS_EN; + void *ptr; + + if (ops && ops->trampoline) { +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* + * We only know about function graph tracer setting as static + * trampoline. 
+ */ + if (ops->trampoline == FTRACE_GRAPH_ADDR) + return (void *)prepare_ftrace_return; +#endif + return NULL; + } + + offset = calc_trampoline_call_offset(save_regs); + + if (save_regs) + ptr = (void *)FTRACE_REGS_ADDR + offset; + else + ptr = (void *)FTRACE_ADDR + offset; + + return addr_from_call(ptr); +} + +void *arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec) +{ + unsigned long offset; + + /* If we didn't allocate this trampoline, consider it static */ + if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) + return static_tramp_func(ops, rec); + + offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS); + return addr_from_call((void *)ops->trampoline + offset); +} + + #endif /* CONFIG_X86_64 */ #endif /* CONFIG_DYNAMIC_FTRACE */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 15f85eac7e95..422e1f8300b1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2952,6 +2952,22 @@ static void t_stop(struct seq_file *m, void *p) mutex_unlock(&ftrace_lock); } +void * __weak +arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec) +{ + return NULL; +} + +static void add_trampoline_func(struct seq_file *m, struct ftrace_ops *ops, + struct dyn_ftrace *rec) +{ + void *ptr; + + ptr = arch_ftrace_trampoline_func(ops, rec); + if (ptr) + seq_printf(m, " ->%pS", ptr); +} + static int t_show(struct seq_file *m, void *v) { struct ftrace_iterator *iter = m->private; @@ -2975,19 +2991,21 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, "%ps", (void *)rec->ip); if (iter->flags & FTRACE_ITER_ENABLED) { + struct ftrace_ops *ops = NULL; + seq_printf(m, " (%ld)%s", ftrace_rec_count(rec), rec->flags & FTRACE_FL_REGS ? " R" : " "); if (rec->flags & FTRACE_FL_TRAMP_EN) { - struct ftrace_ops *ops; - ops = ftrace_find_tramp_ops_any(rec); if (ops) seq_printf(m, "\ttramp: %pS", (void *)ops->trampoline); else seq_printf(m, "\ttramp: ERROR!"); + } + add_trampoline_func(m, ops, rec); } seq_printf(m, "\n"); -- cgit v1.2.3 From 12cce594fa8f12e002e7eb5d10141853c1e6a112 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 3 Jul 2014 15:48:16 -0400 Subject: ftrace/x86: Allow !CONFIG_PREEMPT dynamic ops to use allocated trampolines When the static ftrace_ops (like function tracer) enables tracing, and it is the only callback that is referencing a function, a trampoline is dynamically allocated for the function that calls the callback directly, instead of calling a loop function that iterates over all the registered ftrace ops (if more than one ops is registered). But when it comes to dynamically allocated ftrace_ops, where they may be freed, on a CONFIG_PREEMPT kernel there's no way to know when it is safe to free the trampoline. If a task was preempted while executing on the trampoline, there's currently no way to know when it will be off that trampoline. But this is not true when it comes to !CONFIG_PREEMPT. The current method of calling schedule_on_each_cpu() will force tasks off the trampoline, because they cannot schedule while on it (kernel preemption is not configured). That means it is safe to free a dynamically allocated ftrace ops trampoline when CONFIG_PREEMPT is not configured. Cc: H. Peter Anvin Cc: Paul E.
McKenney Acked-by: Borislav Petkov Tested-by: Masami Hiramatsu Tested-by: Jiri Kosina Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 8 ++++++++ kernel/trace/ftrace.c | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index ca17c20a1010..4cfeca6ffe11 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -913,6 +913,14 @@ void *arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec return addr_from_call((void *)ops->trampoline + offset); } +void arch_ftrace_trampoline_free(struct ftrace_ops *ops) +{ + if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) + return; + + tramp_free((void *)ops->trampoline); + ops->trampoline = 0; +} #endif /* CONFIG_X86_64 */ #endif /* CONFIG_DYNAMIC_FTRACE */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 422e1f8300b1..eab3123a1fbe 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2324,6 +2324,10 @@ static void ftrace_run_modify_code(struct ftrace_ops *ops, int command, static ftrace_func_t saved_ftrace_func; static int ftrace_start_up; +void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops) +{ +} + static void control_ops_free(struct ftrace_ops *ops) { free_percpu(ops->disabled); @@ -2475,6 +2479,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) { schedule_on_each_cpu(ftrace_sync); + arch_ftrace_trampoline_free(ops); + if (ops->flags & FTRACE_OPS_FL_CONTROL) control_ops_free(ops); } @@ -4725,9 +4731,21 @@ void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops) static void ftrace_update_trampoline(struct ftrace_ops *ops) { + +/* + * Currently there's no safe way to free a trampoline when the kernel + * is configured with PREEMPT. That is because a task could be preempted + * when it jumped to the trampoline, it may be preempted for a long time + * depending on the system load, and currently there's no way to know + * when it will be off the trampoline. If the trampoline is freed + * too early, when the task runs again, it will be executing on freed + * memory and crash. + */ +#ifdef CONFIG_PREEMPT /* Currently, only non dynamic ops can have a trampoline */ if (ops->flags & FTRACE_OPS_FL_DYNAMIC) return; +#endif arch_ftrace_update_trampoline(ops); } -- cgit v1.2.3 From 4fd3279b48605ae3ea509b9b2c02e46aa0975930 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 24 Oct 2014 17:56:04 -0400 Subject: ftrace: Add more information to ftrace_bug() output With the introduction of the dynamic trampolines, it is useful that, if things go wrong, ftrace_bug() produces more information about what the current state is. This can help debug issues that may arise. Ftrace has lots of checks to make sure that the state of the system it touches is exactly what it expects it to be. When it detects an abnormality, it calls ftrace_bug() and disables itself to prevent any further damage. It is crucial that ftrace_bug() produces sufficient information that can be used to debug the situation.
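For illustration only, an EINVAL report from ftrace_bug() after this change might look roughly like this (the symbol, addresses, and flag values below are invented for the example; each line corresponds to one of the print calls added in the diff that follows):

	ftrace failed to modify [<ffffffff8106f9d0>] some_traced_func+0x0/0x40
	 actual: cc:1f:44:00:00
	ftrace record flags: 4000000000000001
	 (1)   expected tramp: ffffffffa0002000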
Cc: Benjamin Herrenschmidt Acked-by: Borislav Petkov Tested-by: Masami Hiramatsu Tested-by: Jiri Kosina Signed-off-by: Steven Rostedt --- arch/powerpc/kernel/ftrace.c | 2 +- arch/x86/kernel/ftrace.c | 2 +- include/linux/ftrace.h | 4 +++- kernel/trace/ftrace.c | 38 +++++++++++++++++++++++++++++--------- 4 files changed, 34 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 390311c0f03d..e66af6d265e8 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -449,7 +449,7 @@ void ftrace_replace_code(int enable) rec = ftrace_rec_iter_record(iter); ret = __ftrace_replace_code(rec, enable); if (ret) { - ftrace_bug(ret, rec->ip); + ftrace_bug(ret, rec); return; } } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 4cfeca6ffe11..1aea94d336c7 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -583,7 +583,7 @@ void ftrace_replace_code(int enable) remove_breakpoints: pr_warn("Failed on %s (%d):\n", report, count); - ftrace_bug(ret, rec ? rec->ip : 0); + ftrace_bug(ret, rec); for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); /* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 06e3ca5a5083..619e37cc17fd 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -263,7 +263,9 @@ struct ftrace_func_command { int ftrace_arch_code_modify_prepare(void); int ftrace_arch_code_modify_post_process(void); -void ftrace_bug(int err, unsigned long ip); +struct dyn_ftrace; + +void ftrace_bug(int err, struct dyn_ftrace *rec); struct seq_file; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index eab3123a1fbe..4043332f6720 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1738,10 +1738,13 @@ static void print_ip_ins(const char *fmt, unsigned char *p) printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); } +static struct ftrace_ops * +ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); + /** * ftrace_bug - report and shutdown function tracer * @failed: The failed type (EFAULT, EINVAL, EPERM) - * @ip: The address that failed + * @rec: The record that failed * * The arch code that enables or disables the function tracing * can call ftrace_bug() when it has detected a problem in @@ -1750,8 +1753,10 @@ static void print_ip_ins(const char *fmt, unsigned char *p) * EINVAL - if what is read at @ip is not what was expected * EPERM - if the problem happens on writting to the @ip address */ -void ftrace_bug(int failed, unsigned long ip) +void ftrace_bug(int failed, struct dyn_ftrace *rec) { + unsigned long ip = rec ? rec->ip : 0; + switch (failed) { case -EFAULT: FTRACE_WARN_ON_ONCE(1); @@ -1763,7 +1768,7 @@ void ftrace_bug(int failed, unsigned long ip) pr_info("ftrace failed to modify "); print_ip_sym(ip); print_ip_ins(" actual: ", (unsigned char *)ip); - printk(KERN_CONT "\n"); + pr_cont("\n"); break; case -EPERM: FTRACE_WARN_ON_ONCE(1); @@ -1775,6 +1780,24 @@ void ftrace_bug(int failed, unsigned long ip) pr_info("ftrace faulted on unknown error "); print_ip_sym(ip); } + if (rec) { + struct ftrace_ops *ops = NULL; + + pr_info("ftrace record flags: %lx\n", rec->flags); + pr_cont(" (%ld)%s", ftrace_rec_count(rec), + rec->flags & FTRACE_FL_REGS ? 
" R" : " "); + if (rec->flags & FTRACE_FL_TRAMP_EN) { + ops = ftrace_find_tramp_ops_any(rec); + if (ops) + pr_cont("\ttramp: %pS", + (void *)ops->trampoline); + else + pr_cont("\ttramp: ERROR!"); + + } + ip = ftrace_get_addr_curr(rec); + pr_cont(" expected tramp: %lx\n", ip); + } } static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) @@ -2097,7 +2120,7 @@ void __weak ftrace_replace_code(int enable) do_for_each_ftrace_rec(pg, rec) { failed = __ftrace_replace_code(rec, enable); if (failed) { - ftrace_bug(failed, rec->ip); + ftrace_bug(failed, rec); /* Stop processing */ return; } @@ -2179,17 +2202,14 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter) static int ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) { - unsigned long ip; int ret; - ip = rec->ip; - if (unlikely(ftrace_disabled)) return 0; ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); if (ret) { - ftrace_bug(ret, ip); + ftrace_bug(ret, rec); return 0; } return 1; @@ -2633,7 +2653,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) if (ftrace_start_up && cnt) { int failed = __ftrace_replace_code(p, 1); if (failed) - ftrace_bug(failed, p->ip); + ftrace_bug(failed, p); } } } -- cgit v1.2.3 From 9960efeb80f73bd073483dab0855ee0ddc27085c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 18 Nov 2014 19:13:25 -0500 Subject: ftrace/x86: Add frames pointers to trampoline as necessary When CONFIG_FRAME_POINTERS are enabled, it is required that the ftrace_caller and ftrace_regs_caller trampolines set up frame pointers otherwise a stack trace from a function call wont print the functions that called the trampoline. This is due to a check in __save_stack_address(): #ifdef CONFIG_FRAME_POINTER if (!reliable) return; #endif The "reliable" variable is only set if the function address is equal to contents of the address before the address the frame pointer register points to. If the frame pointer is not set up for the ftrace caller then this will fail the reliable test. It will miss the function that called the trampoline. Worse yet, if fentry is used (gcc 4.6 and beyond), it will also miss the parent, as the fentry is called before the stack frame is set up. That means the bp frame pointer points to the stack of just before the parent function was called. Link: http://lkml.kernel.org/r/20141119034829.355440340@goodmis.org Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: stable@vger.kernel.org # 3.7+ Acked-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 42f0cdd20baf..35a793fa4bba 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -47,14 +47,51 @@ GLOBAL(\trace_label) #endif .endm +#ifdef CONFIG_FRAME_POINTER +/* + * Stack traces will stop at the ftrace trampoline if the frame pointer + * is not set up properly. If fentry is used, we need to save a frame + * pointer for the parent as well as the function traced, because the + * fentry is called before the stack frame is set up, where as mcount + * is called afterward. 
+ */ +.macro create_frame parent rip +#ifdef CC_USING_FENTRY + pushq \parent + pushq %rbp + movq %rsp, %rbp +#endif + pushq \rip + pushq %rbp + movq %rsp, %rbp +.endm + +.macro restore_frame +#ifdef CC_USING_FENTRY + addq $16, %rsp +#endif + popq %rbp + addq $8, %rsp +.endm +#else +.macro create_frame parent rip +.endm +.macro restore_frame +.endm +#endif /* CONFIG_FRAME_POINTER */ + ENTRY(ftrace_caller) ftrace_caller_setup ftrace_caller_op_ptr /* regs go into 4th parameter (but make it NULL) */ movq $0, %rcx + create_frame %rsi, %rdi + GLOBAL(ftrace_call) call ftrace_stub + restore_frame + MCOUNT_RESTORE_FRAME /* @@ -105,9 +142,13 @@ ENTRY(ftrace_regs_caller) /* regs go into 4th parameter */ leaq (%rsp), %rcx + create_frame %rsi, %rdi + GLOBAL(ftrace_regs_call) call ftrace_stub + restore_frame + /* Copy flags back to SS, to restore them */ movq EFLAGS(%rsp), %rax movq %rax, SS(%rsp) -- cgit v1.2.3 From aec0be2d6e9f02dbef41ee54854c2e003e55c23e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 18 Nov 2014 21:14:11 -0500 Subject: ftrace/x86/extable: Add is_ftrace_trampoline() function Stack traces that happen from function tracing check if the address on the stack is a __kernel_text_address(). That is, whether the address is kernel code. This calls core_kernel_text() which returns true if the address is part of the builtin kernel code. It also calls is_module_text_address() which returns true if the address belongs to module code. But what is missing is ftrace dynamically allocated trampolines. These trampolines are allocated for individual ftrace_ops that call the ftrace_ops callback functions directly. But if they show up in a stack trace, the code checking the stack won't detect them, as they are neither core kernel code nor module address space. By adding another field to ftrace_ops that also stores the size of the trampoline assigned to it, we can create a new function called is_ftrace_trampoline() that returns true if the address is a dynamically allocated ftrace trampoline. Note, it ignores trampolines that are not dynamically allocated as they will return true with the core_kernel_text() function. Link: http://lkml.kernel.org/r/20141119034829.497125839@goodmis.org Cc: Ingo Molnar Cc: "H.
Peter Anvin" Acked-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 9 +++++++-- include/linux/ftrace.h | 8 ++++++++ kernel/extable.c | 7 ++++++- kernel/trace/ftrace.c | 38 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 1aea94d336c7..60881d919432 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -712,7 +712,8 @@ union ftrace_op_code_union { } __attribute__((packed)); }; -static unsigned long create_trampoline(struct ftrace_ops *ops) +static unsigned long +create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) { unsigned const char *jmp; unsigned long start_offset; @@ -749,6 +750,8 @@ static unsigned long create_trampoline(struct ftrace_ops *ops) if (!trampoline) return 0; + *tramp_size = size + MCOUNT_INSN_SIZE + sizeof(void *); + /* Copy ftrace_caller onto the trampoline memory */ ret = probe_kernel_read(trampoline, (void *)start_offset, size); if (WARN_ON(ret < 0)) { @@ -819,6 +822,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) unsigned char *new; unsigned long offset; unsigned long ip; + unsigned int size; int ret; if (ops->trampoline) { @@ -829,9 +833,10 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) return; } else { - ops->trampoline = create_trampoline(ops); + ops->trampoline = create_trampoline(ops, &size); if (!ops->trampoline) return; + ops->trampoline_size = size; } offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 619e37cc17fd..7b2616fa2472 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -150,6 +150,7 @@ struct ftrace_ops { struct ftrace_ops_hash *func_hash; struct ftrace_ops_hash old_hash; unsigned long trampoline; + unsigned long trampoline_size; #endif }; @@ -297,6 +298,8 @@ extern int ftrace_text_reserved(const void *start, const void *end); extern int ftrace_nr_registered_ops(void); +bool is_ftrace_trampoline(unsigned long addr); + /* * The dyn_ftrace record's flags field is split into two parts. * the first part which is '0-FTRACE_REF_MAX' is a counter of @@ -596,6 +599,11 @@ static inline ssize_t ftrace_notrace_write(struct file *file, const char __user size_t cnt, loff_t *ppos) { return -ENODEV; } static inline int ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; } + +static inline bool is_ftrace_trampoline(unsigned long addr) +{ + return false; +} #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ diff --git a/kernel/extable.c b/kernel/extable.c index d8a6446adbcb..c98f926277a8 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -102,6 +103,8 @@ int __kernel_text_address(unsigned long addr) return 1; if (is_module_text_address(addr)) return 1; + if (is_ftrace_trampoline(addr)) + return 1; /* * There might be init symbols in saved stacktraces. 
* Give those symbols a chance to be printed in @@ -119,7 +122,9 @@ int kernel_text_address(unsigned long addr) { if (core_kernel_text(addr)) return 1; - return is_module_text_address(addr); + if (is_module_text_address(addr)) + return 1; + return is_ftrace_trampoline(addr); } /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6233f9102179..fa0f36bb32e9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1117,6 +1117,43 @@ static struct ftrace_ops global_ops = { FTRACE_OPS_FL_INITIALIZED, }; +/* + * This is used by __kernel_text_address() to return true if the + * the address is on a dynamically allocated trampoline that would + * not return true for either core_kernel_text() or + * is_module_text_address(). + */ +bool is_ftrace_trampoline(unsigned long addr) +{ + struct ftrace_ops *op; + bool ret = false; + + /* + * Some of the ops may be dynamically allocated, + * they are freed after a synchronize_sched(). + */ + preempt_disable_notrace(); + + do_for_each_ftrace_op(op, ftrace_ops_list) { + /* + * This is to check for dynamically allocated trampolines. + * Trampolines that are in kernel text will have + * core_kernel_text() return true. + */ + if (op->trampoline && op->trampoline_size) + if (addr >= op->trampoline && + addr < op->trampoline + op->trampoline_size) { + ret = true; + goto out; + } + } while_for_each_ftrace_op(op); + + out: + preempt_enable_notrace(); + + return ret; +} + struct ftrace_page { struct ftrace_page *next; struct dyn_ftrace *records; @@ -5373,6 +5410,7 @@ static struct ftrace_ops graph_ops = { FTRACE_OPS_FL_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR .trampoline = FTRACE_GRAPH_TRAMP_ADDR, + /* trampoline_size is only needed for dynamically allocated tramps */ #endif ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash) }; -- cgit v1.2.3 From 467aa1f276a0cf7445a1f4b9600f03d949b76251 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 24 Sep 2014 18:52:39 -0400 Subject: x86/kvm/tracing: Use helper function trace_seq_buffer_ptr() To allow for the restructuring of the trace_seq code, we need users of it to use the helper functions instead of accessing the internals of the trace_seq structure itself. Link: http://lkml.kernel.org/r/20141104160221.585025609@goodmis.org Tested-by: Jiri Kosina Acked-by: Jiri Kosina Acked-by: Paolo Bonzini Acked-by: Mark Rustad Reviewed-by: Petr Mladek Cc: Jeff Kirsher Signed-off-by: Steven Rostedt --- arch/x86/kvm/mmutrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 5aaf35641768..ce463a9cc8fb 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -22,7 +22,7 @@ __entry->unsync = sp->unsync; #define KVM_MMU_PAGE_PRINTK() ({ \ - const u32 saved_len = p->len; \ + const char *saved_ptr = trace_seq_buffer_ptr(p); \ static const char *access_str[] = { \ "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \ }; \ @@ -41,7 +41,7 @@ role.nxe ? "" : "!", \ __entry->root_count, \ __entry->unsync ? "unsync" : "sync", 0); \ - p->buffer + saved_len; \ + saved_ptr; \ }) #define kvm_mmu_trace_pferr_flags \ -- cgit v1.2.3 From a017784f1b236cbc42ce83b4345a667c21113481 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 9 Oct 2014 13:01:06 +0000 Subject: kprobes/ftrace: Recover original IP if pre_handler doesn't change it Recover the original IP register if the pre_handler doesn't change it.
Since current kprobes doesn't expect that another ftrace handler may change regs->ip, it sets kprobe.addr + MCOUNT_INSN_SIZE to regs->ip and returns to ftrace. This seems like wrong behavior, since kprobes can recover regs->ip and safely pass it to another handler. This adds code which recovers the original regs->ip passed from ftrace right before returning to ftrace, so that another ftrace user can change regs->ip. Link: http://lkml.kernel.org/r/20141009130106.4698.26362.stgit@kbuild-f20.novalocal Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/kernel/kprobes/ftrace.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 717b02a22e67..5f8f0b3cc674 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -27,7 +27,7 @@ static nokprobe_inline int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) + struct kprobe_ctlblk *kcb, unsigned long orig_ip) { /* * Emulate singlestep (and also recover regs->ip) @@ -39,6 +39,8 @@ int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, p->post_handler(p, regs, 0); } __this_cpu_write(current_kprobe, NULL); + if (orig_ip) + regs->ip = orig_ip; return 1; } @@ -46,7 +48,7 @@ int skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { if (kprobe_ftrace(p)) - return __skip_singlestep(p, regs, kcb); + return __skip_singlestep(p, regs, kcb, 0); else return 0; } @@ -71,13 +73,14 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (kprobe_running()) { kprobes_inc_nmissed_count(p); } else { + unsigned long orig_ip = regs->ip; /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ regs->ip = ip + sizeof(kprobe_opcode_t); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; if (!p->pre_handler || !p->pre_handler(p, regs)) - __skip_singlestep(p, regs, kcb); + __skip_singlestep(p, regs, kcb, orig_ip); /* * If pre_handler returns !0, it sets regs->ip and * resets current kprobe. -- cgit v1.2.3 From 62a207d748dd9224140a634786b274fdb6ece0b9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 14:58:17 -0500 Subject: ftrace/x86: Have static function tracing always test for function graph New updates to the ftrace generic code had ftrace_stub not always being called when ftrace is off. This causes the static tracer to always save and restore functions. But it also showed that when function tracing is running, the function graph tracer cannot run. We should always check to see if function graph tracing is running even if the function tracer is running too. The function tracer code is not the only one that uses the hook to function mcount.
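To make the new control flow easier to follow, here is a C-like sketch of the static function_hook after this change (the register save/restore around the calls is elided, and the extern declarations are simplified stand-ins for the symbols the cmpq instructions test; this is an illustration, not kernel code):

	/*
	 * Sketch only: the control flow of the static function_hook
	 * after this change.
	 */
	typedef void (*hook_t)(void);

	extern void ftrace_stub(void);
	extern hook_t ftrace_trace_function;
	extern hook_t ftrace_graph_return;
	extern void ftrace_graph_caller(void);

	static void function_hook_sketch(void)
	{
		if (ftrace_trace_function != ftrace_stub) {
			/* the "trace:" path: save regs, call the tracer, restore */
			ftrace_trace_function();
			/* previously this path jumped to ftrace_stub and returned */
		}
		/* fgraph_trace: now reached whether or not a tracer ran */
		if (ftrace_graph_return != ftrace_stub)
			ftrace_graph_caller();
		/* ftrace_stub: plain return */
	}

Before this patch, the trace path ended with a jump to ftrace_stub, so the function graph check was skipped whenever the function tracer was active.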
Cc: Markos Chandras Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 35a793fa4bba..6dc134b8dc70 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -194,6 +194,7 @@ ENTRY(function_hook) cmpq $ftrace_stub, ftrace_trace_function jnz trace +fgraph_trace: #ifdef CONFIG_FUNCTION_GRAPH_TRACER cmpq $ftrace_stub, ftrace_graph_return jnz ftrace_graph_caller @@ -220,7 +221,7 @@ trace: MCOUNT_RESTORE_FRAME - jmp ftrace_stub + jmp fgraph_trace END(function_hook) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ -- cgit v1.2.3 From 76c2f13c5515979adab5de3ebda27c309f459a7b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 14:54:27 -0500 Subject: ftrace/x86: Have static tracing also use ftrace_caller_setup Linus pointed out that there were locations that did the hard coded update of the parent and rip parameters. One of them was the static tracer which could also use the ftrace_caller_setup to do that work. In fact, because it did not use it, it is prone to bugs, and since the static tracer is hardly ever used (who wants function tracing code always being called?) it doesn't get tested very often. I only run a few "does it still work" tests on it. But I do not run stress tests on that code. Although, since it is never turned off, just having it on should be stressful enough. (especially for the performance folks) There's no reason that the static tracer can't also use ftrace_caller_setup. Have it do so. Link: http://lkml.kernel.org/r/CA+55aFwF+qCGSKdGaEgW4p6N65GZ5_XTV=1NbtWDvxnd5yYLiw@mail.gmail.com Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 6dc134b8dc70..24842c701660 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -21,12 +21,6 @@ # define function_hook mcount #endif -#ifdef CONFIG_DYNAMIC_FTRACE - -ENTRY(function_hook) - retq -END(function_hook) - /* skip is set if stack has been adjusted */ .macro ftrace_caller_setup trace_label skip=0 MCOUNT_SAVE_FRAME \skip @@ -47,6 +41,12 @@ GLOBAL(\trace_label) #endif .endm +#ifdef CONFIG_DYNAMIC_FTRACE + +ENTRY(function_hook) + retq +END(function_hook) + #ifdef CONFIG_FRAME_POINTER /* * Stack traces will stop at the ftrace trampoline if the frame pointer @@ -207,15 +207,7 @@ GLOBAL(ftrace_stub) retq trace: - MCOUNT_SAVE_FRAME - - movq RIP(%rsp), %rdi -#ifdef CC_USING_FENTRY - movq SS+16(%rsp), %rsi -#else - movq 8(%rbp), %rsi -#endif - subq $MCOUNT_INSN_SIZE, %rdi + ftrace_caller_setup ftrace_caller_op_ptr call *ftrace_trace_function -- cgit v1.2.3 From 4bcdf1522fb11b6c3a3dabe4432b54da2bd6bc0e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 11:30:58 -0500 Subject: ftrace/x86: Move MCOUNT_SAVE_FRAME out of header file Linus pointed out that MCOUNT_SAVE_FRAME is used in only a single file and that there's no reason that it should be in a header file. Move the macro to the code that uses it. 
Link: http://lkml.kernel.org/r/CA+55aFwF+qCGSKdGaEgW4p6N65GZ5_XTV=1NbtWDvxnd5yYLiw@mail.gmail.com Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/include/asm/ftrace.h | 33 --------------------------------- arch/x86/kernel/mcount_64.S | 29 +++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 33 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index e1f7fecaa7d6..f45acad3c4b6 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -1,39 +1,6 @@ #ifndef _ASM_X86_FTRACE_H #define _ASM_X86_FTRACE_H -#ifdef __ASSEMBLY__ - - /* skip is set if the stack was already partially adjusted */ - .macro MCOUNT_SAVE_FRAME skip=0 - /* - * We add enough stack to save all regs. - */ - subq $(SS+8-\skip), %rsp - movq %rax, RAX(%rsp) - movq %rcx, RCX(%rsp) - movq %rdx, RDX(%rsp) - movq %rsi, RSI(%rsp) - movq %rdi, RDI(%rsp) - movq %r8, R8(%rsp) - movq %r9, R9(%rsp) - /* Move RIP to its proper location */ - movq SS+8(%rsp), %rdx - movq %rdx, RIP(%rsp) - .endm - - .macro MCOUNT_RESTORE_FRAME skip=0 - movq R9(%rsp), %r9 - movq R8(%rsp), %r8 - movq RDI(%rsp), %rdi - movq RSI(%rsp), %rsi - movq RDX(%rsp), %rdx - movq RCX(%rsp), %rcx - movq RAX(%rsp), %rax - addq $(SS+8-\skip), %rsp - .endm - -#endif - #ifdef CONFIG_FUNCTION_TRACER #ifdef CC_USING_FENTRY # define MCOUNT_ADDR ((long)(__fentry__)) diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 24842c701660..94fe46725fe0 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -21,6 +21,35 @@ # define function_hook mcount #endif +/* skip is set if the stack was already partially adjusted */ +.macro MCOUNT_SAVE_FRAME skip=0 + /* + * We add enough stack to save all regs. + */ + subq $(SS+8-\skip), %rsp + movq %rax, RAX(%rsp) + movq %rcx, RCX(%rsp) + movq %rdx, RDX(%rsp) + movq %rsi, RSI(%rsp) + movq %rdi, RDI(%rsp) + movq %r8, R8(%rsp) + movq %r9, R9(%rsp) + /* Move RIP to its proper location */ + movq SS+8(%rsp), %rdx + movq %rdx, RIP(%rsp) + .endm + +.macro MCOUNT_RESTORE_FRAME skip=0 + movq R9(%rsp), %r9 + movq R8(%rsp), %r8 + movq RDI(%rsp), %rdi + movq RSI(%rsp), %rsi + movq RDX(%rsp), %rdx + movq RCX(%rsp), %rcx + movq RAX(%rsp), %rax + addq $(SS+8-\skip), %rsp + .endm + /* skip is set if stack has been adjusted */ .macro ftrace_caller_setup trace_label skip=0 MCOUNT_SAVE_FRAME \skip -- cgit v1.2.3 From 05df710ec343cf351475e86bfaf89938e5114d69 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 11:43:39 -0500 Subject: ftrace/x86: Rename MCOUNT_SAVE_FRAME and add more detailed comments The name MCOUNT_SAVE_FRAME is rather confusing as it really isn't a function frame that is saved, but just the required mcount registers that need to be saved before C code may be called. The word "frame" makes it sound like a function frame, which it is not. Rename MCOUNT_SAVE_FRAME and MCOUNT_RESTORE_FRAME to save_mcount_regs and restore_mcount_regs respectively. Note the lower case, which keeps the macros from screaming at reviewers.
Link: http://lkml.kernel.org/r/CA+55aFwF+qCGSKdGaEgW4p6N65GZ5_XTV=1NbtWDvxnd5yYLiw@mail.gmail.com Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 94fe46725fe0..0a693d011980 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -21,8 +21,24 @@ # define function_hook mcount #endif +/* + * gcc -pg option adds a call to 'mcount' in most functions. + * When -mfentry is used, the call is to 'fentry' and not 'mcount' + * and is done before the function's stack frame is set up. + * They both require a set of regs to be saved before calling + * any C code and restored before returning back to the function. + * + * On boot up, all these calls are converted into nops. When tracing + * is enabled, the call can jump to either ftrace_caller or + * ftrace_regs_caller. Callbacks (tracing functions) that require + * ftrace_regs_caller (like kprobes) need to have pt_regs passed to + * it. For this reason, the size of the pt_regs structure will be + * allocated on the stack and the required mcount registers will + * be saved in the locations that pt_regs has them in. + */ + /* skip is set if the stack was already partially adjusted */ -.macro MCOUNT_SAVE_FRAME skip=0 +.macro save_mcount_regs skip=0 /* * We add enough stack to save all regs. */ @@ -39,7 +55,7 @@ movq %rdx, RIP(%rsp) .endm -.macro MCOUNT_RESTORE_FRAME skip=0 +.macro restore_mcount_regs skip=0 movq R9(%rsp), %r9 movq R8(%rsp), %r8 movq RDI(%rsp), %rdi @@ -52,7 +68,7 @@ /* skip is set if stack has been adjusted */ .macro ftrace_caller_setup trace_label skip=0 - MCOUNT_SAVE_FRAME \skip + save_mcount_regs \skip /* Save this location */ GLOBAL(\trace_label) @@ -121,7 +137,7 @@ GLOBAL(ftrace_call) restore_frame - MCOUNT_RESTORE_FRAME + restore_mcount_regs /* * The copied trampoline must call ftrace_return as it @@ -196,7 +212,7 @@ GLOBAL(ftrace_regs_call) movq RBX(%rsp), %rbx /* skip=8 to skip flags saved in SS */ - MCOUNT_RESTORE_FRAME 8 + restore_mcount_regs 8 /* Restore flags */ popfq @@ -240,7 +256,7 @@ trace: call *ftrace_trace_function - MCOUNT_RESTORE_FRAME + restore_mcount_regs jmp fgraph_trace END(function_hook) @@ -249,7 +265,7 @@ END(function_hook) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - MCOUNT_SAVE_FRAME + save_mcount_regs #ifdef CC_USING_FENTRY leaq SS+16(%rsp), %rdi @@ -263,7 +279,7 @@ ENTRY(ftrace_graph_caller) call prepare_ftrace_return - MCOUNT_RESTORE_FRAME + restore_mcount_regs retq END(ftrace_graph_caller) -- cgit v1.2.3 From 094dfc545139510f251b9595850aa63fe2a8c131 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 13:21:09 -0500 Subject: ftrace/x86: Have save_mcount_regs store RIP in %rdi for first parameter Instead of having save_mcount_regs store the RIP in %rdx as a temp register to place it in the proper location of the pt_regs on the stack, use the %rdi register as the temp register. This lets us remove the extra store in the ftrace_caller_setup macro.
Link: http://lkml.kernel.org/r/CA+55aFwF+qCGSKdGaEgW4p6N65GZ5_XTV=1NbtWDvxnd5yYLiw@mail.gmail.com Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Suggested-by: Linus Torvalds Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 0a693d011980..4f1b27642495 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -51,8 +51,8 @@ movq %r8, R8(%rsp) movq %r9, R9(%rsp) /* Move RIP to its proper location */ - movq SS+8(%rsp), %rdx - movq %rdx, RIP(%rsp) + movq SS+8(%rsp), %rdi + movq %rdi, RIP(%rsp) .endm .macro restore_mcount_regs skip=0 @@ -75,8 +75,7 @@ GLOBAL(\trace_label) /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx - /* Load ip into the first parameter */ - movq RIP(%rsp), %rdi + /* %rdi already has %rip from the save_mcount_regs macro */ subq $MCOUNT_INSN_SIZE, %rdi /* Load the parent_ip into the second parameter */ #ifdef CC_USING_FENTRY -- cgit v1.2.3 From 527aa75b333f90f4f90ac1730762156680a42fe8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 13:06:05 -0500 Subject: ftrace/x86: Simplify save_mcount_regs on getting RIP Currently save_mcount_regs is passed a "skip" parameter that tells it how much stack was used before the pt_regs were saved, as it tries to keep the saved pt_regs in the same location for all users. This is rather stupid, especially since the part stored on the pt_regs has nothing to do with what is supposed to be in that location. Instead of doing that, just pass in an "added" parameter that lets the macro know how much stack was added before it was called so that it can get to the RIP. But the difference is that it will now offset the pt_regs by that "added" count. The caller now needs to take care of the offset of the pt_regs. This will make it easier to simplify the code later. Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 4f1b27642495..596ac330c1db 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -37,12 +37,12 @@ * be saved in the locations that pt_regs has them in. */ -/* skip is set if the stack was already partially adjusted */ -.macro save_mcount_regs skip=0 +/* @added: the amount of stack added before calling this */ +.macro save_mcount_regs added=0 /* * We add enough stack to save all regs.
*/ - subq $(SS+8-\skip), %rsp + subq $(SS+8), %rsp movq %rax, RAX(%rsp) movq %rcx, RCX(%rsp) movq %rdx, RDX(%rsp) @@ -51,11 +51,11 @@ movq %r8, R8(%rsp) movq %r9, R9(%rsp) /* Move RIP to its proper location */ - movq SS+8(%rsp), %rdi + movq SS+8+\added(%rsp), %rdi movq %rdi, RIP(%rsp) .endm -.macro restore_mcount_regs skip=0 +.macro restore_mcount_regs movq R9(%rsp), %r9 movq R8(%rsp), %r8 movq RDI(%rsp), %rdi @@ -63,12 +63,12 @@ movq RDX(%rsp), %rdx movq RCX(%rsp), %rcx movq RAX(%rsp), %rax - addq $(SS+8-\skip), %rsp + addq $(SS+8), %rsp .endm /* skip is set if stack has been adjusted */ -.macro ftrace_caller_setup trace_label skip=0 - save_mcount_regs \skip +.macro ftrace_caller_setup trace_label added=0 + save_mcount_regs \added /* Save this location */ GLOBAL(\trace_label) @@ -79,9 +79,9 @@ GLOBAL(\trace_label) subq $MCOUNT_INSN_SIZE, %rdi /* Load the parent_ip into the second parameter */ #ifdef CC_USING_FENTRY - movq SS+16(%rsp), %rsi + movq SS+16+\added(%rsp), %rsi #else - movq 8(%rbp), %rsi + movq 8+\added(%rbp), %rsi #endif .endm @@ -156,10 +156,10 @@ GLOBAL(ftrace_stub) END(ftrace_caller) ENTRY(ftrace_regs_caller) - /* Save the current flags before compare (in SS location)*/ + /* Save the current flags before any operations that can change them */ pushfq - /* skip=8 to skip flags saved in SS */ + /* added 8 bytes to save flags */ ftrace_caller_setup ftrace_regs_caller_op_ptr 8 /* Save the rest of pt_regs */ @@ -172,15 +172,15 @@ ENTRY(ftrace_regs_caller) movq %rbp, RBP(%rsp) movq %rbx, RBX(%rsp) /* Copy saved flags */ - movq SS(%rsp), %rcx + movq SS+8(%rsp), %rcx movq %rcx, EFLAGS(%rsp) /* Kernel segments */ movq $__KERNEL_DS, %rcx movq %rcx, SS(%rsp) movq $__KERNEL_CS, %rcx movq %rcx, CS(%rsp) - /* Stack - skipping return address */ - leaq SS+16(%rsp), %rcx + /* Stack - skipping return address and flags */ + leaq SS+8*3(%rsp), %rcx movq %rcx, RSP(%rsp) /* regs go into 4th parameter */ @@ -195,11 +195,11 @@ GLOBAL(ftrace_regs_call) /* Copy flags back to SS, to restore them */ movq EFLAGS(%rsp), %rax - movq %rax, SS(%rsp) + movq %rax, SS+8(%rsp) /* Handlers can change the RIP */ movq RIP(%rsp), %rax - movq %rax, SS+8(%rsp) + movq %rax, SS+8*2(%rsp) /* restore the rest of pt_regs */ movq R15(%rsp), %r15 @@ -210,8 +210,7 @@ GLOBAL(ftrace_regs_call) movq RBP(%rsp), %rbp movq RBX(%rsp), %rbx - /* skip=8 to skip flags saved in SS */ - restore_mcount_regs 8 + restore_mcount_regs /* Restore flags */ popfq -- cgit v1.2.3 From 85f6f0290c4d4667a5afb06e66815bcf5ce2c4f7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 14:26:38 -0500 Subject: ftrace/x86: Add macro MCOUNT_REG_SIZE for amount of stack used to save mcount regs The macro save_mcount_regs saves regs onto the stack. But to uncouple the amount of stack used in that macro from the users of the macro, we need to have a define that tells all the users how much stack is used by that macro. This way we can change the amount of stack the macro uses without breaking its users. Also remove some dead code that was left over from commit fdc841b58cf5 "ftrace: x86: Remove check of obsolete variable function_trace_stop". 
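As a worked example of the decoupling (using the definitions in the diff below): save_mcount_regs leaves the saved registers in the bottom MCOUNT_REG_SIZE bytes of the stack, so the traced function's return address always sits at MCOUNT_REG_SIZE+\added(%rsp), no matter how the macro lays out its save area internally. A sketch of the stack right after the macro runs (illustrative only, higher addresses on top):

	| return address of the traced function | <- MCOUNT_REG_SIZE+\added(%rsp)
	| \added bytes pushed by the user,      |
	| e.g. the flags in ftrace_regs_caller  | <- MCOUNT_REG_SIZE(%rsp)
	| mcount register save area             |
	| (MCOUNT_REG_SIZE bytes)               | <- %rsp

If the save area ever grows, only the MCOUNT_REG_SIZE define changes and every such offset stays correct.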
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 596ac330c1db..a0f6f942183a 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -21,6 +21,9 @@ # define function_hook mcount #endif +/* Size of stack used to save mcount regs in save_mcount_regs */ +#define MCOUNT_REG_SIZE (SS+8) + /* * gcc -pg option adds a call to 'mcount' in most functions. * When -mfentry is used, the call is to 'fentry' and not 'mcount' @@ -42,7 +45,7 @@ /* * We add enough stack to save all regs. */ - subq $(SS+8), %rsp + subq $MCOUNT_REG_SIZE, %rsp movq %rax, RAX(%rsp) movq %rcx, RCX(%rsp) movq %rdx, RDX(%rsp) @@ -51,7 +54,7 @@ movq %r8, R8(%rsp) movq %r9, R9(%rsp) /* Move RIP to its proper location */ - movq SS+8+\added(%rsp), %rdi + movq MCOUNT_REG_SIZE+\added(%rsp), %rdi movq %rdi, RIP(%rsp) .endm @@ -63,7 +66,7 @@ movq RDX(%rsp), %rdx movq RCX(%rsp), %rcx movq RAX(%rsp), %rax - addq $(SS+8), %rsp + addq $MCOUNT_REG_SIZE, %rsp .endm /* skip is set if stack has been adjusted */ @@ -79,7 +82,7 @@ GLOBAL(\trace_label) subq $MCOUNT_INSN_SIZE, %rdi /* Load the parent_ip into the second parameter */ #ifdef CC_USING_FENTRY - movq SS+16+\added(%rsp), %rsi + movq MCOUNT_REG_SIZE+8+\added(%rsp), %rsi #else movq 8+\added(%rbp), %rsi #endif @@ -172,7 +175,7 @@ ENTRY(ftrace_regs_caller) movq %rbp, RBP(%rsp) movq %rbx, RBX(%rsp) /* Copy saved flags */ - movq SS+8(%rsp), %rcx + movq MCOUNT_REG_SIZE(%rsp), %rcx movq %rcx, EFLAGS(%rsp) /* Kernel segments */ movq $__KERNEL_DS, %rcx @@ -180,7 +183,7 @@ movq $__KERNEL_CS, %rcx movq %rcx, CS(%rsp) /* Stack - skipping return address and flags */ - leaq SS+8*3(%rsp), %rcx + leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx movq %rcx, RSP(%rsp) /* regs go into 4th parameter */ @@ -195,11 +198,11 @@ GLOBAL(ftrace_regs_call) /* Copy flags back to SS, to restore them */ movq EFLAGS(%rsp), %rax - movq %rax, SS+8(%rsp) + movq %rax, MCOUNT_REG_SIZE(%rsp) /* Handlers can change the RIP */ movq RIP(%rsp), %rax - movq %rax, SS+8*2(%rsp) + movq %rax, MCOUNT_REG_SIZE+8(%rsp) /* restore the rest of pt_regs */ movq R15(%rsp), %r15 @@ -225,9 +228,6 @@ GLOBAL(ftrace_regs_caller_end) jmp ftrace_return - popfq - jmp ftrace_stub - END(ftrace_regs_caller) @@ -266,7 +266,7 @@ ENTRY(ftrace_graph_caller) save_mcount_regs #ifdef CC_USING_FENTRY - leaq SS+16(%rsp), %rdi + leaq MCOUNT_REG_SIZE+8(%rsp), %rdi movq $0, %rdx /* No framepointers needed */ #else leaq 8(%rbp), %rdi -- cgit v1.2.3 From 0687c36e456ca81feff5f3415e53c1cb8d8efd56 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 18:08:48 -0500 Subject: ftrace/x86: Have save_mcount_regs macro also save stack frames if needed The save_mcount_regs macro saves and restores the required mcount regs that need to be saved before calling C code. It is done for all the function hook utilities (static tracing, dynamic tracing, regs, function graph). When frame pointers are enabled, the ftrace trampolines need to set up frames and pointers such that a back trace (dump stack) can continue past them. Currently, a separate macro is used (create_frame) to do this, but it's only done for the ftrace_caller and ftrace_regs_caller functions. It is not done for the static tracer or function graph tracing.
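As an illustration of what has to be faked, with fentry and frame pointers enabled the trampoline must build two frames, sketched here from the create_frame macro that this patch folds into save_mcount_regs (layout only, offsets omitted; higher addresses on top):

	| parent's return address |  \ frame for the function that called
	| saved %rbp              |  /  the traced function
	| traced function's rip   |  \ frame for the traced function
	| saved %rbp              |  /  <- %rbp after the frames are set up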
Instead of having a separate macro do the recording of the frames, have save_mcount_regs perform this task. This also has all tracers saving the frame pointers when needed. Link: http://lkml.kernel.org/r/CA+55aFwF+qCGSKdGaEgW4p6N65GZ5_XTV=1NbtWDvxnd5yYLiw@mail.gmail.com Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Suggested-by: Linus Torvalds Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 118 +++++++++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 51 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index a0f6f942183a..003b22df1d87 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -21,8 +21,22 @@ # define function_hook mcount #endif +/* All cases save the original rbp (8 bytes) */ +#ifdef CONFIG_FRAME_POINTER +# ifdef CC_USING_FENTRY +/* Save parent and function stack frames (rip and rbp) */ +# define MCOUNT_FRAME_SIZE (8+16*2) +# else +/* Save just function stack frame (rip and rbp) */ +# define MCOUNT_FRAME_SIZE (8+16) +# endif +#else +/* No need to save a stack frame */ +# define MCOUNT_FRAME_SIZE 8 +#endif /* CONFIG_FRAME_POINTER */ + /* Size of stack used to save mcount regs in save_mcount_regs */ -#define MCOUNT_REG_SIZE (SS+8) +#define MCOUNT_REG_SIZE (SS+8 + MCOUNT_FRAME_SIZE) /* * gcc -pg option adds a call to 'mcount' in most functions. @@ -42,10 +56,37 @@ /* @added: the amount of stack added before calling this */ .macro save_mcount_regs added=0 - /* - * We add enough stack to save all regs. - */ - subq $MCOUNT_REG_SIZE, %rsp + + /* Always save the original rbp */ + pushq %rbp + +#ifdef CONFIG_FRAME_POINTER + /* + * Stack traces will stop at the ftrace trampoline if the frame pointer + * is not set up properly. If fentry is used, we need to save a frame + * pointer for the parent as well as the function traced, because the + * fentry is called before the stack frame is set up, where as mcount + * is called afterward. + */ +#ifdef CC_USING_FENTRY + /* Save the parent pointer (skip orig rbp and our return address) */ + pushq \added+8*2(%rsp) + pushq %rbp + movq %rsp, %rbp + /* Save the return address (now skip orig rbp, rbp and parent) */ + pushq \added+8*3(%rsp) +#else + /* Can't assume that rip is before this (unless added was zero) */ + pushq \added+8(%rsp) +#endif + pushq %rbp + movq %rsp, %rbp +#endif /* CONFIG_FRAME_POINTER */ + + /* + * We add enough stack to save all regs. + */ + subq $(MCOUNT_REG_SIZE - MCOUNT_FRAME_SIZE), %rsp movq %rax, RAX(%rsp) movq %rcx, RCX(%rsp) movq %rdx, RDX(%rsp) @@ -53,6 +94,13 @@ movq %rdi, RDI(%rsp) movq %r8, R8(%rsp) movq %r9, R9(%rsp) + /* + * Save the original RBP. Even though the mcount ABI does not + * require this, it helps out callers.
+ */ + movq MCOUNT_REG_SIZE-8(%rsp), %rdx + movq %rdx, RBP(%rsp) + /* Move RIP to its proper location */ movq MCOUNT_REG_SIZE+\added(%rsp), %rdi movq %rdi, RIP(%rsp) @@ -66,7 +114,12 @@ movq RDX(%rsp), %rdx movq RCX(%rsp), %rcx movq RAX(%rsp), %rax + + /* ftrace_regs_caller can modify %rbp */ + movq RBP(%rsp), %rbp + addq $MCOUNT_REG_SIZE, %rsp + .endm /* skip is set if stack has been adjusted */ @@ -84,7 +137,10 @@ GLOBAL(\trace_label) #ifdef CC_USING_FENTRY movq MCOUNT_REG_SIZE+8+\added(%rsp), %rsi #else - movq 8+\added(%rbp), %rsi + /* Need to grab the original %rbp */ + movq RBP(%rsp), %rsi + /* Now parent address is 8 above original %rbp */ + movq 8(%rsi), %rsi #endif .endm @@ -94,51 +150,14 @@ ENTRY(function_hook) retq END(function_hook) -#ifdef CONFIG_FRAME_POINTER -/* - * Stack traces will stop at the ftrace trampoline if the frame pointer - * is not set up properly. If fentry is used, we need to save a frame - * pointer for the parent as well as the function traced, because the - * fentry is called before the stack frame is set up, where as mcount - * is called afterward. - */ -.macro create_frame parent rip -#ifdef CC_USING_FENTRY - pushq \parent - pushq %rbp - movq %rsp, %rbp -#endif - pushq \rip - pushq %rbp - movq %rsp, %rbp -.endm - -.macro restore_frame -#ifdef CC_USING_FENTRY - addq $16, %rsp -#endif - popq %rbp - addq $8, %rsp -.endm -#else -.macro create_frame parent rip -.endm -.macro restore_frame -.endm -#endif /* CONFIG_FRAME_POINTER */ - ENTRY(ftrace_caller) ftrace_caller_setup ftrace_caller_op_ptr /* regs go into 4th parameter (but make it NULL) */ movq $0, %rcx - create_frame %rsi, %rdi - GLOBAL(ftrace_call) call ftrace_stub - restore_frame - restore_mcount_regs /* @@ -172,7 +191,6 @@ ENTRY(ftrace_regs_caller) movq %r12, R12(%rsp) movq %r11, R11(%rsp) movq %r10, R10(%rsp) - movq %rbp, RBP(%rsp) movq %rbx, RBX(%rsp) /* Copy saved flags */ movq MCOUNT_REG_SIZE(%rsp), %rcx @@ -189,13 +207,9 @@ ENTRY(ftrace_regs_caller) /* regs go into 4th parameter */ leaq (%rsp), %rcx - create_frame %rsi, %rdi - GLOBAL(ftrace_regs_call) call ftrace_stub - restore_frame - /* Copy flags back to SS, to restore them */ movq EFLAGS(%rsp), %rax movq %rax, MCOUNT_REG_SIZE(%rsp) @@ -210,7 +224,6 @@ GLOBAL(ftrace_regs_call) movq R13(%rsp), %r13 movq R12(%rsp), %r12 movq R10(%rsp), %r10 - movq RBP(%rsp), %rbp movq RBX(%rsp), %rbx restore_mcount_regs @@ -269,8 +282,11 @@ ENTRY(ftrace_graph_caller) leaq MCOUNT_REG_SIZE+8(%rsp), %rdi movq $0, %rdx /* No framepointers needed */ #else - leaq 8(%rbp), %rdi - movq (%rbp), %rdx + /* Need to grab the original %rbp */ + movq RBP(%rsp), %rdx + /* Now parent address is 8 above original %rbp */ + leaq 8(%rdx), %rdi + movq (%rdx), %rdx #endif movq RIP(%rsp), %rsi subq $MCOUNT_INSN_SIZE, %rsi -- cgit v1.2.3 From f1ab00af816ec8e1ad53229963c863fdd6bcf222 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 21:38:40 -0500 Subject: ftrace/x86: Get rid of ftrace_caller_setup Move all the work from ftrace_caller_setup into save_mcount_regs. This simplifies the code and makes it easier to understand. 
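With ftrace_caller_setup folded away, the division of labor is plain: save_mcount_regs fills the first two C parameters and each trampoline loads the remaining two. A hedged sketch of the receiving C side follows; the function and ops names are hypothetical, but the signature matches the ftrace_func_t of this era, whose four arguments arrive in %rdi, %rsi, %rdx and %rcx per the x86_64 SysV ABI.

#include <linux/ftrace.h>

/*
 * Sketch of a callback the trampolines ultimately reach:
 *   ip        - call site (%rdi, set up by save_mcount_regs)
 *   parent_ip - caller of the traced function (%rsi, ditto)
 *   op        - the ftrace_ops (%rdx, loaded at ftrace_caller_op_ptr)
 *   regs      - pt_regs or NULL (%rcx, depending on the trampoline)
 */
static void my_trace_func(unsigned long ip, unsigned long parent_ip,
			  struct ftrace_ops *op, struct pt_regs *regs)
{
	/* hypothetical body: record the hit */
}

static struct ftrace_ops my_ops = {
	.func = my_trace_func,
};

/* would be hooked up elsewhere with register_ftrace_function(&my_ops) */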
Link: http://lkml.kernel.org/r/CA+55aFxUTUbdxpjVMW8X9c=o8sui7OB_MYPfcbJuDyfUWtNrNg@mail.gmail.com Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Suggested-by: Linus Torvalds Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 71 +++++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 003b22df1d87..ddc766efa1f1 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -54,7 +54,15 @@ * be saved in the locations that pt_regs has them in. */ -/* @added: the amount of stack added before calling this */ +/* + * @added: the amount of stack added before calling this + * + * After this is called, the following registers contain: + * + * %rdi - holds the address that called the trampoline + * %rsi - holds the parent function (traced function's return address) + * %rdx - holds the original %rbp + */ .macro save_mcount_regs added=0 /* Always save the original rbp */ @@ -101,9 +109,24 @@ movq MCOUNT_REG_SIZE-8(%rsp), %rdx movq %rdx, RBP(%rsp) + /* Copy the parent address into %rsi (second parameter) */ +#ifdef CC_USING_FENTRY + movq MCOUNT_REG_SIZE+8+\added(%rsp), %rsi +#else + /* %rdx contains original %rbp */ + movq 8(%rdx), %rsi +#endif + /* Move RIP to its proper location */ movq MCOUNT_REG_SIZE+\added(%rsp), %rdi movq %rdi, RIP(%rsp) + + /* + * Now %rdi (the first parameter) has the return address of + * where ftrace_call returns. But the callbacks expect the + * the address of the call itself. + */ + subq $MCOUNT_INSN_SIZE, %rdi .endm .macro restore_mcount_regs @@ -122,28 +145,6 @@ .endm -/* skip is set if stack has been adjusted */ -.macro ftrace_caller_setup trace_label added=0 - save_mcount_regs \added - - /* Save this location */ -GLOBAL(\trace_label) - /* Load the ftrace_ops into the 3rd parameter */ - movq function_trace_op(%rip), %rdx - - /* %rdi already has %rip from the save_mcount_regs macro */ - subq $MCOUNT_INSN_SIZE, %rdi - /* Load the parent_ip into the second parameter */ -#ifdef CC_USING_FENTRY - movq MCOUNT_REG_SIZE+8+\added(%rsp), %rsi -#else - /* Need to grab the original %rbp */ - movq RBP(%rsp), %rsi - /* Now parent address is 8 above original %rbp */ - movq 8(%rsi), %rsi -#endif -.endm - #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(function_hook) @@ -151,7 +152,13 @@ ENTRY(function_hook) END(function_hook) ENTRY(ftrace_caller) - ftrace_caller_setup ftrace_caller_op_ptr + /* save_mcount_regs fills in first two parameters */ + save_mcount_regs + +GLOBAL(ftrace_caller_op_ptr) + /* Load the ftrace_ops into the 3rd parameter */ + movq function_trace_op(%rip), %rdx + /* regs go into 4th parameter (but make it NULL) */ movq $0, %rcx @@ -182,7 +189,12 @@ ENTRY(ftrace_regs_caller) pushfq /* added 8 bytes to save flags */ - ftrace_caller_setup ftrace_regs_caller_op_ptr 8 + save_mcount_regs 8 + /* save_mcount_regs fills in first two parameters */ + +GLOBAL(ftrace_regs_caller_op_ptr) + /* Load the ftrace_ops into the 3rd parameter */ + movq function_trace_op(%rip), %rdx /* Save the rest of pt_regs */ movq %r15, R15(%rsp) @@ -263,7 +275,8 @@ GLOBAL(ftrace_stub) retq trace: - ftrace_caller_setup ftrace_caller_op_ptr + /* save_mcount_regs fills in first two parameters */ + save_mcount_regs call *ftrace_trace_function @@ -276,16 +289,16 @@ END(function_hook) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) + /* Saves rbp into %rdx */ save_mcount_regs #ifdef 
CC_USING_FENTRY leaq MCOUNT_REG_SIZE+8(%rsp), %rdi movq $0, %rdx /* No framepointers needed */ #else - /* Need to grab the original %rbp */ - movq RBP(%rsp), %rdx - /* Now parent address is 8 above original %rbp */ + /* Save address of the return address of traced function */ leaq 8(%rdx), %rdi + /* ftrace does sanity checks against frame pointers */ movq (%rdx), %rdx #endif movq RIP(%rsp), %rsi -- cgit v1.2.3 From 6a06bdbf7f9c669743f58084991ba280f2925586 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 21:00:34 -0500 Subject: ftrace/fgraph/x86: Have prepare_ftrace_return() take ip as first parameter The function graph helper function prepare_ftrace_return(), which does the work to hijack the parent pointer, has that parent pointer as its first parameter. Instead, if we make it the second parameter and have ip as the first parameter (self_addr), it can simply reuse the value that save_mcount_regs has already loaded into %rdi. Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 4 ++-- arch/x86/kernel/mcount_64.S | 11 ++++------- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 60881d919432..2142376dc8c6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -871,7 +871,7 @@ static void *addr_from_call(void *ptr) return ptr + MCOUNT_INSN_SIZE + calc.offset; } -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, +void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, unsigned long frame_pointer); /* @@ -964,7 +964,7 @@ int ftrace_disable_ftrace_graph_caller(void) * Hook the return address and push it in the stack of return addrs * in current thread info. */ -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, +void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, unsigned long frame_pointer) { unsigned long old; diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index ddc766efa1f1..94ea120fa21f 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -124,7 +124,7 @@ /* * Now %rdi (the first parameter) has the return address of * where ftrace_call returns. But the callbacks expect the - * the address of the call itself. + * address of the call itself. */ subq $MCOUNT_INSN_SIZE, %rdi .endm @@ -289,21 +289,18 @@ END(function_hook) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - /* Saves rbp into %rdx */ + /* Saves rbp into %rdx and fills first parameter */ save_mcount_regs #ifdef CC_USING_FENTRY - leaq MCOUNT_REG_SIZE+8(%rsp), %rdi + leaq MCOUNT_REG_SIZE+8(%rsp), %rsi movq $0, %rdx /* No framepointers needed */ #else /* Save address of the return address of traced function */ - leaq 8(%rdx), %rdi + leaq 8(%rdx), %rsi /* ftrace does sanity checks against frame pointers */ movq (%rdx), %rdx #endif - movq RIP(%rsp), %rsi - subq $MCOUNT_INSN_SIZE, %rsi - call prepare_ftrace_return restore_mcount_regs -- cgit v1.2.3
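Seen from C, the parameter swap lines the prototype up with what save_mcount_regs already placed in the argument registers. A short sketch (the prototype is from the diff above; the commentary is editorial):

/* New order, as in the patch: ip (self_addr) first, parent pointer second. */
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
			   unsigned long frame_pointer);

/*
 * In the SysV ABI the first argument travels in %rdi, and save_mcount_regs
 * already leaves the traced call's address there (RIP minus
 * MCOUNT_INSN_SIZE), so ftrace_graph_caller can drop its
 * "movq RIP(%rsp), %rsi; subq $MCOUNT_INSN_SIZE, %rsi" pair and only
 * fill %rsi (address of the parent slot) and %rdx (frame pointer, or 0
 * with fentry).
 */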