From 6743ad432ec92e680cd0d9db86cb17b949cf5a43 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 12 May 2020 17:02:33 +0900 Subject: kprobes: Suppress the suspicious RCU warning on kprobes Anders reported that the lockdep warns that suspicious RCU list usage in register_kprobe() (detected by CONFIG_PROVE_RCU_LIST.) This is because get_kprobe() access kprobe_table[] by hlist_for_each_entry_rcu() without rcu_read_lock. If we call get_kprobe() from the breakpoint handler context, it is run with preempt disabled, so this is not a problem. But in other cases, instead of rcu_read_lock(), we locks kprobe_mutex so that the kprobe_table[] is not updated. So, current code is safe, but still not good from the view point of RCU. Joel suggested that we can silent that warning by passing lockdep_is_held() to the last argument of hlist_for_each_entry_rcu(). Add lockdep_is_held(&kprobe_mutex) at the end of the hlist_for_each_entry_rcu() to suppress the warning. Link: http://lkml.kernel.org/r/158927055350.27680.10261450713467997503.stgit@devnote2 Reported-by: Anders Roxell Suggested-by: Joel Fernandes Reviewed-by: Joel Fernandes (Google) Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/kprobes.c') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 50cd84f53df0..8b2fd4145ab3 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -326,7 +326,8 @@ struct kprobe *get_kprobe(void *addr) struct kprobe *p; head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; - hlist_for_each_entry_rcu(p, head, hlist) { + hlist_for_each_entry_rcu(p, head, hlist, + lockdep_is_held(&kprobe_mutex)) { if (p->addr == addr) return p; } -- cgit v1.2.3 From 7e6a71d8e60187726e29b13d9e9b23b77026c17a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 12 May 2020 17:02:44 +0900 Subject: kprobes: Use non RCU traversal APIs on kprobe_tables if possible Current kprobes uses RCU traversal APIs on kprobe_tables even if it is safe because kprobe_mutex is locked. Make those traversals to non-RCU APIs where the kprobe_mutex is locked. Link: http://lkml.kernel.org/r/158927056452.27680.9710575332163005121.stgit@devnote2 Reviewed-by: Joel Fernandes (Google) Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'kernel/kprobes.c') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 8b2fd4145ab3..ceb0e273bd69 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -46,6 +46,11 @@ static int kprobes_initialized; +/* kprobe_table can be accessed by + * - Normal hlist traversal and RCU add/del under kprobe_mutex is held. + * Or + * - RCU hlist traversal under disabling preempt (breakpoint handlers) + */ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; @@ -850,7 +855,7 @@ static void optimize_all_kprobes(void) kprobes_allow_optimization = true; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, head, hlist) + hlist_for_each_entry(p, head, hlist) if (!kprobe_disabled(p)) optimize_kprobe(p); } @@ -877,7 +882,7 @@ static void unoptimize_all_kprobes(void) kprobes_allow_optimization = false; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, head, hlist) { + hlist_for_each_entry(p, head, hlist) { if (!kprobe_disabled(p)) unoptimize_kprobe(p, false); } @@ -1500,12 +1505,14 @@ static struct kprobe *__get_valid_kprobe(struct kprobe *p) { struct kprobe *ap, *list_p; + lockdep_assert_held(&kprobe_mutex); + ap = get_kprobe(p->addr); if (unlikely(!ap)) return NULL; if (p != ap) { - list_for_each_entry_rcu(list_p, &ap->list, list) + list_for_each_entry(list_p, &ap->list, list) if (list_p == p) /* kprobe p is a valid probe */ goto valid; @@ -1670,7 +1677,9 @@ static int aggr_kprobe_disabled(struct kprobe *ap) { struct kprobe *kp; - list_for_each_entry_rcu(kp, &ap->list, list) + lockdep_assert_held(&kprobe_mutex); + + list_for_each_entry(kp, &ap->list, list) if (!kprobe_disabled(kp)) /* * There is an active probe on the list. @@ -1749,7 +1758,7 @@ static int __unregister_kprobe_top(struct kprobe *p) else { /* If disabling probe has special handlers, update aggrprobe */ if (p->post_handler && !kprobe_gone(p)) { - list_for_each_entry_rcu(list_p, &ap->list, list) { + list_for_each_entry(list_p, &ap->list, list) { if ((list_p != p) && (list_p->post_handler)) goto noclean; } @@ -2063,13 +2072,15 @@ static void kill_kprobe(struct kprobe *p) { struct kprobe *kp; + lockdep_assert_held(&kprobe_mutex); + p->flags |= KPROBE_FLAG_GONE; if (kprobe_aggrprobe(p)) { /* * If this is an aggr_kprobe, we have to list all the * chained probes and mark them GONE. */ - list_for_each_entry_rcu(kp, &p->list, list) + list_for_each_entry(kp, &p->list, list) kp->flags |= KPROBE_FLAG_GONE; p->post_handler = NULL; kill_optimized_kprobe(p); @@ -2313,7 +2324,7 @@ static int kprobes_module_callback(struct notifier_block *nb, mutex_lock(&kprobe_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, head, hlist) + hlist_for_each_entry(p, head, hlist) if (within_module_init((unsigned long)p->addr, mod) || (checkcore && within_module_core((unsigned long)p->addr, mod))) { @@ -2551,7 +2562,7 @@ static int arm_all_kprobes(void) for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; /* Arm all kprobes on a best-effort basis */ - hlist_for_each_entry_rcu(p, head, hlist) { + hlist_for_each_entry(p, head, hlist) { if (!kprobe_disabled(p)) { err = arm_kprobe(p); if (err) { @@ -2594,7 +2605,7 @@ static int disarm_all_kprobes(void) for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; /* Disarm all kprobes on a best-effort basis */ - hlist_for_each_entry_rcu(p, head, hlist) { + hlist_for_each_entry(p, head, hlist) { if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) { err = disarm_kprobe(p, false); if (err) { -- cgit v1.2.3 From 1a0aa991a6274161c95a844c58cfb801d681eb59 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 12 May 2020 17:02:56 +0900 Subject: kprobes: Fix to protect kick_kprobe_optimizer() by kprobe_mutex In kprobe_optimizer() kick_kprobe_optimizer() is called without kprobe_mutex, but this can race with other caller which is protected by kprobe_mutex. To fix that, expand kprobe_mutex protected area to protect kick_kprobe_optimizer() call. Link: http://lkml.kernel.org/r/158927057586.27680.5036330063955940456.stgit@devnote2 Fixes: cd7ebe2298ff ("kprobes: Use text_poke_smp_batch for optimizing") Cc: Ingo Molnar Cc: "Gustavo A . R . Silva" Cc: Anders Roxell Cc: "Naveen N . Rao" Cc: Anil S Keshavamurthy Cc: David Miller Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Ziqian SUN Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/kprobes.c') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ceb0e273bd69..0e185763578b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -592,11 +592,12 @@ static void kprobe_optimizer(struct work_struct *work) mutex_unlock(&module_mutex); mutex_unlock(&text_mutex); cpus_read_unlock(); - mutex_unlock(&kprobe_mutex); /* Step 5: Kick optimizer again if needed */ if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) kick_kprobe_optimizer(); + + mutex_unlock(&kprobe_mutex); } /* Wait for completing optimization and unoptimization */ -- cgit v1.2.3 From 75ddf64dd276e3fc8906f27549afa229798ad916 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 12 May 2020 17:03:07 +0900 Subject: kprobes: Remove redundant arch_disarm_kprobe() call Fix to remove redundant arch_disarm_kprobe() call in force_unoptimize_kprobe(). This arch_disarm_kprobe() will be invoked if the kprobe is optimized but disabled, but that means the kprobe (optprobe) is unused (and unoptimized) state. In that case, unoptimize_kprobe() puts it in freeing_list and kprobe_optimizer (do_unoptimize_kprobes()) automatically disarm it. Thus this arch_disarm_kprobe() is redundant. Link: http://lkml.kernel.org/r/158927058719.27680.17183632908465341189.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/kprobes.c') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 0e185763578b..5cb7791c16b3 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -675,8 +675,6 @@ static void force_unoptimize_kprobe(struct optimized_kprobe *op) lockdep_assert_cpus_held(); arch_unoptimize_kprobe(op); op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; - if (kprobe_disabled(&op->kp)) - arch_disarm_kprobe(&op->kp); } /* Unoptimize a kprobe if p is optimized */ -- cgit v1.2.3 From 9b38cc704e844e41d9cf74e647bff1d249512cb3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 12 May 2020 17:03:18 +0900 Subject: kretprobe: Prevent triggering kretprobe from within kprobe_flush_task Ziqian reported lockup when adding retprobe on _raw_spin_lock_irqsave. My test was also able to trigger lockdep output: ============================================ WARNING: possible recursive locking detected 5.6.0-rc6+ #6 Not tainted -------------------------------------------- sched-messaging/2767 is trying to acquire lock: ffffffff9a492798 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_hash_lock+0x52/0xa0 but task is already holding lock: ffffffff9a491a18 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_trampoline+0x0/0x50 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(kretprobe_table_locks[i].lock)); lock(&(kretprobe_table_locks[i].lock)); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by sched-messaging/2767: #0: ffffffff9a491a18 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_trampoline+0x0/0x50 stack backtrace: CPU: 3 PID: 2767 Comm: sched-messaging Not tainted 5.6.0-rc6+ #6 Call Trace: dump_stack+0x96/0xe0 __lock_acquire.cold.57+0x173/0x2b7 ? native_queued_spin_lock_slowpath+0x42b/0x9e0 ? lockdep_hardirqs_on+0x590/0x590 ? __lock_acquire+0xf63/0x4030 lock_acquire+0x15a/0x3d0 ? kretprobe_hash_lock+0x52/0xa0 _raw_spin_lock_irqsave+0x36/0x70 ? kretprobe_hash_lock+0x52/0xa0 kretprobe_hash_lock+0x52/0xa0 trampoline_handler+0xf8/0x940 ? kprobe_fault_handler+0x380/0x380 ? find_held_lock+0x3a/0x1c0 kretprobe_trampoline+0x25/0x50 ? lock_acquired+0x392/0xbc0 ? _raw_spin_lock_irqsave+0x50/0x70 ? __get_valid_kprobe+0x1f0/0x1f0 ? _raw_spin_unlock_irqrestore+0x3b/0x40 ? finish_task_switch+0x4b9/0x6d0 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 The code within the kretprobe handler checks for probe reentrancy, so we won't trigger any _raw_spin_lock_irqsave probe in there. The problem is in outside kprobe_flush_task, where we call: kprobe_flush_task kretprobe_table_lock raw_spin_lock_irqsave _raw_spin_lock_irqsave where _raw_spin_lock_irqsave triggers the kretprobe and installs kretprobe_trampoline handler on _raw_spin_lock_irqsave return. The kretprobe_trampoline handler is then executed with already locked kretprobe_table_locks, and first thing it does is to lock kretprobe_table_locks ;-) the whole lockup path like: kprobe_flush_task kretprobe_table_lock raw_spin_lock_irqsave _raw_spin_lock_irqsave ---> probe triggered, kretprobe_trampoline installed ---> kretprobe_table_locks locked kretprobe_trampoline trampoline_handler kretprobe_hash_lock(current, &head, &flags); <--- deadlock Adding kprobe_busy_begin/end helpers that mark code with fake probe installed to prevent triggering of another kprobe within this code. Using these helpers in kprobe_flush_task, so the probe recursion protection check is hit and the probe is never set to prevent above lockup. Link: http://lkml.kernel.org/r/158927059835.27680.7011202830041561604.stgit@devnote2 Fixes: ef53d9c5e4da ("kprobes: improve kretprobe scalability with hashed locking") Cc: Ingo Molnar Cc: "Gustavo A . R . Silva" Cc: Anders Roxell Cc: "Naveen N . Rao" Cc: Anil S Keshavamurthy Cc: David Miller Cc: Ingo Molnar Cc: Peter Zijlstra Cc: stable@vger.kernel.org Reported-by: "Ziqian SUN (Zamir)" Acked-by: Masami Hiramatsu Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/kprobes/core.c | 16 +++------------- include/linux/kprobes.h | 4 ++++ kernel/kprobes.c | 24 ++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 13 deletions(-) (limited to 'kernel/kprobes.c') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3bafe1bd4dc7..8a5ec10e95dc 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -753,16 +753,11 @@ asm( NOKPROBE_SYMBOL(kretprobe_trampoline); STACK_FRAME_NON_STANDARD(kretprobe_trampoline); -static struct kprobe kretprobe_kprobe = { - .addr = (void *)kretprobe_trampoline, -}; - /* * Called from kretprobe_trampoline */ __used __visible void *trampoline_handler(struct pt_regs *regs) { - struct kprobe_ctlblk *kcb; struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; struct hlist_node *tmp; @@ -772,16 +767,12 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) void *frame_pointer; bool skipped = false; - preempt_disable(); - /* * Set a dummy kprobe for avoiding kretprobe recursion. * Since kretprobe never run in kprobe handler, kprobe must not * be running at this point. */ - kcb = get_kprobe_ctlblk(); - __this_cpu_write(current_kprobe, &kretprobe_kprobe); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; + kprobe_busy_begin(); INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -857,7 +848,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) __this_cpu_write(current_kprobe, &ri->rp->kp); ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, &kretprobe_kprobe); + __this_cpu_write(current_kprobe, &kprobe_busy); } recycle_rp_inst(ri, &empty_rp); @@ -873,8 +864,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); - __this_cpu_write(current_kprobe, NULL); - preempt_enable(); + kprobe_busy_end(); hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 594265bfd390..05ed663e6c7b 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -350,6 +350,10 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) return this_cpu_ptr(&kprobe_ctlblk); } +extern struct kprobe kprobe_busy; +void kprobe_busy_begin(void); +void kprobe_busy_end(void); + kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset); int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5cb7791c16b3..4a904cc56d68 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1241,6 +1241,26 @@ __releases(hlist_lock) } NOKPROBE_SYMBOL(kretprobe_table_unlock); +struct kprobe kprobe_busy = { + .addr = (void *) get_kprobe, +}; + +void kprobe_busy_begin(void) +{ + struct kprobe_ctlblk *kcb; + + preempt_disable(); + __this_cpu_write(current_kprobe, &kprobe_busy); + kcb = get_kprobe_ctlblk(); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; +} + +void kprobe_busy_end(void) +{ + __this_cpu_write(current_kprobe, NULL); + preempt_enable(); +} + /* * This function is called from finish_task_switch when task tk becomes dead, * so that we can recycle any function-return probe instances associated @@ -1258,6 +1278,8 @@ void kprobe_flush_task(struct task_struct *tk) /* Early boot. kretprobe_table_locks not yet initialized. */ return; + kprobe_busy_begin(); + INIT_HLIST_HEAD(&empty_rp); hash = hash_ptr(tk, KPROBE_HASH_BITS); head = &kretprobe_inst_table[hash]; @@ -1271,6 +1293,8 @@ void kprobe_flush_task(struct task_struct *tk) hlist_del(&ri->hlist); kfree(ri); } + + kprobe_busy_end(); } NOKPROBE_SYMBOL(kprobe_flush_task); -- cgit v1.2.3