From 71a076f4a61a6c779794ad286f356b39725edc3b Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 24 Nov 2020 12:02:09 +0100 Subject: kcsan: Rewrite kcsan_prandom_u32_max() without prandom_u32_state() Rewrite kcsan_prandom_u32_max() to not depend on code that might be instrumented, removing any dependency on lib/random32.c. The rewrite implements a simple linear congruential generator, that is sufficient for our purposes (for udelay() and skip_watch counter randomness). The initial motivation for this was to allow enabling KCSAN for kernel/sched (remove KCSAN_SANITIZE := n from kernel/sched/Makefile), with CONFIG_DEBUG_PREEMPT=y. Without this change, we could observe recursion: check_access() [via instrumentation] kcsan_setup_watchpoint() reset_kcsan_skip() kcsan_prandom_u32_max() get_cpu_var() preempt_disable() preempt_count_add() [in kernel/sched/core.c] check_access() [via instrumentation] Note, while this currently does not affect an unmodified kernel, it'd be good to keep a KCSAN kernel working when KCSAN_SANITIZE := n is removed from kernel/sched/Makefile to permit testing scheduler code with KCSAN if desired. Fixes: cd290ec24633 ("kcsan: Use tracing-safe version of prandom") Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 3994a217bde7..3bf98db9c702 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -101,7 +100,7 @@ static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1]; static DEFINE_PER_CPU(long, kcsan_skip); /* For kcsan_prandom_u32_max(). */ -static DEFINE_PER_CPU(struct rnd_state, kcsan_rand_state); +static DEFINE_PER_CPU(u32, kcsan_rand_state); static __always_inline atomic_long_t *find_watchpoint(unsigned long addr, size_t size, @@ -275,20 +274,17 @@ should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx * } /* - * Returns a pseudo-random number in interval [0, ep_ro). See prandom_u32_max() - * for more details. - * - * The open-coded version here is using only safe primitives for all contexts - * where we can have KCSAN instrumentation. In particular, we cannot use - * prandom_u32() directly, as its tracepoint could cause recursion. + * Returns a pseudo-random number in interval [0, ep_ro). Simple linear + * congruential generator, using constants from "Numerical Recipes". */ static u32 kcsan_prandom_u32_max(u32 ep_ro) { - struct rnd_state *state = &get_cpu_var(kcsan_rand_state); - const u32 res = prandom_u32_state(state); + u32 state = this_cpu_read(kcsan_rand_state); + + state = 1664525 * state + 1013904223; + this_cpu_write(kcsan_rand_state, state); - put_cpu_var(kcsan_rand_state); - return (u32)(((u64) res * ep_ro) >> 32); + return state % ep_ro; } static inline void reset_kcsan_skip(void) @@ -639,10 +635,14 @@ static __always_inline void check_access(const volatile void *ptr, size_t size, void __init kcsan_init(void) { + int cpu; + BUG_ON(!in_task()); kcsan_debugfs_init(); - prandom_seed_full_state(&kcsan_rand_state); + + for_each_possible_cpu(cpu) + per_cpu(kcsan_rand_state, cpu) = (u32)get_cycles(); /* * We are in the init task, and no other tasks should be running; -- cgit v1.2.3 From dfd5e3f5fe27bda91d5cc028c86ffbb7a0614489 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 9 Dec 2020 16:06:21 +0100 Subject: locking/lockdep: Mark local_lock_t The local_lock_t's are special, because they cannot form IRQ inversions, make sure we can tell them apart from the rest of the locks. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/local_lock_internal.h | 5 ++++- include/linux/lockdep.h | 15 ++++++++++++--- include/linux/lockdep_types.h | 18 ++++++++++++++---- kernel/locking/lockdep.c | 16 +++++++++------- 4 files changed, 39 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 4a8795b21d77..ded90b097e6e 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -18,6 +18,7 @@ typedef struct { .dep_map = { \ .name = #lockname, \ .wait_type_inner = LD_WAIT_CONFIG, \ + .lock_type = LD_LOCK_PERCPU, \ } #else # define LL_DEP_MAP_INIT(lockname) @@ -30,7 +31,9 @@ do { \ static struct lock_class_key __key; \ \ debug_check_no_locks_freed((void *)lock, sizeof(*lock));\ - lockdep_init_map_wait(&(lock)->dep_map, #lock, &__key, 0, LD_WAIT_CONFIG);\ + lockdep_init_map_type(&(lock)->dep_map, #lock, &__key, 0, \ + LD_WAIT_CONFIG, LD_WAIT_INV, \ + LD_LOCK_PERCPU); \ } while (0) #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b9e9adec73e8..7b7ebf2e28ec 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -185,12 +185,19 @@ extern void lockdep_unregister_key(struct lock_class_key *key); * to lockdep: */ -extern void lockdep_init_map_waits(struct lockdep_map *lock, const char *name, - struct lock_class_key *key, int subclass, short inner, short outer); +extern void lockdep_init_map_type(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass, u8 inner, u8 outer, u8 lock_type); + +static inline void +lockdep_init_map_waits(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass, u8 inner, u8 outer) +{ + lockdep_init_map_type(lock, name, key, subclass, inner, LD_WAIT_INV, LD_LOCK_NORMAL); +} static inline void lockdep_init_map_wait(struct lockdep_map *lock, const char *name, - struct lock_class_key *key, int subclass, short inner) + struct lock_class_key *key, int subclass, u8 inner) { lockdep_init_map_waits(lock, name, key, subclass, inner, LD_WAIT_INV); } @@ -340,6 +347,8 @@ static inline void lockdep_set_selftest_task(struct task_struct *task) # define lock_set_class(l, n, k, s, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) +# define lockdep_init_map_type(lock, name, key, sub, inner, outer, type) \ + do { (void)(name); (void)(key); } while (0) # define lockdep_init_map_waits(lock, name, key, sub, inner, outer) \ do { (void)(name); (void)(key); } while (0) # define lockdep_init_map_wait(lock, name, key, sub, inner) \ diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index 9a1fd49df17f..2ec9ff5a7fff 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -30,6 +30,12 @@ enum lockdep_wait_type { LD_WAIT_MAX, /* must be last */ }; +enum lockdep_lock_type { + LD_LOCK_NORMAL = 0, /* normal, catch all */ + LD_LOCK_PERCPU, /* percpu */ + LD_LOCK_MAX, +}; + #ifdef CONFIG_LOCKDEP /* @@ -119,8 +125,10 @@ struct lock_class { int name_version; const char *name; - short wait_type_inner; - short wait_type_outer; + u8 wait_type_inner; + u8 wait_type_outer; + u8 lock_type; + /* u8 hole; */ #ifdef CONFIG_LOCK_STAT unsigned long contention_point[LOCKSTAT_POINTS]; @@ -169,8 +177,10 @@ struct lockdep_map { struct lock_class_key *key; struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; const char *name; - short wait_type_outer; /* can be taken in this context */ - short wait_type_inner; /* presents this context */ + u8 wait_type_outer; /* can be taken in this context */ + u8 wait_type_inner; /* presents this context */ + u8 lock_type; + /* u8 hole; */ #ifdef CONFIG_LOCK_STAT int cpu; unsigned long ip; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c1418b47f625..b061e2991700 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1290,6 +1290,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) class->name_version = count_matching_names(class); class->wait_type_inner = lock->wait_type_inner; class->wait_type_outer = lock->wait_type_outer; + class->lock_type = lock->lock_type; /* * We use RCU's safe list-add method to make * parallel walking of the hash-list safe: @@ -4503,9 +4504,9 @@ print_lock_invalid_wait_context(struct task_struct *curr, */ static int check_wait_context(struct task_struct *curr, struct held_lock *next) { - short next_inner = hlock_class(next)->wait_type_inner; - short next_outer = hlock_class(next)->wait_type_outer; - short curr_inner; + u8 next_inner = hlock_class(next)->wait_type_inner; + u8 next_outer = hlock_class(next)->wait_type_outer; + u8 curr_inner; int depth; if (!curr->lockdep_depth || !next_inner || next->trylock) @@ -4528,7 +4529,7 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next) for (; depth < curr->lockdep_depth; depth++) { struct held_lock *prev = curr->held_locks + depth; - short prev_inner = hlock_class(prev)->wait_type_inner; + u8 prev_inner = hlock_class(prev)->wait_type_inner; if (prev_inner) { /* @@ -4577,9 +4578,9 @@ static inline int check_wait_context(struct task_struct *curr, /* * Initialize a lock instance's lock-class mapping info: */ -void lockdep_init_map_waits(struct lockdep_map *lock, const char *name, +void lockdep_init_map_type(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass, - short inner, short outer) + u8 inner, u8 outer, u8 lock_type) { int i; @@ -4602,6 +4603,7 @@ void lockdep_init_map_waits(struct lockdep_map *lock, const char *name, lock->wait_type_outer = outer; lock->wait_type_inner = inner; + lock->lock_type = lock_type; /* * No key, no joy, we need to hash something. @@ -4636,7 +4638,7 @@ void lockdep_init_map_waits(struct lockdep_map *lock, const char *name, raw_local_irq_restore(flags); } } -EXPORT_SYMBOL_GPL(lockdep_init_map_waits); +EXPORT_SYMBOL_GPL(lockdep_init_map_type); struct lock_class_key __lockdep_no_validate__; EXPORT_SYMBOL_GPL(__lockdep_no_validate__); -- cgit v1.2.3 From bc2dd71b283665f0a409d5b6fc603d5a6fdc219e Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Thu, 10 Dec 2020 11:02:40 +0100 Subject: locking/lockdep: Add a skip() function to __bfs() Some __bfs() walks will have additional iteration constraints (beyond the path being strong). Provide an additional function to allow terminating graph walks. Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) --- kernel/locking/lockdep.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index b061e2991700..f50f026772d7 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1672,6 +1672,7 @@ static inline struct lock_list *__bfs_next(struct lock_list *lock, int offset) static enum bfs_result __bfs(struct lock_list *source_entry, void *data, bool (*match)(struct lock_list *entry, void *data), + bool (*skip)(struct lock_list *entry, void *data), struct lock_list **target_entry, int offset) { @@ -1732,7 +1733,12 @@ static enum bfs_result __bfs(struct lock_list *source_entry, /* * Step 3: we haven't visited this and there is a strong * dependency path to this, so check with @match. + * If @skip is provide and returns true, we skip this + * lock (and any path this lock is in). */ + if (skip && skip(lock, data)) + continue; + if (match(lock, data)) { *target_entry = lock; return BFS_RMATCH; @@ -1775,9 +1781,10 @@ static inline enum bfs_result __bfs_forwards(struct lock_list *src_entry, void *data, bool (*match)(struct lock_list *entry, void *data), + bool (*skip)(struct lock_list *entry, void *data), struct lock_list **target_entry) { - return __bfs(src_entry, data, match, target_entry, + return __bfs(src_entry, data, match, skip, target_entry, offsetof(struct lock_class, locks_after)); } @@ -1786,9 +1793,10 @@ static inline enum bfs_result __bfs_backwards(struct lock_list *src_entry, void *data, bool (*match)(struct lock_list *entry, void *data), + bool (*skip)(struct lock_list *entry, void *data), struct lock_list **target_entry) { - return __bfs(src_entry, data, match, target_entry, + return __bfs(src_entry, data, match, skip, target_entry, offsetof(struct lock_class, locks_before)); } @@ -2019,7 +2027,7 @@ static unsigned long __lockdep_count_forward_deps(struct lock_list *this) unsigned long count = 0; struct lock_list *target_entry; - __bfs_forwards(this, (void *)&count, noop_count, &target_entry); + __bfs_forwards(this, (void *)&count, noop_count, NULL, &target_entry); return count; } @@ -2044,7 +2052,7 @@ static unsigned long __lockdep_count_backward_deps(struct lock_list *this) unsigned long count = 0; struct lock_list *target_entry; - __bfs_backwards(this, (void *)&count, noop_count, &target_entry); + __bfs_backwards(this, (void *)&count, noop_count, NULL, &target_entry); return count; } @@ -2072,11 +2080,12 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class) static noinline enum bfs_result check_path(struct held_lock *target, struct lock_list *src_entry, bool (*match)(struct lock_list *entry, void *data), + bool (*skip)(struct lock_list *entry, void *data), struct lock_list **target_entry) { enum bfs_result ret; - ret = __bfs_forwards(src_entry, target, match, target_entry); + ret = __bfs_forwards(src_entry, target, match, skip, target_entry); if (unlikely(bfs_error(ret))) print_bfs_bug(ret); @@ -2103,7 +2112,7 @@ check_noncircular(struct held_lock *src, struct held_lock *target, debug_atomic_inc(nr_cyclic_checks); - ret = check_path(target, &src_entry, hlock_conflict, &target_entry); + ret = check_path(target, &src_entry, hlock_conflict, NULL, &target_entry); if (unlikely(ret == BFS_RMATCH)) { if (!*trace) { @@ -2152,7 +2161,7 @@ check_redundant(struct held_lock *src, struct held_lock *target) debug_atomic_inc(nr_redundant_checks); - ret = check_path(target, &src_entry, hlock_equal, &target_entry); + ret = check_path(target, &src_entry, hlock_equal, NULL, &target_entry); if (ret == BFS_RMATCH) debug_atomic_inc(nr_redundant); @@ -2246,7 +2255,7 @@ find_usage_forwards(struct lock_list *root, unsigned long usage_mask, debug_atomic_inc(nr_find_usage_forwards_checks); - result = __bfs_forwards(root, &usage_mask, usage_match, target_entry); + result = __bfs_forwards(root, &usage_mask, usage_match, NULL, target_entry); return result; } @@ -2263,7 +2272,7 @@ find_usage_backwards(struct lock_list *root, unsigned long usage_mask, debug_atomic_inc(nr_find_usage_backwards_checks); - result = __bfs_backwards(root, &usage_mask, usage_match, target_entry); + result = __bfs_backwards(root, &usage_mask, usage_match, NULL, target_entry); return result; } @@ -2628,7 +2637,7 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, */ bfs_init_rootb(&this, prev); - ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL); + ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL, NULL); if (bfs_error(ret)) { print_bfs_bug(ret); return 0; -- cgit v1.2.3 From 175b1a60e8805617d74aefe17ce0d3a32eceb55c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Dec 2020 11:16:34 +0100 Subject: locking/lockdep: Clean up check_redundant() a bit In preparation for adding an TRACE_IRQFLAGS dependent skip function to check_redundant(), move it below the TRACE_IRQFLAGS #ifdef. While there, provide a stub function to reduce #ifdef usage. Signed-off-by: Peter Zijlstra (Intel) --- kernel/locking/lockdep.c | 91 ++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 42 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index f50f026772d7..f2ae8a65f667 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2130,46 +2130,6 @@ check_noncircular(struct held_lock *src, struct held_lock *target, return ret; } -#ifdef CONFIG_LOCKDEP_SMALL -/* - * Check that the dependency graph starting at can lead to - * or not. If it can, -> dependency is already - * in the graph. - * - * Return BFS_RMATCH if it does, or BFS_RMATCH if it does not, return BFS_E* if - * any error appears in the bfs search. - */ -static noinline enum bfs_result -check_redundant(struct held_lock *src, struct held_lock *target) -{ - enum bfs_result ret; - struct lock_list *target_entry; - struct lock_list src_entry; - - bfs_init_root(&src_entry, src); - /* - * Special setup for check_redundant(). - * - * To report redundant, we need to find a strong dependency path that - * is equal to or stronger than -> . So if is E, - * we need to let __bfs() only search for a path starting at a -(E*)->, - * we achieve this by setting the initial node's ->only_xr to true in - * that case. And if is S, we set initial ->only_xr to false - * because both -(S*)-> (equal) and -(E*)-> (stronger) are redundant. - */ - src_entry.only_xr = src->read == 0; - - debug_atomic_inc(nr_redundant_checks); - - ret = check_path(target, &src_entry, hlock_equal, NULL, &target_entry); - - if (ret == BFS_RMATCH) - debug_atomic_inc(nr_redundant); - - return ret; -} -#endif - #ifdef CONFIG_TRACE_IRQFLAGS /* @@ -2706,6 +2666,55 @@ static inline int check_irq_usage(struct task_struct *curr, } #endif /* CONFIG_TRACE_IRQFLAGS */ +#ifdef CONFIG_LOCKDEP_SMALL +/* + * Check that the dependency graph starting at can lead to + * or not. If it can, -> dependency is already + * in the graph. + * + * Return BFS_RMATCH if it does, or BFS_RMATCH if it does not, return BFS_E* if + * any error appears in the bfs search. + */ +static noinline enum bfs_result +check_redundant(struct held_lock *src, struct held_lock *target) +{ + enum bfs_result ret; + struct lock_list *target_entry; + struct lock_list src_entry; + + bfs_init_root(&src_entry, src); + /* + * Special setup for check_redundant(). + * + * To report redundant, we need to find a strong dependency path that + * is equal to or stronger than -> . So if is E, + * we need to let __bfs() only search for a path starting at a -(E*)->, + * we achieve this by setting the initial node's ->only_xr to true in + * that case. And if is S, we set initial ->only_xr to false + * because both -(S*)-> (equal) and -(E*)-> (stronger) are redundant. + */ + src_entry.only_xr = src->read == 0; + + debug_atomic_inc(nr_redundant_checks); + + ret = check_path(target, &src_entry, hlock_equal, NULL, &target_entry); + + if (ret == BFS_RMATCH) + debug_atomic_inc(nr_redundant); + + return ret; +} + +#else + +static inline enum bfs_result +check_redundant(struct held_lock *src, struct held_lock *target) +{ + return BFS_RNOMATCH; +} + +#endif + static void inc_chains(int irq_context) { if (irq_context & LOCK_CHAIN_HARDIRQ_CONTEXT) @@ -2926,7 +2935,6 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, } } -#ifdef CONFIG_LOCKDEP_SMALL /* * Is the -> link redundant? */ @@ -2935,7 +2943,6 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, return 0; else if (ret == BFS_RMATCH) return 2; -#endif if (!*trace) { *trace = save_trace(); -- cgit v1.2.3 From 5f2962401c6e195222f320d12b3a55377b2d4653 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Thu, 10 Dec 2020 11:15:00 +0100 Subject: locking/lockdep: Exclude local_lock_t from IRQ inversions The purpose of local_lock_t is to abstract: preempt_disable() / local_bh_disable() / local_irq_disable(). These are the traditional means of gaining access to per-cpu data, but are fundamentally non-preemptible. local_lock_t provides a per-cpu lock, that on !PREEMPT_RT reduces to no-ops, just like regular spinlocks do on UP. This gives rise to: CPU0 CPU1 local_lock(B) spin_lock_irq(A) spin_lock(A) local_lock(B) Where lockdep then figures things will lock up; which would be true if B were any other kind of lock. However this is a false positive, no such deadlock actually exists. For !RT the above local_lock(B) is preempt_disable(), and there's obviously no deadlock; alternatively, CPU0's B != CPU1's B. For RT the argument is that since local_lock() nests inside spin_lock(), it cannot be used in hardirq context, and therefore CPU0 cannot in fact happen. Even though B is a real lock, it is a preemptible lock and any threaded-irq would simply schedule out and let the preempted task (which holds B) continue such that the task on CPU1 can make progress, after which the threaded-irq resumes and can finish. This means that we can never form an IRQ inversion on a local_lock dependency, so terminate the graph walk when looking for IRQ inversions when we encounter one. One consequence is that (for LOCKDEP_SMALL) when we look for redundant dependencies, A -> B is not redundant in the presence of A -> L -> B. Signed-off-by: Boqun Feng [peterz: Changelog] Signed-off-by: Peter Zijlstra (Intel) --- kernel/locking/lockdep.c | 57 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index f2ae8a65f667..ad9afd8c7eb9 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2200,6 +2200,44 @@ static inline bool usage_match(struct lock_list *entry, void *mask) return !!((entry->class->usage_mask & LOCKF_IRQ) & *(unsigned long *)mask); } +static inline bool usage_skip(struct lock_list *entry, void *mask) +{ + /* + * Skip local_lock() for irq inversion detection. + * + * For !RT, local_lock() is not a real lock, so it won't carry any + * dependency. + * + * For RT, an irq inversion happens when we have lock A and B, and on + * some CPU we can have: + * + * lock(A); + * + * lock(B); + * + * where lock(B) cannot sleep, and we have a dependency B -> ... -> A. + * + * Now we prove local_lock() cannot exist in that dependency. First we + * have the observation for any lock chain L1 -> ... -> Ln, for any + * 1 <= i <= n, Li.inner_wait_type <= L1.inner_wait_type, otherwise + * wait context check will complain. And since B is not a sleep lock, + * therefore B.inner_wait_type >= 2, and since the inner_wait_type of + * local_lock() is 3, which is greater than 2, therefore there is no + * way the local_lock() exists in the dependency B -> ... -> A. + * + * As a result, we will skip local_lock(), when we search for irq + * inversion bugs. + */ + if (entry->class->lock_type == LD_LOCK_PERCPU) { + if (DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG)) + return false; + + return true; + } + + return false; +} + /* * Find a node in the forwards-direction dependency sub-graph starting * at @root->class that matches @bit. @@ -2215,7 +2253,7 @@ find_usage_forwards(struct lock_list *root, unsigned long usage_mask, debug_atomic_inc(nr_find_usage_forwards_checks); - result = __bfs_forwards(root, &usage_mask, usage_match, NULL, target_entry); + result = __bfs_forwards(root, &usage_mask, usage_match, usage_skip, target_entry); return result; } @@ -2232,7 +2270,7 @@ find_usage_backwards(struct lock_list *root, unsigned long usage_mask, debug_atomic_inc(nr_find_usage_backwards_checks); - result = __bfs_backwards(root, &usage_mask, usage_match, NULL, target_entry); + result = __bfs_backwards(root, &usage_mask, usage_match, usage_skip, target_entry); return result; } @@ -2597,7 +2635,7 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, */ bfs_init_rootb(&this, prev); - ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL, NULL); + ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, usage_skip, NULL); if (bfs_error(ret)) { print_bfs_bug(ret); return 0; @@ -2664,6 +2702,12 @@ static inline int check_irq_usage(struct task_struct *curr, { return 1; } + +static inline bool usage_skip(struct lock_list *entry, void *mask) +{ + return false; +} + #endif /* CONFIG_TRACE_IRQFLAGS */ #ifdef CONFIG_LOCKDEP_SMALL @@ -2697,7 +2741,12 @@ check_redundant(struct held_lock *src, struct held_lock *target) debug_atomic_inc(nr_redundant_checks); - ret = check_path(target, &src_entry, hlock_equal, NULL, &target_entry); + /* + * Note: we skip local_lock() for redundant check, because as the + * comment in usage_skip(), A -> local_lock() -> B and A -> B are not + * the same. + */ + ret = check_path(target, &src_entry, hlock_equal, usage_skip, &target_entry); if (ret == BFS_RMATCH) debug_atomic_inc(nr_redundant); -- cgit v1.2.3 From 997acaf6b4b59c6a9c259740312a69ea549cc684 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 11 Jan 2021 15:37:07 +0000 Subject: lockdep: report broken irq restoration We generally expect local_irq_save() and local_irq_restore() to be paired and sanely nested, and so local_irq_restore() expects to be called with irqs disabled. Thus, within local_irq_restore() we only trace irq flag changes when unmasking irqs. This means that a sequence such as: | local_irq_disable(); | local_irq_save(flags); | local_irq_enable(); | local_irq_restore(flags); ... is liable to break things, as the local_irq_restore() would mask irqs without tracing this change. Similar problems may exist for architectures whose arch_irq_restore() function depends on being called with irqs disabled. We don't consider such sequences to be a good idea, so let's define those as forbidden, and add tooling to detect such broken cases. This patch adds debug code to WARN() when raw_local_irq_restore() is called with irqs enabled. As raw_local_irq_restore() is expected to pair with raw_local_irq_save(), it should never be called with irqs enabled. To avoid the possibility of circular header dependencies between irqflags.h and bug.h, the warning is handled in a separate C file. The new code is all conditional on a new CONFIG_DEBUG_IRQFLAGS symbol which is independent of CONFIG_TRACE_IRQFLAGS. As noted above such cases will confuse lockdep, so CONFIG_DEBUG_LOCKDEP now selects CONFIG_DEBUG_IRQFLAGS. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210111153707.10071-1-mark.rutland@arm.com --- include/linux/irqflags.h | 12 ++++++++++++ kernel/locking/Makefile | 1 + kernel/locking/irqflag-debug.c | 11 +++++++++++ lib/Kconfig.debug | 8 ++++++++ 4 files changed, 32 insertions(+) create mode 100644 kernel/locking/irqflag-debug.c (limited to 'kernel') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 8de0e1373de7..600c10da321a 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -149,6 +149,17 @@ do { \ # define start_critical_timings() do { } while (0) #endif +#ifdef CONFIG_DEBUG_IRQFLAGS +extern void warn_bogus_irq_restore(void); +#define raw_check_bogus_irq_restore() \ + do { \ + if (unlikely(!arch_irqs_disabled())) \ + warn_bogus_irq_restore(); \ + } while (0) +#else +#define raw_check_bogus_irq_restore() do { } while (0) +#endif + /* * Wrap the arch provided IRQ routines to provide appropriate checks. */ @@ -162,6 +173,7 @@ do { \ #define raw_local_irq_restore(flags) \ do { \ typecheck(unsigned long, flags); \ + raw_check_bogus_irq_restore(); \ arch_local_irq_restore(flags); \ } while (0) #define raw_local_save_flags(flags) \ diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 6d11cfb9b41f..8838f1d7c4a2 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -15,6 +15,7 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE) endif +obj-$(CONFIG_DEBUG_IRQFLAGS) += irqflag-debug.o obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o obj-$(CONFIG_LOCKDEP) += lockdep.o ifeq ($(CONFIG_PROC_FS),y) diff --git a/kernel/locking/irqflag-debug.c b/kernel/locking/irqflag-debug.c new file mode 100644 index 000000000000..9603d207a4ca --- /dev/null +++ b/kernel/locking/irqflag-debug.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include + +void warn_bogus_irq_restore(void) +{ + WARN_ONCE(1, "raw_local_irq_restore() called with IRQs enabled\n"); +} +EXPORT_SYMBOL(warn_bogus_irq_restore); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e6e58b26e888..78eadf615df8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1343,6 +1343,7 @@ config LOCKDEP_SMALL config DEBUG_LOCKDEP bool "Lock dependency engine debugging" depends on DEBUG_KERNEL && LOCKDEP + select DEBUG_IRQFLAGS help If you say Y here, the lock dependency engine will do additional runtime checks to debug itself, at the price @@ -1431,6 +1432,13 @@ config TRACE_IRQFLAGS_NMI depends on TRACE_IRQFLAGS depends on TRACE_IRQFLAGS_NMI_SUPPORT +config DEBUG_IRQFLAGS + bool "Debug IRQ flag manipulation" + help + Enables checks for potentially unsafe enabling or disabling of + interrupts, such as calling raw_local_irq_restore() when interrupts + are enabled. + config STACKTRACE bool "Stack backtrace support" depends on STACKTRACE_SUPPORT -- cgit v1.2.3 From 1ce53e2c2ac069e7b3c400a427002a70deb4a916 Mon Sep 17 00:00:00 2001 From: Alejandro Colomar Date: Sat, 28 Nov 2020 13:39:46 +0100 Subject: futex: Change utime parameter to be 'const ... *' futex(2) says that 'utime' is a pointer to 'const'. The implementation doesn't use 'const'; however, it _never_ modifies the contents of utime. - futex() either uses 'utime' as a pointer to struct or as a 'u32'. - In case it's used as a 'u32', it makes a copy of it, and of course it is not dereferenced. - In case it's used as a 'struct __kernel_timespec __user *', the pointer is not dereferenced inside the futex() definition, and it is only passed to a function: get_timespec64(), which accepts a 'const struct __kernel_timespec __user *'. [ tglx: Make the same change to the compat syscall and fixup the prototypes. ] Signed-off-by: Alejandro Colomar Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201128123945.4592-1-alx.manpages@gmail.com --- include/linux/syscalls.h | 8 ++++---- kernel/futex.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f3929aff39cf..5cb74edd9a4f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -583,11 +583,11 @@ asmlinkage long sys_unshare(unsigned long unshare_flags); /* kernel/futex.c */ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, - struct __kernel_timespec __user *utime, u32 __user *uaddr2, - u32 val3); + const struct __kernel_timespec __user *utime, + u32 __user *uaddr2, u32 val3); asmlinkage long sys_futex_time32(u32 __user *uaddr, int op, u32 val, - struct old_timespec32 __user *utime, u32 __user *uaddr2, - u32 val3); + const struct old_timespec32 __user *utime, + u32 __user *uaddr2, u32 val3); asmlinkage long sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, size_t __user *len_ptr); diff --git a/kernel/futex.c b/kernel/futex.c index c47d1015d759..d0775aab8da9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3790,8 +3790,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, - u32, val3) + const struct __kernel_timespec __user *, utime, + u32 __user *, uaddr2, u32, val3) { struct timespec64 ts; ktime_t t, *tp = NULL; @@ -3986,7 +3986,7 @@ err_unlock: #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - struct old_timespec32 __user *, utime, u32 __user *, uaddr2, + const struct old_timespec32 __user *, utime, u32 __user *, uaddr2, u32, val3) { struct timespec64 ts; -- cgit v1.2.3 From 0f9438503ea1312ef49be4d9762e0f0006546364 Mon Sep 17 00:00:00 2001 From: Jangwoong Kim <6812skiii@gmail.com> Date: Wed, 30 Dec 2020 21:29:53 +0900 Subject: futex: Remove unneeded gotos Get rid of gotos that do not contain any cleanup. These were not removed in commit 9180bd467f9a ("futex: Remove put_futex_key()"). Signed-off-by: Jangwoong Kim <6812skiii@gmail.com> Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201230122953.10473-1-6812skiii@gmail.com --- kernel/futex.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index d0775aab8da9..f3570a276990 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3024,7 +3024,7 @@ retry: * Success, we're done! No tricky corner cases. */ if (!ret) - goto out_putkey; + return ret; /* * The atomic access to the futex value generated a * pagefault, so retry the user-access and the wakeup: @@ -3041,7 +3041,7 @@ retry: * wake_futex_pi has detected invalid state. Tell user * space. */ - goto out_putkey; + return ret; } /* @@ -3062,7 +3062,7 @@ retry: default: WARN_ON_ONCE(1); - goto out_putkey; + return ret; } } @@ -3073,7 +3073,6 @@ retry: out_unlock: spin_unlock(&hb->lock); -out_putkey: return ret; pi_retry: -- cgit v1.2.3 From bf594bf400016a1ac58c753bcc0393a39c36f669 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Fri, 13 Nov 2020 16:58:11 +0800 Subject: locking/rtmutex: Add missing kernel-doc markup To fix the following issues: kernel/locking/rtmutex.c:1612: warning: Function parameter or member 'lock' not described in '__rt_mutex_futex_unlock' kernel/locking/rtmutex.c:1612: warning: Function parameter or member 'wake_q' not described in '__rt_mutex_futex_unlock' kernel/locking/rtmutex.c:1675: warning: Function parameter or member 'name' not described in '__rt_mutex_init' kernel/locking/rtmutex.c:1675: warning: Function parameter or member 'key' not described in '__rt_mutex_init' [ tglx: Change rt lock to rt_mutex for consistency sake ] Signed-off-by: Alex Shi Signed-off-by: Thomas Gleixner Acked-by: Will Deacon Link: https://lore.kernel.org/r/1605257895-5536-2-git-send-email-alex.shi@linux.alibaba.com --- kernel/locking/rtmutex.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index cfdd5b93264d..a201e5ef9374 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1604,8 +1604,11 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock) EXPORT_SYMBOL_GPL(rt_mutex_unlock); /** - * Futex variant, that since futex variants do not use the fast-path, can be - * simple and will not need to retry. + * __rt_mutex_futex_unlock - Futex variant, that since futex variants + * do not use the fast-path, can be simple and will not need to retry. + * + * @lock: The rt_mutex to be unlocked + * @wake_q: The wake queue head from which to get the next lock waiter */ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, struct wake_q_head *wake_q) @@ -1662,13 +1665,15 @@ void rt_mutex_destroy(struct rt_mutex *lock) EXPORT_SYMBOL_GPL(rt_mutex_destroy); /** - * __rt_mutex_init - initialize the rt lock + * __rt_mutex_init - initialize the rt_mutex * - * @lock: the rt lock to be initialized + * @lock: The rt_mutex to be initialized + * @name: The lock name used for debugging + * @key: The lock class key used for debugging * - * Initialize the rt lock to unlocked state. + * Initialize the rt_mutex to unlocked state. * - * Initializing of a locked rt lock is not allowed + * Initializing of a locked rt_mutex is not allowed */ void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key) -- cgit v1.2.3 From 442187f3c2de40bab13b8f9751b37925bede73b0 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 26 Jan 2021 12:17:21 +0200 Subject: locking/rwsem: Remove empty rwsem.h This is a leftover from 7f26482a872c ("locking/percpu-rwsem: Remove the embedded rwsem") Signed-off-by: Nikolay Borisov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Link: https://lkml.kernel.org/r/20210126101721.976027-1-nborisov@suse.com --- kernel/locking/rwsem.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 kernel/locking/rwsem.h (limited to 'kernel') diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h deleted file mode 100644 index e69de29bb2d1..000000000000 -- cgit v1.2.3 From 7f82e631d236cafd28518b998c6d4d8dc2ef68f6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 1 Feb 2021 11:55:38 +0100 Subject: locking/lockdep: Avoid unmatched unlock Commit f6f48e180404 ("lockdep: Teach lockdep about "USED" <- "IN-NMI" inversions") overlooked that print_usage_bug() releases the graph_lock and called it without the graph lock held. Fixes: f6f48e180404 ("lockdep: Teach lockdep about "USED" <- "IN-NMI" inversions") Reported-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Link: https://lkml.kernel.org/r/YBfkuyIfB1+VRxXP@hirez.programming.kicks-ass.net --- kernel/locking/lockdep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index ad9afd8c7eb9..5104db0e23e4 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3773,7 +3773,7 @@ static void print_usage_bug(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) { - if (!debug_locks_off_graph_unlock() || debug_locks_silent) + if (!debug_locks_off() || debug_locks_silent) return; pr_warn("\n"); @@ -3814,6 +3814,7 @@ valid_state(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) { if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit))) { + graph_unlock(); print_usage_bug(curr, this, bad_bit, new_bit); return 0; } -- cgit v1.2.3 From c8cc7e853192d520ab6a5957f5081034103587ae Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Feb 2021 09:30:03 +0100 Subject: lockdep: Noinstr annotate warn_bogus_irq_restore() vmlinux.o: warning: objtool: lock_is_held_type()+0x107: call to warn_bogus_irq_restore() leaves .noinstr.text section As per the general rule that WARNs are allowed to violate noinstr to get out, annotate it away. Fixes: 997acaf6b4b5 ("lockdep: report broken irq restoration") Reported-by: Randy Dunlap Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Acked-by: Randy Dunlap # build-tested Link: https://lkml.kernel.org/r/YCKyYg53mMp4E7YI@hirez.programming.kicks-ass.net --- kernel/locking/irqflag-debug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/locking/irqflag-debug.c b/kernel/locking/irqflag-debug.c index 9603d207a4ca..810b50344d35 100644 --- a/kernel/locking/irqflag-debug.c +++ b/kernel/locking/irqflag-debug.c @@ -4,8 +4,10 @@ #include #include -void warn_bogus_irq_restore(void) +noinstr void warn_bogus_irq_restore(void) { + instrumentation_begin(); WARN_ONCE(1, "raw_local_irq_restore() called with IRQs enabled\n"); + instrumentation_end(); } EXPORT_SYMBOL(warn_bogus_irq_restore); -- cgit v1.2.3 From 0f319d49a4167e402b01b2b56639386f0b6846ba Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 10 Feb 2021 09:52:47 +0100 Subject: locking/mutex: Kill mutex_trylock_recursive() There are not users of mutex_trylock_recursive() in tree as of v5.11-rc7. Remove it. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210210085248.219210-2-bigeasy@linutronix.de --- include/linux/mutex.h | 25 ------------------------- kernel/locking/mutex.c | 10 ---------- 2 files changed, 35 deletions(-) (limited to 'kernel') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index dcd185cbfe79..0cd631a19727 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -199,29 +199,4 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); -/* - * These values are chosen such that FAIL and SUCCESS match the - * values of the regular mutex_trylock(). - */ -enum mutex_trylock_recursive_enum { - MUTEX_TRYLOCK_FAILED = 0, - MUTEX_TRYLOCK_SUCCESS = 1, - MUTEX_TRYLOCK_RECURSIVE, -}; - -/** - * mutex_trylock_recursive - trylock variant that allows recursive locking - * @lock: mutex to be locked - * - * This function should not be used, _ever_. It is purely for hysterical GEM - * raisins, and once those are gone this will be removed. - * - * Returns: - * - MUTEX_TRYLOCK_FAILED - trylock failed, - * - MUTEX_TRYLOCK_SUCCESS - lock acquired, - * - MUTEX_TRYLOCK_RECURSIVE - we already owned the lock. - */ -extern /* __deprecated */ __must_check enum mutex_trylock_recursive_enum -mutex_trylock_recursive(struct mutex *lock); - #endif /* __LINUX_MUTEX_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 5352ce50a97e..adb935090768 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -86,16 +86,6 @@ bool mutex_is_locked(struct mutex *lock) } EXPORT_SYMBOL(mutex_is_locked); -__must_check enum mutex_trylock_recursive_enum -mutex_trylock_recursive(struct mutex *lock) -{ - if (unlikely(__mutex_owner(lock) == current)) - return MUTEX_TRYLOCK_RECURSIVE; - - return mutex_trylock(lock); -} -EXPORT_SYMBOL(mutex_trylock_recursive); - static inline unsigned long __owner_flags(unsigned long owner) { return owner & MUTEX_FLAGS; -- cgit v1.2.3