From 93d0955e6cf562d02aae37f5f8d98d9d9d16e0d4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 12 May 2021 20:04:28 +0200
Subject: locking: Fix comment typos

A few snuck through.

Signed-off-by: Ingo Molnar
---
 kernel/futex.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/futex.c b/kernel/futex.c
index 4938a00bc785..2f386f012900 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1874,7 +1874,7 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
	 * If the caller intends to requeue more than 1 waiter to pifutex,
	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
	 * as we have means to handle the possible fault. If not, don't set
-	 * the bit unecessarily as it will force the subsequent unlock to enter
+	 * the bit unnecessarily as it will force the subsequent unlock to enter
	 * the kernel.
	 */
	top_waiter = futex_top_waiter(hb1, key1);
@@ -2103,7 +2103,7 @@ retry_private:
			continue;

		/*
-		 * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
+		 * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
		 * be paired with each other and no other futex ops.
		 *
		 * We should never be requeueing a futex_q with a pi_state,
@@ -2318,7 +2318,7 @@ retry:
 }

 /*
- * PI futexes can not be requeued and must remove themself from the
+ * PI futexes can not be requeued and must remove themselves from the
  * hash bucket. The hash bucket lock (i.e. lock_ptr) is held.
  */
 static void unqueue_me_pi(struct futex_q *q)
@@ -2903,7 +2903,7 @@ no_block:
	 */
	res = fixup_owner(uaddr, &q, !ret);
	/*
-	 * If fixup_owner() returned an error, proprogate that. If it acquired
+	 * If fixup_owner() returned an error, propagate that. If it acquired
	 * the lock, clear our -ETIMEDOUT or -EINTR.
	 */
	if (res)
@@ -3280,7 +3280,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
		 */
		res = fixup_owner(uaddr2, &q, !ret);
		/*
-		 * If fixup_owner() returned an error, proprogate that. If it
+		 * If fixup_owner() returned an error, propagate that. If it
		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
		 */
		if (res)
@@ -3678,7 +3678,7 @@ void futex_exec_release(struct task_struct *tsk)
 {
	/*
	 * The state handling is done for consistency, but in the case of
-	 * exec() there is no way to prevent futher damage as the PID stays
+	 * exec() there is no way to prevent further damage as the PID stays
	 * the same. But for the unlikely and arguably buggy case that a
	 * futex is held on exec(), this provides at least as much state
	 * consistency protection which is possible.
-- cgit v1.2.3

From a82adc7650044b5555d65078bda07866efa4a73d Mon Sep 17 00:00:00 2001
From: Pavel Begunkov
Date: Mon, 17 May 2021 14:30:12 +0100
Subject: futex: Deduplicate cond_resched() invocation in futex_wake_op()

After pagefaulting in futex_wake_op() both branches do cond_resched()
before retry. Deduplicate it as compilers cannot figure it out themselves.
Signed-off-by: Pavel Begunkov
Signed-off-by: Thomas Gleixner
Reviewed-by: Davidlohr Bueso
Link: https://lore.kernel.org/r/9b2588c1fd33c91fb01c4e348a3b647ab2c8baab.1621258128.git.asml.silence@gmail.com
---
 kernel/futex.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/futex.c b/kernel/futex.c
index 2f386f012900..08008c225bec 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1728,12 +1728,9 @@ retry_private:
			return ret;
		}

-		if (!(flags & FLAGS_SHARED)) {
-			cond_resched();
-			goto retry_private;
-		}
-
		cond_resched();
+		if (!(flags & FLAGS_SHARED))
+			goto retry_private;
		goto retry;
	}
-- cgit v1.2.3

From 69c7a5fb2482636f525f016c8333fdb9111ecb9d Mon Sep 17 00:00:00 2001
From: Boqun Feng
Date: Sat, 19 Jun 2021 01:01:07 +0800
Subject: locking/lockdep: Fix the dep path printing for backwards BFS

We use the same code to print the backwards lock dependency path as the
forwards lock dependency path, and this could result in incorrect printing,
because for a backwards lock_list, ->trace is not the call trace where the
lock of ->class is acquired.

Fix this by introducing a separate function for printing the backwards
dependency path. Also add a few comments about the printing while we are
at it.

Reported-by: Johannes Berg
Signed-off-by: Boqun Feng
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20210618170110.3699115-2-boqun.feng@gmail.com
---
 kernel/locking/lockdep.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 106 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 48d736aa03b2..3b32cd9cdfd0 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2304,7 +2304,56 @@ static void print_lock_class_header(struct lock_class *class, int depth)
 }

 /*
- * printk the shortest lock dependencies from @start to @end in reverse order:
+ * Dependency path printing:
+ *
+ * After BFS we get a lock dependency path (linked via ->parent of lock_list),
+ * printing out each lock in the dependency path will help on understanding how
+ * the deadlock could happen. Here are some details about dependency path
+ * printing:
+ *
+ * 1) A lock_list can be either forwards or backwards for a lock dependency,
+ *    for a lock dependency A -> B, there are two lock_lists:
+ *
+ *    a) lock_list in the ->locks_after list of A, whose ->class is B and
+ *       ->links_to is A. In this case, we can say the lock_list is
+ *       "A -> B" (forwards case).
+ *
+ *    b) lock_list in the ->locks_before list of B, whose ->class is A
+ *       and ->links_to is B. In this case, we can say the lock_list is
+ *       "B <- A" (bacwards case).
+ *
+ *    The ->trace of both a) and b) point to the call trace where B was
+ *    acquired with A held.
+ *
+ * 2) A "helper" lock_list is introduced during BFS, this lock_list doesn't
+ *    represent a certain lock dependency, it only provides an initial entry
+ *    for BFS. For example, BFS may introduce a "helper" lock_list whose
+ *    ->class is A, as a result BFS will search all dependencies starting with
+ *    A, e.g. A -> B or A -> C.
+ *
+ *    The notation of a forwards helper lock_list is like "-> A", which means
+ *    we should search the forwards dependencies starting with "A", e.g A -> B
+ *    or A -> C.
+ *
+ *    The notation of a bacwards helper lock_list is like "<- B", which means
+ *    we should search the backwards dependencies ending with "B", e.g.
+ *    B <- A or B <- C.
+ */
+
+/*
+ * printk the shortest lock dependencies from @root to @leaf in reverse order.
+ *
+ * We have a lock dependency path as follow:
+ *
+ * @root                                                                 @leaf
+ *   |                                                                     |
+ *   V                                                                     V
+ *            ->parent                                    ->parent
+ * | lock_list | <--------- | lock_list | ... | lock_list  | <--------- | lock_list |
+ * |    -> L1  |            | L1 -> L2  | ... |Ln-2 -> Ln-1|            | Ln-1 -> Ln|
+ *
+ * , so it's natural that we start from @leaf and print every ->class and
+ * ->trace until we reach the @root.
  */
 static void __used
 print_shortest_lock_dependencies(struct lock_list *leaf,
@@ -2332,6 +2381,61 @@ print_shortest_lock_dependencies(struct lock_list *leaf,
	} while (entry && (depth >= 0));
 }

+/*
+ * printk the shortest lock dependencies from @leaf to @root.
+ *
+ * We have a lock dependency path (from a backwards search) as follow:
+ *
+ * @leaf                                                                 @root
+ *   |                                                                     |
+ *   V                                                                     V
+ *            ->parent                                    ->parent
+ * | lock_list | ---------> | lock_list | ... | lock_list  | ---------> | lock_list |
+ * | L2 <- L1  |            | L3 <- L2  | ... | Ln <- Ln-1 |            |    <- Ln  |
+ *
+ * , so when we iterate from @leaf to @root, we actually print the lock
+ * dependency path L1 -> L2 -> .. -> Ln in the non-reverse order.
+ *
+ * Another thing to notice here is that ->class of L2 <- L1 is L1, while the
+ * ->trace of L2 <- L1 is the call trace of L2, in fact we don't have the call
+ * trace of L1 in the dependency path, which is alright, because most of the
+ * time we can figure out where L1 is held from the call trace of L2.
+ */
+static void __used
+print_shortest_lock_dependencies_backwards(struct lock_list *leaf,
+					   struct lock_list *root)
+{
+	struct lock_list *entry = leaf;
+	const struct lock_trace *trace = NULL;
+	int depth;
+
+	/*compute depth from generated tree by BFS*/
+	depth = get_lock_depth(leaf);
+
+	do {
+		print_lock_class_header(entry->class, depth);
+		if (trace) {
+			printk("%*s ... acquired at:\n", depth, "");
+			print_lock_trace(trace, 2);
+			printk("\n");
+		}
+
+		/*
+		 * Record the pointer to the trace for the next lock_list
+		 * entry, see the comments for the function.
+		 */
+		trace = entry->trace;
+
+		if (depth == 0 && (entry != root)) {
+			printk("lockdep:%s bad path found in chain graph\n", __func__);
+			break;
+		}
+
+		entry = get_lock_parent(entry);
+		depth--;
+	} while (entry && (depth >= 0));
+}
+
 static void
 print_irq_lock_scenario(struct lock_list *safe_entry,
			struct lock_list *unsafe_entry,
@@ -2449,7 +2553,7 @@ print_bad_irq_dependency(struct task_struct *curr,
	prev_root->trace = save_trace();
	if (!prev_root->trace)
		return;
-	print_shortest_lock_dependencies(backwards_entry, prev_root);
+	print_shortest_lock_dependencies_backwards(backwards_entry, prev_root);

	pr_warn("\nthe dependencies between the lock to be acquired");
	pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
-- cgit v1.2.3

From d4c157c7b1a67a0844a904baaca9a840c196c103 Mon Sep 17 00:00:00 2001
From: Boqun Feng
Date: Sat, 19 Jun 2021 01:01:08 +0800
Subject: locking/lockdep: Remove the unnecessary trace saving

In print_bad_irq_dependency(), save_trace() is called to set the ->trace
for @prev_root as the current call trace, however @prev_root corresponds
to the held lock, which may not be acquired in the current call trace,
therefore it's wrong to use save_trace() to set the ->trace of @prev_root.

Moreover, with our adjustment of printing the backwards dependency path,
the ->trace of @prev_root is unnecessary, so remove it.
Reported-by: Johannes Berg
Signed-off-by: Boqun Feng
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20210618170110.3699115-3-boqun.feng@gmail.com
---
 kernel/locking/lockdep.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 3b32cd9cdfd0..74d084a398be 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2550,9 +2550,6 @@ print_bad_irq_dependency(struct task_struct *curr,
	lockdep_print_held_locks(curr);

	pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
-	prev_root->trace = save_trace();
-	if (!prev_root->trace)
-		return;
	print_shortest_lock_dependencies_backwards(backwards_entry, prev_root);

	pr_warn("\nthe dependencies between the lock to be acquired");
-- cgit v1.2.3

From 7b1f8c6179769af6ffa055e1169610b51d71edd5 Mon Sep 17 00:00:00 2001
From: Boqun Feng
Date: Sat, 19 Jun 2021 01:01:09 +0800
Subject: locking/lockdep: Avoid finding wrong lock dep path in check_irq_usage()

In step #3 of check_irq_usage(), we search backwards to find a lock whose
usage conflicts with the usage of @target_entry1 on safe/unsafe. However,
we should only take the irq-unsafe usage of @target_entry1 into
consideration, because it could be a case where a lock is hardirq-unsafe
but softirq-safe, and in check_irq_usage() we find it because its
hardirq-unsafe usage could result in a hardirq-safe/unsafe deadlock. Since
we currently don't filter out the other usage bits, we may find a lock
dependency path softirq-unsafe -> softirq-safe, which in fact doesn't
cause a deadlock, and this may produce misleading lockdep splats.

Fix this by only keeping the LOCKF_ENABLED_IRQ_ALL bits when we do the
backwards search.

Reported-by: Johannes Berg
Signed-off-by: Boqun Feng
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20210618170110.3699115-4-boqun.feng@gmail.com
---
 kernel/locking/lockdep.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 74d084a398be..6ff1e8405a83 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2768,8 +2768,18 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
	 * Step 3: we found a bad match! Now retrieve a lock from the backward
	 * list whose usage mask matches the exclusive usage mask from the
	 * lock found on the forward list.
+	 *
+	 * Note, we should only keep the LOCKF_ENABLED_IRQ_ALL bits, considering
+	 * the follow case:
+	 *
+	 * When trying to add A -> B to the graph, we find that there is a
+	 * hardirq-safe L, that L -> ... -> A, and another hardirq-unsafe M,
+	 * that B -> ... -> M. However M is **softirq-safe**, if we use exact
+	 * invert bits of M's usage_mask, we will find another lock N that is
+	 * **softirq-unsafe** and N -> ... -> A, however N -> .. -> M will not
+	 * cause a inversion deadlock.
	 */
-	backward_mask = original_mask(target_entry1->class->usage_mask);
+	backward_mask = original_mask(target_entry1->class->usage_mask & LOCKF_ENABLED_IRQ_ALL);

	ret = find_usage_backwards(&this, backward_mask, &target_entry);
	if (bfs_error(ret)) {
-- cgit v1.2.3

From f8b298cc39f0619544c607eaef09fd0b2afd10f3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 17 Jun 2021 20:57:18 +0200
Subject: lockdep: Fix wait-type for empty stack

Even the very first lock can violate the wait-context check, consider the
various IRQ contexts.
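
[ Editor's note: a hedged illustration, not part of the patch. With an empty
  held-lock stack the context itself can already forbid an acquisition, for
  example a sleeping lock taken as the very first lock from hard interrupt
  context. The handler and lock names below are made up for the example. ]

  #include <linux/interrupt.h>
  #include <linux/mutex.h>

  static DEFINE_MUTEX(demo_mutex);

  static irqreturn_t demo_irq_handler(int irq, void *dev_id)
  {
          /*
           * demo_mutex is the first lock acquired here, so the held-lock
           * stack is empty (curr->lockdep_depth == 0), yet taking a
           * sleeping lock in hard-IRQ context violates the wait-context
           * rules and should still be reported - which the hunk below
           * makes possible.
           */
          mutex_lock(&demo_mutex);
          mutex_unlock(&demo_mutex);
          return IRQ_HANDLED;
  }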
Fixes: de8f5e4f2dc1 ("lockdep: Introduce wait-type checks")
Signed-off-by: Peter Zijlstra (Intel)
Tested-by: Joerg Roedel
Link: https://lore.kernel.org/r/20210617190313.256987481@infradead.org
---
 kernel/locking/lockdep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 6ff1e8405a83..0584b2090084 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4688,7 +4688,7 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
	u8 curr_inner;
	int depth;

-	if (!curr->lockdep_depth || !next_inner || next->trylock)
+	if (!next_inner || next->trylock)
		return 0;

	if (!next_outer)
-- cgit v1.2.3

From e112c41341c03d9224a9fc522bdb3539bc849b56 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 22 Apr 2021 21:44:22 +0200
Subject: futex: Prepare futex_lock_pi() for runtime clock selection

futex_lock_pi() is the only futex operation which cannot select the clock
for timeouts (CLOCK_MONOTONIC/CLOCK_REALTIME). That's inconsistent and
there is no particular reason why this cannot be supported.

This was overlooked when CLOCK_REALTIME_FLAG was introduced and
unfortunately not reported when the inconsistency was discovered in glibc.

Prepare the function and enforce the CLOCK_REALTIME_FLAG on FUTEX_LOCK_PI
so that a new FUTEX_LOCK_PI2 can implement it correctly.

Reported-by: Kurt Kanzenbach
Signed-off-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20210422194705.338657741@linutronix.de
---
 kernel/futex.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/futex.c b/kernel/futex.c
index 08008c225bec..f820439a8aae 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2783,7 +2783,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
	if (refill_pi_state_cache())
		return -ENOMEM;

-	to = futex_setup_timer(time, &timeout, FLAGS_CLOCKRT, 0);
+	to = futex_setup_timer(time, &timeout, flags, 0);

 retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
@@ -3739,6 +3739,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
	case FUTEX_WAKE_OP:
		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
	case FUTEX_LOCK_PI:
+		flags |= FLAGS_CLOCKRT;
		return futex_lock_pi(uaddr, flags, timeout, 0);
	case FUTEX_UNLOCK_PI:
		return futex_unlock_pi(uaddr, flags);
-- cgit v1.2.3

From bf22a6976897977b0a3f1aeba6823c959fc4fdae Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 22 Apr 2021 21:44:23 +0200
Subject: futex: Provide FUTEX_LOCK_PI2 to support clock selection

The FUTEX_LOCK_PI futex operand has used a CLOCK_REALTIME based absolute
timeout since it was implemented, but it does not require that the
FUTEX_CLOCK_REALTIME flag is set, because that was introduced later.

In theory, as none of the user space implementations can set the
FUTEX_CLOCK_REALTIME flag on this operand, it would be possible to
creatively abuse it and make the meaning inverse, i.e. select
CLOCK_REALTIME when not set and CLOCK_MONOTONIC when set. But that would
be nasty hackery.

Another option would be to have a new FUTEX_CLOCK_MONOTONIC flag only for
FUTEX_LOCK_PI, but that's also awkward because it does not allow libraries
to handle the timeout clock selection consistently.

So provide a new FUTEX_LOCK_PI2 operand which implements the timeout
semantics which the other operands use and leave FUTEX_LOCK_PI alone.
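
[ Editor's note: a hedged userspace sketch, not part of the patch, showing the
  intended difference between the two operations: FUTEX_LOCK_PI keeps its
  historic CLOCK_REALTIME absolute timeout, while FUTEX_LOCK_PI2 defaults to
  CLOCK_MONOTONIC and honours FUTEX_CLOCK_REALTIME. The helper names are made
  up, and FUTEX_LOCK_PI2 is defined locally in case the installed uapi headers
  predate this change. On kernels that do not know FUTEX_LOCK_PI2 the call
  fails with ENOSYS, so a caller would fall back to FUTEX_LOCK_PI. ]

  #include <linux/futex.h>
  #include <stdint.h>
  #include <sys/syscall.h>
  #include <time.h>
  #include <unistd.h>

  #ifndef FUTEX_LOCK_PI2
  #define FUTEX_LOCK_PI2 13
  #endif

  /* PI-lock with an absolute CLOCK_MONOTONIC timeout (the new default). */
  static int pi_lock_monotonic(uint32_t *uaddr, const struct timespec *abs_timeout)
  {
          /* val, uaddr2 and val3 are not used by FUTEX_LOCK_PI2 */
          return syscall(SYS_futex, uaddr, FUTEX_LOCK_PI2, 0, abs_timeout, NULL, 0);
  }

  /* PI-lock with an absolute CLOCK_REALTIME timeout. */
  static int pi_lock_realtime(uint32_t *uaddr, const struct timespec *abs_timeout)
  {
          return syscall(SYS_futex, uaddr, FUTEX_LOCK_PI2 | FUTEX_CLOCK_REALTIME,
                         0, abs_timeout, NULL, 0);
  }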
Reported-by: Kurt Kanzenbach
Signed-off-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20210422194705.440773992@linutronix.de
---
 include/uapi/linux/futex.h | 2 ++
 kernel/futex.c             | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index a89eb0accd5e..235e5b2facaa 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -21,6 +21,7 @@
 #define FUTEX_WAKE_BITSET	10
 #define FUTEX_WAIT_REQUEUE_PI	11
 #define FUTEX_CMP_REQUEUE_PI	12
+#define FUTEX_LOCK_PI2		13

 #define FUTEX_PRIVATE_FLAG	128
 #define FUTEX_CLOCK_REALTIME	256
@@ -32,6 +33,7 @@
 #define FUTEX_CMP_REQUEUE_PRIVATE (FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAKE_OP_PRIVATE	(FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG)
 #define FUTEX_LOCK_PI_PRIVATE	(FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG)
+#define FUTEX_LOCK_PI2_PRIVATE	(FUTEX_LOCK_PI2 | FUTEX_PRIVATE_FLAG)
 #define FUTEX_UNLOCK_PI_PRIVATE	(FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG)
 #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAIT_BITSET_PRIVATE	(FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG)
diff --git a/kernel/futex.c b/kernel/futex.c
index f820439a8aae..f832b6434625 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3707,12 +3707,14 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,

	if (op & FUTEX_CLOCK_REALTIME) {
		flags |= FLAGS_CLOCKRT;
-		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
+		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
+		    cmd != FUTEX_LOCK_PI2)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_LOCK_PI:
+	case FUTEX_LOCK_PI2:
	case FUTEX_UNLOCK_PI:
	case FUTEX_TRYLOCK_PI:
	case FUTEX_WAIT_REQUEUE_PI:
@@ -3740,6 +3742,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
	case FUTEX_LOCK_PI:
		flags |= FLAGS_CLOCKRT;
+		fallthrough;
+	case FUTEX_LOCK_PI2:
		return futex_lock_pi(uaddr, flags, timeout, 0);
	case FUTEX_UNLOCK_PI:
		return futex_unlock_pi(uaddr, flags);
@@ -3760,6 +3764,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
	switch (cmd) {
	case FUTEX_WAIT:
	case FUTEX_LOCK_PI:
+	case FUTEX_LOCK_PI2:
	case FUTEX_WAIT_BITSET:
	case FUTEX_WAIT_REQUEUE_PI:
		return true;
-- cgit v1.2.3

From 0e8a89d49d45197770f2e57fb15f1bc9ded96eb0 Mon Sep 17 00:00:00 2001
From: Xiongwei Song
Date: Fri, 18 Jun 2021 21:02:30 +0800
Subject: locking/lockdep: Correct the description error for check_redundant()

If there is no matched result, check_redundant() will return BFS_RNOMATCH.

Signed-off-by: Xiongwei Song
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Boqun Feng
Link: https://lkml.kernel.org/r/20210618130230.123249-1-sxwjean@me.com
---
 kernel/locking/lockdep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 0584b2090084..095c87f97a31 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2829,7 +2829,7 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
  * <target> or not. If it can, <src> -> <target> dependency is already
  * in the graph.
  *
- * Return BFS_RMATCH if it does, or BFS_RMATCH if it does not, return BFS_E* if
+ * Return BFS_RMATCH if it does, or BFS_RNOMATCH if it does not, return BFS_E* if
  * any error appears in the bfs search.
  */
 static noinline enum bfs_result
-- cgit v1.2.3