From 6fc45b6ed921dc00dfb264dc08c7d67ee63d2656 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Fri, 17 Nov 2023 18:21:14 +0100
Subject: dm-delay: fix a race between delay_presuspend and delay_bio

In delay_presuspend, we clear the atomic variable may_delay and then
stop the timer and flush pending bios. The intention here is to prevent
the delay target from re-arming the timer.

However, the test of may_delay in delay_bio is racy. Suppose that one
thread enters delay_bio, sees that dc->may_delay is one and proceeds;
now, another thread executes delay_presuspend, sets dc->may_delay to
zero, deletes the timer and flushes pending bios. Then, the first
thread continues and adds the bio to delayed->list despite the fact
that dc->may_delay is false.

Fix this bug by changing may_delay's type from atomic_t to bool and
only accessing it while holding the delayed_bios_lock mutex. Note that
we don't have to grab the mutex in delay_resume because there are no
bios in flight at this point.

Signed-off-by: Mikulas Patocka
Cc: stable@vger.kernel.org
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-delay.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index efd510984e25..2d6b900e4353 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -33,7 +33,7 @@ struct delay_c {
         struct work_struct flush_expired_bios;
         struct list_head delayed_bios;
         struct task_struct *worker;
-        atomic_t may_delay;
+        bool may_delay;
 
         struct delay_class read;
         struct delay_class write;
@@ -236,7 +236,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         ti->private = dc;
 
         INIT_LIST_HEAD(&dc->delayed_bios);
-        atomic_set(&dc->may_delay, 1);
+        dc->may_delay = true;
         dc->argc = argc;
 
         ret = delay_class_ctr(ti, &dc->read, argv);
@@ -312,7 +312,7 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
         struct dm_delay_info *delayed;
         unsigned long expires = 0;
 
-        if (!c->delay || !atomic_read(&dc->may_delay))
+        if (!c->delay)
                 return DM_MAPIO_REMAPPED;
 
         delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
@@ -321,6 +321,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
         delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
 
         mutex_lock(&delayed_bios_lock);
+        if (unlikely(!dc->may_delay)) {
+                mutex_unlock(&delayed_bios_lock);
+                return DM_MAPIO_REMAPPED;
+        }
         c->ops++;
         list_add_tail(&delayed->list, &dc->delayed_bios);
         mutex_unlock(&delayed_bios_lock);
@@ -337,7 +341,9 @@ static void delay_presuspend(struct dm_target *ti)
 {
         struct delay_c *dc = ti->private;
 
-        atomic_set(&dc->may_delay, 0);
+        mutex_lock(&delayed_bios_lock);
+        dc->may_delay = false;
+        mutex_unlock(&delayed_bios_lock);
 
         if (delay_is_fast(dc))
                 flush_delayed_bios_fast(dc, true);
@@ -351,7 +357,7 @@ static void delay_resume(struct dm_target *ti)
 {
         struct delay_c *dc = ti->private;
 
-        atomic_set(&dc->may_delay, 1);
+        dc->may_delay = true;
 }
 
 static int delay_map(struct dm_target *ti, struct bio *bio)

--
cgit v1.2.3
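The locking pattern behind this fix generalizes beyond dm-delay: a flag
that gates insertion into a queue must be tested under the same lock
that protects the queue, otherwise the check and the insertion can be
separated by a concurrent quiesce. A minimal userspace sketch of the
idea, with hypothetical names (an illustration, not code from the
patch):

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

/* One mutex protects both the gate flag and the list, mirroring
 * delayed_bios_lock protecting may_delay and delayed_bios. */
static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static bool may_queue = true;

struct item {
        struct item *next;
};
static struct item *head;

/* Test the gate and insert in one critical section; a caller that gets
 * false must complete the item itself (cf. DM_MAPIO_REMAPPED). */
static bool try_enqueue(struct item *it)
{
        bool queued = false;

        pthread_mutex_lock(&q_lock);
        if (may_queue) {
                it->next = head;
                head = it;
                queued = true;
        }
        pthread_mutex_unlock(&q_lock);
        return queued;
}

/* Mirrors delay_presuspend(): after this returns, try_enqueue() can no
 * longer add items, so draining the list cannot race with new arrivals. */
static void quiesce(void)
{
        pthread_mutex_lock(&q_lock);
        may_queue = false;
        pthread_mutex_unlock(&q_lock);
}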
From 38cfff568169ff9f99784948f79f62ca1af5a187 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Fri, 17 Nov 2023 18:22:47 +0100
Subject: dm-delay: fix bugs introduced by kthread mode

This commit fixes the following bugs introduced by commit 70bbeb29fab0
("dm delay: for short delays, use kthread instead of timers and wq"):

* the function flush_worker_fn has no exit path - on unload, this
  function will just loop and consume 100% CPU without any progress

* the wake-up mechanism in flush_worker_fn is racy - a wake-up will be
  missed if the process adds entries to the delayed_bios list just
  before set_current_state(TASK_INTERRUPTIBLE)

* flush_delayed_bios_fast submits a bio while holding a global mutex;
  this may deadlock if we have multiple stacked dm-delay devices and
  the underlying device attempts to acquire the mutex too

* if the target constructor fails, it will call delay_dtr, and
  delay_dtr would attempt to destroy dc->timer_lock without it having
  been initialized by the constructor

* if the target constructor's kthread allocation fails, delay_dtr
  would crash trying to dereference dc->worker, because it holds an
  error pointer (from ERR_PTR) rather than NULL

Fixes: 70bbeb29fab0 ("dm delay: for short delays, use kthread instead of timers and wq")
Signed-off-by: Mikulas Patocka
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-delay.c | 61 +++++++++++++++++++++++++++++----------------------
 1 file changed, 35 insertions(+), 26 deletions(-)

diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 2d6b900e4353..00c09d9fffeb 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -73,9 +73,23 @@ static inline bool delay_is_fast(struct delay_c *dc)
         return !!dc->worker;
 }
 
+static void flush_bios(struct bio *bio)
+{
+        struct bio *n;
+
+        while (bio) {
+                n = bio->bi_next;
+                bio->bi_next = NULL;
+                dm_submit_bio_remap(bio, NULL);
+                bio = n;
+        }
+}
+
 static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
 {
         struct dm_delay_info *delayed, *next;
+        struct bio_list flush_bio_list;
+        bio_list_init(&flush_bio_list);
 
         mutex_lock(&delayed_bios_lock);
         list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
@@ -83,47 +97,42 @@ static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
                         struct bio *bio = dm_bio_from_per_bio_data(delayed,
                                                 sizeof(struct dm_delay_info));
                         list_del(&delayed->list);
-                        dm_submit_bio_remap(bio, NULL);
+                        bio_list_add(&flush_bio_list, bio);
                         delayed->class->ops--;
                 }
         }
         mutex_unlock(&delayed_bios_lock);
+
+        flush_bios(bio_list_get(&flush_bio_list));
 }
 
 static int flush_worker_fn(void *data)
 {
         struct delay_c *dc = data;
 
-        while (1) {
+        while (!kthread_should_stop()) {
                 flush_delayed_bios_fast(dc, false);
+                mutex_lock(&delayed_bios_lock);
                 if (unlikely(list_empty(&dc->delayed_bios))) {
                         set_current_state(TASK_INTERRUPTIBLE);
+                        mutex_unlock(&delayed_bios_lock);
                         schedule();
-                } else
+                } else {
+                        mutex_unlock(&delayed_bios_lock);
                         cond_resched();
+                }
         }
 
         return 0;
 }
 
-static void flush_bios(struct bio *bio)
-{
-        struct bio *n;
-
-        while (bio) {
-                n = bio->bi_next;
-                bio->bi_next = NULL;
-                dm_submit_bio_remap(bio, NULL);
-                bio = n;
-        }
-}
-
-static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
+static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
 {
         struct dm_delay_info *delayed, *next;
         unsigned long next_expires = 0;
         unsigned long start_timer = 0;
-        struct bio_list flush_bios = { };
+        struct bio_list flush_bio_list;
+        bio_list_init(&flush_bio_list);
 
         mutex_lock(&delayed_bios_lock);
         list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
@@ -131,7 +140,7 @@
                         struct bio *bio = dm_bio_from_per_bio_data(delayed,
                                                 sizeof(struct dm_delay_info));
                         list_del(&delayed->list);
-                        bio_list_add(&flush_bios, bio);
+                        bio_list_add(&flush_bio_list, bio);
                         delayed->class->ops--;
                         continue;
                 }
@@ -147,7 +156,7 @@
         if (start_timer)
                 queue_timeout(dc, next_expires);
 
-        return bio_list_get(&flush_bios);
+        flush_bios(bio_list_get(&flush_bio_list));
 }
 
 static void flush_expired_bios(struct work_struct *work)
@@ -158,7 +167,7 @@
         if (delay_is_fast(dc))
                 flush_delayed_bios_fast(dc, false);
         else
-                flush_bios(flush_delayed_bios(dc, false));
+                flush_delayed_bios(dc, false);
 }
 
 static void delay_dtr(struct dm_target *ti)
@@ -177,8 +186,7 @@
         if (dc->worker)
                 kthread_stop(dc->worker);
 
-        if (!delay_is_fast(dc))
-                mutex_destroy(&dc->timer_lock);
+        mutex_destroy(&dc->timer_lock);
 
         kfree(dc);
 }
@@ -236,6 +244,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         ti->private = dc;
 
         INIT_LIST_HEAD(&dc->delayed_bios);
+        mutex_init(&dc->timer_lock);
         dc->may_delay = true;
         dc->argc = argc;
 
@@ -282,12 +291,12 @@ out:
                                                 "dm-delay-flush-worker");
                 if (IS_ERR(dc->worker)) {
                         ret = PTR_ERR(dc->worker);
+                        dc->worker = NULL;
                         goto bad;
                 }
         } else {
                 timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
                 INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
-                mutex_init(&dc->timer_lock);
                 dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
                 if (!dc->kdelayd_wq) {
                         ret = -EINVAL;
@@ -345,11 +354,11 @@ static void delay_presuspend(struct dm_target *ti)
         dc->may_delay = false;
         mutex_unlock(&delayed_bios_lock);
 
-        if (delay_is_fast(dc))
+        if (delay_is_fast(dc)) {
                 flush_delayed_bios_fast(dc, true);
-        else {
+        } else {
                 del_timer_sync(&dc->delay_timer);
-                flush_bios(flush_delayed_bios(dc, true));
+                flush_delayed_bios(dc, true);
         }
 }

--
cgit v1.2.3
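Two of these fixes follow standard kernel idioms worth spelling out.
For the wake-up race, the worker must publish its intention to sleep
(set_current_state) before it drops the lock under which it saw the
list empty; a producer's wake_up_process() issued after that point puts
the task back to TASK_RUNNING, so schedule() cannot sleep through the
new work. A condensed sketch of the loop shape (stand-in names; the
producer side is assumed to wake the kthread after queueing under the
same lock):

static int worker_fn(void *data)
{
        struct workset *ws = data;              /* hypothetical context */

        while (!kthread_should_stop()) {
                process_ready_items(ws);        /* stand-in for the flush */

                mutex_lock(&ws->lock);
                if (list_empty(&ws->pending)) {
                        /* Declare "sleeping" while the emptiness test is
                         * still valid; a producer enqueueing after the
                         * unlock wakes us and schedule() returns at once. */
                        set_current_state(TASK_INTERRUPTIBLE);
                        mutex_unlock(&ws->lock);
                        schedule();
                } else {
                        mutex_unlock(&ws->lock);
                        cond_resched();
                }
        }
        return 0;
}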
From ccadc8a21ef13eb80006ecff6a7466810e4f0dd6 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Fri, 17 Nov 2023 18:24:04 +0100
Subject: dm-delay: avoid duplicate logic

This is a small refactoring of dm-delay - we avoid the duplicate logic
in flush_delayed_bios and flush_delayed_bios_fast and join these two
functions into one. We also add cond_resched() to flush_delayed_bios
because the list may have an unbounded number of entries.

Signed-off-by: Mikulas Patocka
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-delay.c | 65 +++++++++++++++++----------------------------
 1 file changed, 21 insertions(+), 44 deletions(-)

diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 00c09d9fffeb..5eabdb06c649 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -85,24 +85,40 @@ static void flush_bios(struct bio *bio)
         }
 }
 
-static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
+static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
 {
         struct dm_delay_info *delayed, *next;
         struct bio_list flush_bio_list;
+        unsigned long next_expires = 0;
+        bool start_timer = false;
         bio_list_init(&flush_bio_list);
 
         mutex_lock(&delayed_bios_lock);
         list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
+                cond_resched();
                 if (flush_all || time_after_eq(jiffies, delayed->expires)) {
                         struct bio *bio = dm_bio_from_per_bio_data(delayed,
                                                 sizeof(struct dm_delay_info));
                         list_del(&delayed->list);
                         bio_list_add(&flush_bio_list, bio);
                         delayed->class->ops--;
+                        continue;
+                }
+
+                if (!delay_is_fast(dc)) {
+                        if (!start_timer) {
+                                start_timer = true;
+                                next_expires = delayed->expires;
+                        } else {
+                                next_expires = min(next_expires, delayed->expires);
+                        }
                 }
         }
         mutex_unlock(&delayed_bios_lock);
 
+        if (start_timer)
+                queue_timeout(dc, next_expires);
+
         flush_bios(bio_list_get(&flush_bio_list));
 }
 
@@ -111,7 +127,7 @@ static int flush_worker_fn(void *data)
         struct delay_c *dc = data;
 
         while (!kthread_should_stop()) {
-                flush_delayed_bios_fast(dc, false);
+                flush_delayed_bios(dc, false);
                 mutex_lock(&delayed_bios_lock);
                 if (unlikely(list_empty(&dc->delayed_bios))) {
                         set_current_state(TASK_INTERRUPTIBLE);
@@ -126,48 +142,12 @@ static int flush_worker_fn(void *data)
         return 0;
 }
 
-static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
-{
-        struct dm_delay_info *delayed, *next;
-        unsigned long next_expires = 0;
-        unsigned long start_timer = 0;
-        struct bio_list flush_bio_list;
-        bio_list_init(&flush_bio_list);
-
-        mutex_lock(&delayed_bios_lock);
-        list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
-                if (flush_all || time_after_eq(jiffies, delayed->expires)) {
-                        struct bio *bio = dm_bio_from_per_bio_data(delayed,
-                                                sizeof(struct dm_delay_info));
-                        list_del(&delayed->list);
-                        bio_list_add(&flush_bio_list, bio);
-                        delayed->class->ops--;
-                        continue;
-                }
-
-                if (!start_timer) {
-                        start_timer = 1;
-                        next_expires = delayed->expires;
-                } else
-                        next_expires = min(next_expires, delayed->expires);
-        }
-        mutex_unlock(&delayed_bios_lock);
-
-        if (start_timer)
-                queue_timeout(dc, next_expires);
-
-        flush_bios(bio_list_get(&flush_bio_list));
-}
-
 static void flush_expired_bios(struct work_struct *work)
 {
         struct delay_c *dc;
 
         dc = container_of(work, struct delay_c, flush_expired_bios);
-        if (delay_is_fast(dc))
-                flush_delayed_bios_fast(dc, false);
-        else
-                flush_delayed_bios(dc, false);
+        flush_delayed_bios(dc, false);
 }
 
 static void delay_dtr(struct dm_target *ti)
@@ -354,12 +334,9 @@ static void delay_presuspend(struct dm_target *ti)
         dc->may_delay = false;
         mutex_unlock(&delayed_bios_lock);
 
-        if (delay_is_fast(dc)) {
-                flush_delayed_bios_fast(dc, true);
-        } else {
+        if (!delay_is_fast(dc))
                 del_timer_sync(&dc->delay_timer);
-                flush_delayed_bios(dc, true);
-        }
+        flush_delayed_bios(dc, true);
 }
 
 static void delay_resume(struct dm_target *ti)

--
cgit v1.2.3
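The merged function also preserves the "collect under the lock, submit
outside it" discipline that the previous patch introduced to avoid
deadlocks on stacked devices. Reduced to its skeleton (stand-in types
and names; the real code moves struct bio objects and re-arms a timer
in the non-fast mode):

static void flush_ready(struct pending_set *ps, bool flush_all)
{
        LIST_HEAD(ready);
        struct entry *e, *next;

        mutex_lock(&ps->lock);
        list_for_each_entry_safe(e, next, &ps->pending, list) {
                cond_resched();         /* the list length is unbounded */
                if (flush_all || time_after_eq(jiffies, e->expires))
                        list_move_tail(&e->list, &ready);
        }
        mutex_unlock(&ps->lock);

        /* Submission may block or recurse into a stacked device, so it
         * must happen with the global mutex released. */
        list_for_each_entry_safe(e, next, &ready, list)
                submit_entry(e);        /* stand-in for dm_submit_bio_remap */
}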
From 2a695062a5a42aead8c539a344168d4806b3fda2 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Fri, 17 Nov 2023 18:36:34 +0100
Subject: dm-bufio: fix no-sleep mode

dm-bufio has a no-sleep mode. When activated (with the
DM_BUFIO_CLIENT_NO_SLEEP flag), the bufio client is read-only and
dm_bufio_get can be called from tasklets. This is used by dm-verity.

Unfortunately, commit 450e8dee51aa ("dm bufio: improve concurrent IO
performance") broke this, and the kernel would warn that cache_get()
was calling down_read() from a non-sleeping context. The bug can be
reproduced by using "veritysetup open" with the "--use-tasklets" flag.

This commit fixes dm-bufio so that the tasklet mode works again, by
expanding use of the 'no_sleep_enabled' static_key to conditionally use
either a rw_semaphore or rwlock_t (which are colocated in the
buffer_tree structure using a union).

Signed-off-by: Mikulas Patocka
Cc: stable@vger.kernel.org # v6.4
Fixes: 450e8dee51aa ("dm bufio: improve concurrent IO performance")
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-bufio.c | 87 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 62 insertions(+), 25 deletions(-)

diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 62eb27639c9b..f03d7dba270c 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -254,7 +254,7 @@ enum evict_result {
 
 typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);
 
-static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
+static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep)
 {
         unsigned long tested = 0;
         struct list_head *h = lru->cursor;
@@ -295,7 +295,8 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con
 
                 h = h->next;
 
-                cond_resched();
+                if (!no_sleep)
+                        cond_resched();
         }
 
         return NULL;
@@ -382,7 +383,10 @@ struct dm_buffer {
  */
 
 struct buffer_tree {
-        struct rw_semaphore lock;
+        union {
+                struct rw_semaphore lock;
+                rwlock_t spinlock;
+        } u;
         struct rb_root root;
 } ____cacheline_aligned_in_smp;
 
@@ -393,9 +397,12 @@ struct dm_buffer_cache {
          * on the locks.
          */
         unsigned int num_locks;
+        bool no_sleep;
         struct buffer_tree trees[];
 };
 
+static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
+
 static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
 {
         return dm_hash_locks_index(block, num_locks);
@@ -403,22 +410,34 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
 
 static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
 {
-        down_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+        if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+                read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+        else
+                down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
 {
-        up_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+        if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+                read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+        else
+                up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
 {
-        down_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+        if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+                write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+        else
+                down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
 {
-        up_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+        if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+                write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+        else
+                up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 /*
@@ -442,18 +461,32 @@ static void lh_init(struct lock_history *lh, struct dm_buffer_cache *cache, bool
 
 static void __lh_lock(struct lock_history *lh, unsigned int index)
 {
-        if (lh->write)
-                down_write(&lh->cache->trees[index].lock);
-        else
-                down_read(&lh->cache->trees[index].lock);
+        if (lh->write) {
+                if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+                        write_lock_bh(&lh->cache->trees[index].u.spinlock);
+                else
+                        down_write(&lh->cache->trees[index].u.lock);
+        } else {
+                if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+                        read_lock_bh(&lh->cache->trees[index].u.spinlock);
+                else
+                        down_read(&lh->cache->trees[index].u.lock);
+        }
 }
 
 static void __lh_unlock(struct lock_history *lh, unsigned int index)
 {
-        if (lh->write)
-                up_write(&lh->cache->trees[index].lock);
-        else
-                up_read(&lh->cache->trees[index].lock);
+        if (lh->write) {
+                if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+                        write_unlock_bh(&lh->cache->trees[index].u.spinlock);
+                else
+                        up_write(&lh->cache->trees[index].u.lock);
+        } else {
+                if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+                        read_unlock_bh(&lh->cache->trees[index].u.spinlock);
+                else
+                        up_read(&lh->cache->trees[index].u.lock);
+        }
 }
 
 /*
@@ -502,14 +535,18 @@ static struct dm_buffer *list_to_buffer(struct list_head *l)
         return le_to_buffer(le);
 }
 
-static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks)
+static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep)
 {
         unsigned int i;
 
         bc->num_locks = num_locks;
+        bc->no_sleep = no_sleep;
 
         for (i = 0; i < bc->num_locks; i++) {
-                init_rwsem(&bc->trees[i].lock);
+                if (no_sleep)
+                        rwlock_init(&bc->trees[i].u.spinlock);
+                else
+                        init_rwsem(&bc->trees[i].u.lock);
                 bc->trees[i].root = RB_ROOT;
         }
 
@@ -648,7 +685,7 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode
         struct lru_entry *le;
         struct dm_buffer *b;
 
-        le = lru_evict(&bc->lru[list_mode], __evict_pred, &w);
+        le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep);
         if (!le)
                 return NULL;
 
@@ -702,7 +739,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_
         struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};
 
         while (true) {
-                le = lru_evict(&bc->lru[old_mode], __evict_pred, &w);
+                le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep);
                 if (!le)
                         break;
 
@@ -915,10 +952,11 @@ static void cache_remove_range(struct dm_buffer_cache *bc,
 {
         unsigned int i;
 
+        BUG_ON(bc->no_sleep);
         for (i = 0; i < bc->num_locks; i++) {
-                down_write(&bc->trees[i].lock);
+                down_write(&bc->trees[i].u.lock);
                 __remove_range(bc, &bc->trees[i].root, begin, end, pred, release);
-                up_write(&bc->trees[i].lock);
+                up_write(&bc->trees[i].u.lock);
         }
 }
 
@@ -979,8 +1017,6 @@ struct dm_bufio_client {
         struct dm_buffer_cache cache; /* must be last member */
 };
 
-static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
-
 /*----------------------------------------------------------------*/
 
 #define dm_bufio_in_request()        (!!current->bio_list)
@@ -1871,7 +1907,8 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
         if (need_submit)
                 submit_io(b, REQ_OP_READ, read_endio);
 
-        wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
+        if (nf != NF_GET)        /* we already tested this condition above */
+                wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
 
         if (b->read_error) {
                 int error = blk_status_to_errno(b->read_error);
@@ -2421,7 +2458,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
                 r = -ENOMEM;
                 goto bad_client;
         }
-        cache_init(&c->cache, num_locks);
+        cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0);
 
         c->bdev = bdev;
         c->block_size = block_size;

--
cgit v1.2.3
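The structure of this fix deserves a note: the two lock flavors can
share storage because any given client picks one flavor at cache_init()
time and never changes it, and static_branch_unlikely() keeps the
sleepable path free of overhead until the first no-sleep client flips
the static key. The core of the scheme, reduced to one lock/unlock pair
(a sketch with stand-in names, following the patch's approach):

struct tree {
        union {                 /* exactly one flavor is ever used */
                struct rw_semaphore lock;       /* sleepable clients */
                rwlock_t spinlock;              /* DM_BUFIO_CLIENT_NO_SLEEP */
        } u;
};

static DEFINE_STATIC_KEY_FALSE(nosleep_key);    /* hypothetical key name */

static inline void tree_read_lock(struct tree *t, bool no_sleep)
{
        /* Compiled as a patched jump: until a no-sleep client exists,
         * this costs one never-taken branch on the rwsem path. */
        if (static_branch_unlikely(&nosleep_key) && no_sleep)
                read_lock_bh(&t->u.spinlock);   /* _bh: also fences tasklets */
        else
                down_read(&t->u.lock);
}

static inline void tree_read_unlock(struct tree *t, bool no_sleep)
{
        if (static_branch_unlikely(&nosleep_key) && no_sleep)
                read_unlock_bh(&t->u.spinlock);
        else
                up_read(&t->u.lock);
}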
From 28f07f2ab4b3a2714f1fefcc58ada4bcc195f806 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Fri, 17 Nov 2023 18:37:25 +0100
Subject: dm-verity: don't use blocking calls from tasklets

Commit 5721d4e5a9cd enhanced dm-verity so that it can verify blocks
from tasklets rather than from workqueues. This reportedly improves
performance significantly. However, dm-verity was using the flag
CRYPTO_TFM_REQ_MAY_SLEEP from tasklets, which resulted in warnings
about a sleeping function being called from a non-sleeping context:

BUG: sleeping function called from invalid context at crypto/internal.h:206
in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 14, name: ksoftirqd/0
preempt_count: 100, expected: 0
RCU nest depth: 0, expected: 0
CPU: 0 PID: 14 Comm: ksoftirqd/0 Tainted: G W 6.7.0-rc1 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
Call Trace:
 dump_stack_lvl+0x32/0x50
 __might_resched+0x110/0x160
 crypto_hash_walk_done+0x54/0xb0
 shash_ahash_update+0x51/0x60
 verity_hash_update.isra.0+0x4a/0x130 [dm_verity]
 verity_verify_io+0x165/0x550 [dm_verity]
 ? free_unref_page+0xdf/0x170
 ? psi_group_change+0x113/0x390
 verity_tasklet+0xd/0x70 [dm_verity]
 tasklet_action_common.isra.0+0xb3/0xc0
 __do_softirq+0xaf/0x1ec
 ? smpboot_thread_fn+0x1d/0x200
 ? sort_range+0x20/0x20
 run_ksoftirqd+0x15/0x30
 smpboot_thread_fn+0xed/0x200
 kthread+0xdc/0x110
 ? kthread_complete_and_exit+0x20/0x20
 ret_from_fork+0x28/0x40
 ? kthread_complete_and_exit+0x20/0x20
 ret_from_fork_asm+0x11/0x20

This commit fixes dm-verity so that it doesn't use the flags
CRYPTO_TFM_REQ_MAY_SLEEP and CRYPTO_TFM_REQ_MAY_BACKLOG from tasklets.
The crypto API will then use GFP_ATOMIC allocation instead; it may
return -ENOMEM, and we catch -ENOMEM in verity_tasklet and requeue the
request to the workqueue.

Signed-off-by: Mikulas Patocka
Cc: stable@vger.kernel.org # v6.0+
Fixes: 5721d4e5a9cd ("dm verity: Add optional "try_verify_in_tasklet" feature")
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-verity-fec.c    |  4 ++--
 drivers/md/dm-verity-target.c | 23 ++++++++++++-----------
 drivers/md/dm-verity.h        |  2 +-
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 3ef9f018da60..2099c755119e 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -185,7 +185,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
 {
         if (unlikely(verity_hash(v, verity_io_hash_req(v, io),
                                  data, 1 << v->data_dev_block_bits,
-                                 verity_io_real_digest(v, io))))
+                                 verity_io_real_digest(v, io), true)))
                 return 0;
 
         return memcmp(verity_io_real_digest(v, io), want_digest,
@@ -386,7 +386,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
         /* Always re-validate the corrected block against the expected hash */
         r = verity_hash(v, verity_io_hash_req(v, io), fio->output,
                         1 << v->data_dev_block_bits,
-                        verity_io_real_digest(v, io));
+                        verity_io_real_digest(v, io), true);
         if (unlikely(r < 0))
                 return r;
 
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 26adcfea0302..e115fcfe723c 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -135,20 +135,21 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req,
  * Wrapper for crypto_ahash_init, which handles verity salting.
  */
 static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
-                                struct crypto_wait *wait)
+                                struct crypto_wait *wait, bool may_sleep)
 {
         int r;
 
         ahash_request_set_tfm(req, v->tfm);
-        ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
-                                        CRYPTO_TFM_REQ_MAY_BACKLOG,
-                                        crypto_req_done, (void *)wait);
+        ahash_request_set_callback(req,
+                may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0,
+                crypto_req_done, (void *)wait);
         crypto_init_wait(wait);
 
         r = crypto_wait_req(crypto_ahash_init(req), wait);
 
         if (unlikely(r < 0)) {
-                DMERR("crypto_ahash_init failed: %d", r);
+                if (r != -ENOMEM)
+                        DMERR("crypto_ahash_init failed: %d", r);
                 return r;
         }
 
@@ -179,12 +180,12 @@ out:
 }
 
 int verity_hash(struct dm_verity *v, struct ahash_request *req,
-                const u8 *data, size_t len, u8 *digest)
+                const u8 *data, size_t len, u8 *digest, bool may_sleep)
 {
         int r;
         struct crypto_wait wait;
 
-        r = verity_hash_init(v, req, &wait);
+        r = verity_hash_init(v, req, &wait, may_sleep);
         if (unlikely(r < 0))
                 goto out;
 
@@ -322,7 +323,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
 
                 r = verity_hash(v, verity_io_hash_req(v, io),
                                 data, 1 << v->hash_dev_block_bits,
-                                verity_io_real_digest(v, io));
+                                verity_io_real_digest(v, io), !io->in_tasklet);
                 if (unlikely(r < 0))
                         goto release_ret_r;
 
@@ -556,7 +557,7 @@ static int verity_verify_io(struct dm_verity_io *io)
                         continue;
                 }
 
-                r = verity_hash_init(v, req, &wait);
+                r = verity_hash_init(v, req, &wait, !io->in_tasklet);
                 if (unlikely(r < 0))
                         return r;
 
@@ -652,7 +653,7 @@ static void verity_tasklet(unsigned long data)
 
         io->in_tasklet = true;
         err = verity_verify_io(io);
-        if (err == -EAGAIN) {
+        if (err == -EAGAIN || err == -ENOMEM) {
                 /* fallback to retrying with work-queue */
                 INIT_WORK(&io->work, verity_work);
                 queue_work(io->v->verify_wq, &io->work);
@@ -1033,7 +1034,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
                 goto out;
 
         r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits,
-                        v->zero_digest);
+                        v->zero_digest, true);
 
 out:
         kfree(req);
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 2f555b420367..f96f4e281ee4 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -128,7 +128,7 @@ extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
                                         u8 *data, size_t len));
 
 extern int verity_hash(struct dm_verity *v, struct ahash_request *req,
-                       const u8 *data, size_t len, u8 *digest);
+                       const u8 *data, size_t len, u8 *digest, bool may_sleep);
 
 extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
                                  sector_t block, u8 *digest, bool *is_zero);

--
cgit v1.2.3
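The contract this fix leans on: an ahash request whose callback flags
include neither CRYPTO_TFM_REQ_MAY_SLEEP nor CRYPTO_TFM_REQ_MAY_BACKLOG
must not sleep, so the crypto layer falls back to atomic allocations
that can fail with -ENOMEM; atomic context therefore loses only the
guarantee of forward progress, not correctness. In miniature, the
calling convention looks like this (a sketch condensed from the patch,
with error logging trimmed):

/* Pass the flags only when sleeping is allowed; in tasklet context the
 * request runs flag-free and -ENOMEM means "retry from process context". */
static int hash_init_sketch(struct crypto_ahash *tfm,
                            struct ahash_request *req,
                            struct crypto_wait *wait, bool may_sleep)
{
        ahash_request_set_tfm(req, tfm);
        ahash_request_set_callback(req,
                may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0,
                crypto_req_done, wait);
        crypto_init_wait(wait);
        return crypto_wait_req(crypto_ahash_init(req), wait);
}

The caller side is what makes -ENOMEM harmless: verity_tasklet treats
it exactly like -EAGAIN and requeues the whole verification to the
workqueue, where may_sleep is true and the allocation can block.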
From 13648e04a9b831b3dfa5cf3887dfa6cf8fe5fe69 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Fri, 17 Nov 2023 18:38:33 +0100
Subject: dm-crypt: start allocating with MAX_ORDER

Commit 23baf831a32c ("mm, treewide: redefine MAX_ORDER sanely") changed
the meaning of MAX_ORDER from exclusive to inclusive. So, we can
allocate compound pages with up to 1 << MAX_ORDER pages. Reflect this
change in dm-crypt and start trying to allocate compound pages with
MAX_ORDER.

Signed-off-by: Mikulas Patocka
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-crypt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 6de107aff331..2ae8560b6a14 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1673,7 +1673,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size)
         unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
         gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
         unsigned int remaining_size;
-        unsigned int order = MAX_ORDER - 1;
+        unsigned int order = MAX_ORDER;
 
 retry:
         if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))

--
cgit v1.2.3
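For context on why a single constant change suffices: crypt_alloc_buffer()
already walks down from its starting order whenever a large allocation
fails, so raising the start simply lets it try the biggest now-valid
compound page first. The shape of that fallback, as a self-contained
helper (an illustrative sketch, not the function from dm-crypt):

/* Try progressively smaller compound pages, from max_order down to a
 * single page; returns NULL only if even order 0 fails. */
static struct page *alloc_largest(unsigned int max_order, gfp_t gfp,
                                  unsigned int *got_order)
{
        unsigned int order;

        for (order = max_order; ; order--) {
                struct page *p = alloc_pages(gfp | __GFP_NORETRY |
                                             __GFP_NOWARN | __GFP_COMP,
                                             order);
                if (p) {
                        *got_order = order;
                        return p;
                }
                if (!order)
                        return NULL;
        }
}

With the inclusive definition from 23baf831a32c, passing MAX_ORDER
itself as max_order is legal, which is all this one-line patch changes.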