summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@linux.dev>, 2023-01-03 01:53:02 +0300
committer: Kent Overstreet <kent.overstreet@linux.dev>, 2023-10-23 00:09:52 +0300
commit: 7ffb6a7ec6712eb1ba84a80137c2f712e67c4171 (patch)
tree: 0d05e725b41817a28164f1ba6ee19747d010bbf6
parent: dbe17f18838df6d0facf51b43cdc5efd372c28d6 (diff)
download: linux-7ffb6a7ec6712eb1ba84a80137c2f712e67c4171.tar.xz
bcachefs: Fix deadlock on nocow locks in data move path
The recent nocow locking rework introduced a deadlock in the data move path: the new nocow locking scheme uses a hash table with a fixed-size array for chaining, meaning on hash collision we may have to wait for other locks to be released before we can lock a bucket.

And since the data move path needs to submit writes from the same thread that's taking nocow locks and submitting reads, this introduces a deadlock.

This shouldn't happen often in practice, but since the data move path can keep large numbers of IOs in flight simultaneously, it's something we have to handle.

This patch makes move_ctxt_wait_event() available to bch2_data_update_init() and uses it when appropriate, which is our normal solution to this kind of thing.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- fs/bcachefs/data_update.c | 22
-rw-r--r-- fs/bcachefs/data_update.h | 3
-rw-r--r-- fs/bcachefs/io.c | 28
-rw-r--r-- fs/bcachefs/move.c | 21
-rw-r--r-- fs/bcachefs/move.h | 13
5 files changed, 55 insertions, 32 deletions
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index eb248968de48..91bc95b8ecb9 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -397,13 +397,16 @@ void bch2_update_unwritten_extent(struct btree_trans *trans,
}
}
-int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
+int bch2_data_update_init(struct btree_trans *trans,
+ struct moving_context *ctxt,
+ struct data_update *m,
struct write_point_specifier wp,
struct bch_io_opts io_opts,
struct data_update_opts data_opts,
enum btree_id btree_id,
struct bkey_s_c k)
{
+ struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
@@ -460,8 +463,21 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
i++;
- bch2_bucket_nocow_lock(&c->nocow_locks,
- PTR_BUCKET_POS(c, &p.ptr), 0);
+ if (ctxt) {
+ bool locked;
+
+ move_ctxt_wait_event(ctxt, trans,
+ (locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
+ PTR_BUCKET_POS(c, &p.ptr), 0)) ||
+ !atomic_read(&ctxt->read_sectors));
+
+ if (!locked)
+ bch2_bucket_nocow_lock(&c->nocow_locks,
+ PTR_BUCKET_POS(c, &p.ptr), 0);
+ } else {
+ bch2_bucket_nocow_lock(&c->nocow_locks,
+ PTR_BUCKET_POS(c, &p.ptr), 0);
+ }
}
if (reserve_sectors) {
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index f304c3366226..49e9055cbb52 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -33,7 +33,8 @@ void bch2_data_update_read_done(struct data_update *,
void bch2_data_update_exit(struct data_update *);
void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *);
-int bch2_data_update_init(struct bch_fs *, struct data_update *,
+int bch2_data_update_init(struct btree_trans *, struct moving_context *,
+ struct data_update *,
struct write_point_specifier,
struct bch_io_opts, struct data_update_opts,
enum btree_id, struct bkey_s_c);
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index fe0c4b58e525..93d300812b4b 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -1961,7 +1961,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
bch2_data_update_read_done(&op->write, rbio->pick.crc);
}
-static struct promote_op *__promote_alloc(struct bch_fs *c,
+static struct promote_op *__promote_alloc(struct btree_trans *trans,
enum btree_id btree_id,
struct bkey_s_c k,
struct bpos pos,
@@ -1970,6 +1970,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
unsigned sectors,
struct bch_read_bio **rbio)
{
+ struct bch_fs *c = trans->c;
struct promote_op *op = NULL;
struct bio *bio;
unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
@@ -2013,7 +2014,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
bio = &op->write.op.wbio.bio;
bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
- ret = bch2_data_update_init(c, &op->write,
+ ret = bch2_data_update_init(trans, NULL, &op->write,
writepoint_hashed((unsigned long) current),
opts,
(struct data_update_opts) {
@@ -2037,16 +2038,17 @@ err:
}
noinline
-static struct promote_op *promote_alloc(struct bch_fs *c,
- struct bvec_iter iter,
- struct bkey_s_c k,
- struct extent_ptr_decoded *pick,
- struct bch_io_opts opts,
- unsigned flags,
- struct bch_read_bio **rbio,
- bool *bounce,
- bool *read_full)
+static struct promote_op *promote_alloc(struct btree_trans *trans,
+ struct bvec_iter iter,
+ struct bkey_s_c k,
+ struct extent_ptr_decoded *pick,
+ struct bch_io_opts opts,
+ unsigned flags,
+ struct bch_read_bio **rbio,
+ bool *bounce,
+ bool *read_full)
{
+ struct bch_fs *c = trans->c;
bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
/* data might have to be decompressed in the write path: */
unsigned sectors = promote_full
@@ -2060,7 +2062,7 @@ static struct promote_op *promote_alloc(struct bch_fs *c,
if (!should_promote(c, k, pos, opts, flags))
return NULL;
- promote = __promote_alloc(c,
+ promote = __promote_alloc(trans,
k.k->type == KEY_TYPE_reflink_v
? BTREE_ID_reflink
: BTREE_ID_extents,
@@ -2667,7 +2669,7 @@ retry_pick:
}
if (orig->opts.promote_target)
- promote = promote_alloc(c, iter, k, &pick, orig->opts, flags,
+ promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
&rbio, &bounce, &read_full);
if (!read_full) {
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 9e453b8495e8..d0ce656755d7 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -91,7 +91,7 @@ static void move_write(struct moving_io *io)
bch2_data_update_read_done(&io->write, io->rbio.pick.crc);
}
-static inline struct moving_io *next_pending_write(struct moving_context *ctxt)
+struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
{
struct moving_io *io =
list_first_entry_or_null(&ctxt->reads, struct moving_io, list);
@@ -111,29 +111,20 @@ static void move_read_endio(struct bio *bio)
closure_put(&ctxt->cl);
}
-static void do_pending_writes(struct moving_context *ctxt, struct btree_trans *trans)
+void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt,
+ struct btree_trans *trans)
{
struct moving_io *io;
if (trans)
bch2_trans_unlock(trans);
- while ((io = next_pending_write(ctxt))) {
+ while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
list_del(&io->list);
move_write(io);
}
}
-#define move_ctxt_wait_event(_ctxt, _trans, _cond) \
-do { \
- do_pending_writes(_ctxt, _trans); \
- \
- if (_cond) \
- break; \
- __wait_event((_ctxt)->wait, \
- next_pending_write(_ctxt) || (_cond)); \
-} while (1)
-
static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt,
struct btree_trans *trans)
{
@@ -299,8 +290,8 @@ static int bch2_move_extent(struct btree_trans *trans,
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
io->rbio.bio.bi_end_io = move_read_endio;
- ret = bch2_data_update_init(c, &io->write, ctxt->wp, io_opts,
- data_opts, btree_id, k);
+ ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
+ io_opts, data_opts, btree_id, k);
if (ret && ret != -BCH_ERR_unwritten_extent_update)
goto err_free_pages;
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index b14f679f6904..a2822d4a4afb 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -28,6 +28,16 @@ struct moving_context {
wait_queue_head_t wait;
};
+#define move_ctxt_wait_event(_ctxt, _trans, _cond) \
+do { \
+ bch2_moving_ctxt_do_pending_writes(_ctxt, _trans); \
+ \
+ if (_cond) \
+ break; \
+ __wait_event((_ctxt)->wait, \
+ bch2_moving_ctxt_next_pending_write(_ctxt) || (_cond));\
+} while (1)
+
typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c,
struct bch_io_opts *, struct data_update_opts *);
@@ -35,6 +45,9 @@ void bch2_moving_ctxt_exit(struct moving_context *);
void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *,
struct bch_ratelimit *, struct bch_move_stats *,
struct write_point_specifier, bool);
+struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *);
+void bch2_moving_ctxt_do_pending_writes(struct moving_context *,
+ struct btree_trans *);
int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);