From cd404e5b05eb40f3ff27ab9d796f41f0a3b5a2c4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 12 Nov 2023 20:20:35 -0500 Subject: bcachefs: backpointers fsck no longer uses BTREE_ITER_ALL_LEVELS It appears that BTREE_ITER_ALL_LEVELS is racy with concurrent interior node btree updates; unfortunate but not terribly surprising it's a difficult problem - that was the original reason for gc_lock. BTREE_ITER_ALL_LEVELS will probably be deleted in a subsequent patch, this changes backpointers fsck to instead walk keys at one level of the btree at a time. This fixes the tiering_drop_alloc test, which stopped working with the patch to not flush the journal after journal replay. Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 127 ++++++++++++++++++++++----------------------- 1 file changed, 63 insertions(+), 64 deletions(-) (limited to 'fs/bcachefs/backpointers.c') diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 5025a71ad685..69bd6c281d1a 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -426,11 +426,13 @@ static int check_bp_exists(struct btree_trans *trans, memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { if (last_flushed->level != bp.level || !bpos_eq(last_flushed->pos, orig_k.k->p)) { + ret = bch2_btree_write_buffer_flush_sync(trans); + if (ret) + goto err; + last_flushed->level = bp.level; last_flushed->pos = orig_k.k->p; - - ret = bch2_btree_write_buffer_flush_sync(trans) ?: - -BCH_ERR_transaction_restart_write_buffer_flush; + ret = -BCH_ERR_transaction_restart_write_buffer_flush; goto out; } goto missing; @@ -457,25 +459,18 @@ missing: } static int check_extent_to_backpointers(struct btree_trans *trans, - struct btree_iter *iter, + enum btree_id btree, unsigned level, struct bpos bucket_start, struct bpos bucket_end, - struct bpos_level *last_flushed) + struct bpos_level *last_flushed, + struct bkey_s_c k) { struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs; const union bch_extent_entry *entry; struct extent_ptr_decoded p; - struct bkey_s_c k; int ret; - k = bch2_btree_iter_peek_all_levels(iter); - ret = bkey_err(k); - if (ret) - return ret; - if (!k.k) - return 0; - ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { struct bpos bucket_pos; @@ -484,7 +479,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans, if (p.ptr.cached) continue; - bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level, + bch2_extent_ptr_to_bp(c, btree, level, k, p, &bucket_pos, &bp); ret = check_bp_exists(trans, bucket_pos, bp, k, @@ -501,44 +496,33 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans, enum btree_id btree_id, struct bpos bucket_start, struct bpos bucket_end, - struct bpos_level *last_flushed) + struct bpos_level *last_flushed, + int *level) { struct bch_fs *c = trans->c; - struct btree_root *r = bch2_btree_id_root(c, btree_id); struct btree_iter iter; struct btree *b; struct bkey_s_c k; - struct bkey_ptrs_c ptrs; - struct extent_ptr_decoded p; - const union bch_extent_entry *entry; int ret; - - bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, r->level, 0); +retry: + bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, + 0, bch2_btree_id_root(c, btree_id)->b->c.level, 0); b = bch2_btree_iter_peek_node(&iter); ret = PTR_ERR_OR_ZERO(b); if (ret) goto err; - BUG_ON(b != btree_node_root(c, b)); - - k = bkey_i_to_s_c(&b->key); - ptrs = bch2_bkey_ptrs_c(k); - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - struct bpos bucket_pos; - struct bch_backpointer bp; - - if (p.ptr.cached) - continue; + if (b != btree_node_root(c, b)) { + bch2_trans_iter_exit(trans, &iter); + goto retry; + } - bch2_extent_ptr_to_bp(c, iter.btree_id, b->c.level + 1, - k, p, &bucket_pos, &bp); + *level = b->c.level; - ret = check_bp_exists(trans, bucket_pos, bp, k, + k = bkey_i_to_s_c(&b->key); + ret = check_extent_to_backpointers(trans, btree_id, b->c.level + 1, bucket_start, bucket_end, - last_flushed); - if (ret) - goto err; - } + last_flushed, k); err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -616,43 +600,58 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter iter; enum btree_id btree_id; - struct bpos_level last_flushed = { UINT_MAX, POS_MIN }; + struct bkey_s_c k; int ret = 0; for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { - unsigned depth = btree_type_has_ptrs(btree_id) ? 0 : 1; - - bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, - depth, - BTREE_ITER_ALL_LEVELS| - BTREE_ITER_PREFETCH); - - do { - ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw| - BCH_TRANS_COMMIT_no_enospc, - check_extent_to_backpointers(trans, &iter, - bucket_start, bucket_end, - &last_flushed)); - if (ret) - break; - } while (!bch2_btree_iter_advance(&iter)); - - bch2_trans_iter_exit(trans, &iter); - - if (ret) - break; + struct bpos_level last_flushed = { UINT_MAX, POS_MIN }; + int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw| BCH_TRANS_COMMIT_no_enospc, check_btree_root_to_backpointers(trans, btree_id, bucket_start, bucket_end, - &last_flushed)); + &last_flushed, &level)); if (ret) - break; + return ret; + + while (level >= depth) { + bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, + level, + BTREE_ITER_PREFETCH); + while (1) { + bch2_trans_begin(trans); + k = bch2_btree_iter_peek(&iter); + if (!k.k) + break; + ret = bkey_err(k) ?: + check_extent_to_backpointers(trans, btree_id, level, + bucket_start, bucket_end, + &last_flushed, k) ?: + bch2_trans_commit(trans, NULL, NULL, + BCH_TRANS_COMMIT_lazy_rw| + BCH_TRANS_COMMIT_no_enospc); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + ret = 0; + continue; + } + if (ret) + break; + if (bpos_eq(iter.pos, SPOS_MAX)) + break; + bch2_btree_iter_advance(&iter); + } + bch2_trans_iter_exit(trans, &iter); + + if (ret) + return ret; + + --level; + } } - return ret; + + return 0; } static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c, -- cgit v1.2.3