author    Kent Overstreet <kent.overstreet@gmail.com>  2019-12-30 22:37:25 +0300
committer Kent Overstreet <kent.overstreet@linux.dev>  2023-10-23 00:08:36 +0300
commit    e3e464ac6d09269b19cea3dc32b626db44d0e6ba (patch)
tree      7aafd377933161ed88573a5e3dab7ee3d8e0e06a /fs/bcachefs/extent_update.c
parent    57b0b3db475de6b724e4db3b827c00484cdde642 (diff)
download  linux-e3e464ac6d09269b19cea3dc32b626db44d0e6ba.tar.xz
bcachefs: Move extent overwrite handling out of core btree code
Ever since the btree code was first written, handling of overwriting existing extents - including partially overwriting and splitting existing extents - was handled as part of the core btree insert path. The modern transaction and iterator infrastructure didn't exist then, so that was the only way for it to be done.

This patch moves that outside of the core btree code, to a pass that runs at transaction commit time. This is a significant simplification to the btree code and an overall reduction in code size, but more importantly it gets us much closer to the core btree code being completely independent of extents, and is important prep work for snapshots.

This introduces a new feature bit; the old and new extent update models are incompatible when the filesystem needs journal replay.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
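To make the new model concrete, here is a minimal userspace sketch of what such a commit-time overwrite pass does: before a new extent is inserted, each existing extent it overlaps is trimmed, dropped, or split, so that the core btree insert only ever sees non-overlapping keys. This is a sketch, not this patch's code: all types and helpers below are simplified stand-ins for the bkey machinery, and only the four overlap cases mirror bch2_extent_overlap().

/*
 * Minimal userspace sketch of a commit-time overwrite pass. All types and
 * helpers are simplified stand-ins for bcachefs's bkey/btree machinery.
 */
#include <stdio.h>

/* a half-open range of sectors [start, end), like bkey_start_pos(k)..k->p */
struct extent { unsigned start, end; };

enum overlap { OVL_NONE, OVL_FRONT, OVL_BACK, OVL_ALL, OVL_MIDDLE };

static enum overlap classify(struct extent ins, struct extent k)
{
        if (ins.end <= k.start || k.end <= ins.start)
                return OVL_NONE;
        if (ins.start <= k.start && k.end <= ins.end)
                return OVL_ALL;         /* @k completely overwritten */
        if (ins.start <= k.start)
                return OVL_FRONT;       /* front of @k overwritten */
        if (k.end <= ins.end)
                return OVL_BACK;        /* back of @k overwritten */
        return OVL_MIDDLE;              /* @k is split around @ins */
}

/*
 * Trim @k against @ins before inserting @ins; on OVL_MIDDLE the piece of
 * @k in front of @ins is returned via @front. Returns 0 if @k is dropped.
 */
static int trim(struct extent ins, struct extent *k, struct extent *front,
                int *have_front)
{
        *have_front = 0;

        switch (classify(ins, *k)) {
        case OVL_NONE:
                return 1;
        case OVL_ALL:
                return 0;                       /* drop @k entirely */
        case OVL_FRONT:
                k->start = ins.end;             /* like bch2_cut_front() */
                return 1;
        case OVL_BACK:
                k->end = ins.start;             /* like bch2_cut_back() */
                return 1;
        case OVL_MIDDLE:
                front->start = k->start;        /* piece in front of @ins */
                front->end = ins.start;
                *have_front = 1;
                k->start = ins.end;             /* piece behind @ins */
                return 1;
        }
        return 1;
}

int main(void)
{
        struct extent ins = { 10, 20 }, k = { 5, 25 }, front;
        int have_front;

        if (trim(ins, &k, &front, &have_front)) {
                if (have_front)
                        printf("front piece [%u,%u)\n", front.start, front.end);
                printf("back piece  [%u,%u)\n", k.start, k.end);
        }
        printf("insert      [%u,%u)\n", ins.start, ins.end);
        return 0;
}

With ins = [10,20) overwriting the middle of k = [5,25), this prints the front piece [5,10) and the back piece [20,25): the same split the old in-btree extent_squash() performed for BCH_EXTENT_OVERLAP_MIDDLE, now done before the core insert ever runs.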
Diffstat (limited to 'fs/bcachefs/extent_update.c')
-rw-r--r--  fs/bcachefs/extent_update.c | 410
1 file changed, 27 insertions(+), 383 deletions(-)
diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c
index 846d77dc2530..fa6c0698f385 100644
--- a/fs/bcachefs/extent_update.c
+++ b/fs/bcachefs/extent_update.c
@@ -39,6 +39,12 @@ static int count_iters_for_insert(struct btree_trans *trans,
 {
         int ret = 0;
 
+        /*
+         * The extent update path requires an _additional_ iterator for each
+         * extent we're inserting and overwriting:
+         */
+        *nr_iters += 1;
+
         switch (k.k->type) {
         case KEY_TYPE_extent:
         case KEY_TYPE_reflink_v:
@@ -167,402 +173,40 @@ int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
 enum btree_insert_ret
 bch2_extent_can_insert(struct btree_trans *trans,
                        struct btree_iter *iter,
-                       struct bkey_i *insert,
-                       unsigned *u64s)
+                       struct bkey_i *insert)
 {
         struct btree_iter_level *l = &iter->l[0];
         struct btree_node_iter node_iter = l->iter;
         struct bkey_packed *_k;
+        struct bkey_s_c k;
         struct bkey unpacked;
         int sectors;
 
-        while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
-                                                      KEY_TYPE_discard))) {
-                struct bkey_s_c k = bkey_disassemble(l->b, _k, &unpacked);
-                enum bch_extent_overlap overlap =
-                        bch2_extent_overlap(&insert->k, k.k);
-
-                if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
-                        break;
-
-                overlap = bch2_extent_overlap(&insert->k, k.k);
-
-                /*
-                 * If we're overwriting an existing extent, we may need to emit
-                 * a whiteout - unless we're inserting a new extent at the same
-                 * position:
-                 */
-                if (k.k->needs_whiteout &&
-                    (!bkey_whiteout(&insert->k) ||
-                     bkey_cmp(k.k->p, insert->k.p)))
-                        *u64s += BKEY_U64s;
-
-                /*
-                 * If we're partially overwriting an existing extent which has
-                 * been written out to disk, we'll need to emit a new version of
-                 * that extent:
-                 */
-                if (bkey_written(l->b, _k) &&
-                    overlap != BCH_EXTENT_OVERLAP_ALL)
-                        *u64s += _k->u64s;
-
-                /* And we may be splitting an existing extent: */
-                if (overlap == BCH_EXTENT_OVERLAP_MIDDLE)
-                        *u64s += _k->u64s;
-
-                if (overlap == BCH_EXTENT_OVERLAP_MIDDLE &&
-                    (sectors = bch2_bkey_sectors_compressed(k))) {
-                        int flags = trans->flags & BTREE_INSERT_NOFAIL
-                                ? BCH_DISK_RESERVATION_NOFAIL : 0;
-
-                        switch (bch2_disk_reservation_add(trans->c,
-                                        trans->disk_res,
-                                        sectors, flags)) {
-                        case 0:
-                                break;
-                        case -ENOSPC:
-                                return BTREE_INSERT_ENOSPC;
-                        default:
-                                BUG();
-                        }
-                }
-
-                if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
-                    overlap == BCH_EXTENT_OVERLAP_MIDDLE)
-                        break;
-
-                bch2_btree_node_iter_advance(&node_iter, l->b);
-        }
-
-        return BTREE_INSERT_OK;
-}
-
-static void verify_extent_nonoverlapping(struct bch_fs *c,
-                                         struct btree *b,
-                                         struct btree_node_iter *_iter,
-                                         struct bkey_i *insert)
-{
-#ifdef CONFIG_BCACHEFS_DEBUG
-        struct btree_node_iter iter;
-        struct bkey_packed *k;
-        struct bkey uk;
-
-        if (!expensive_debug_checks(c))
-                return;
-
-        iter = *_iter;
-        k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard);
-        BUG_ON(k &&
-               (uk = bkey_unpack_key(b, k),
-                bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0));
-
-        iter = *_iter;
-        k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_discard);
-#if 0
-        BUG_ON(k &&
-               (uk = bkey_unpack_key(b, k),
-                bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0);
-#else
-        if (k &&
-            (uk = bkey_unpack_key(b, k),
-             bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0) {
-                char buf1[100];
-                char buf2[100];
-
-                bch2_bkey_to_text(&PBUF(buf1), &insert->k);
-                bch2_bkey_to_text(&PBUF(buf2), &uk);
-
-                bch2_dump_btree_node(b);
-                panic("insert > next :\n"
-                      "insert %s\n"
-                      "next %s\n",
-                      buf1, buf2);
-        }
-#endif
-
-#endif
-}
-
-static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
-                               struct bkey_i *insert)
-{
-        struct btree_iter_level *l = &iter->l[0];
-        struct bkey_packed *k =
-                bch2_btree_node_iter_bset_pos(&l->iter, l->b, bset_tree_last(l->b));
-
-        BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, l->b));
-
-        EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
-        verify_extent_nonoverlapping(c, l->b, &l->iter, insert);
-
-        if (debug_check_bkeys(c))
-                bch2_bkey_debugcheck(c, l->b, bkey_i_to_s_c(insert));
-
-        bch2_bset_insert(l->b, &l->iter, k, insert, 0);
-        bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
-}
-
-static void pack_push_whiteout(struct bch_fs *c, struct btree *b,
-                               struct bpos pos)
-{
-        struct bkey_packed k;
-
-        if (!bkey_pack_pos(&k, pos, b)) {
-                struct bkey_i tmp;
-
-                bkey_init(&tmp.k);
-                tmp.k.p = pos;
-                bkey_copy(&k, &tmp);
-        }
-
-        k.needs_whiteout = true;
-        push_whiteout(c, b, &k);
-}
-
-static void
-extent_drop(struct bch_fs *c, struct btree_iter *iter,
-            struct bkey_packed *_k, struct bkey_s k)
-{
-        struct btree_iter_level *l = &iter->l[0];
-
-        if (!bkey_whiteout(k.k))
-                btree_account_key_drop(l->b, _k);
-
-        k.k->size = 0;
-        k.k->type = KEY_TYPE_deleted;
-
-        if (!btree_node_old_extent_overwrite(l->b) &&
-            k.k->needs_whiteout) {
-                pack_push_whiteout(c, l->b, k.k->p);
-                k.k->needs_whiteout = false;
-        }
-
-        if (_k >= btree_bset_last(l->b)->start) {
-                unsigned u64s = _k->u64s;
-
-                bch2_bset_delete(l->b, _k, _k->u64s);
-                bch2_btree_node_iter_fix(iter, l->b, &l->iter, _k, u64s, 0);
-        } else {
-                extent_save(l->b, _k, k.k);
-                bch2_btree_iter_fix_key_modified(iter, l->b, _k);
-        }
-}
-
-static void
-extent_squash(struct bch_fs *c, struct btree_iter *iter,
-              struct bkey_i *insert,
-              struct bkey_packed *_k, struct bkey_s k,
-              enum bch_extent_overlap overlap)
-{
-        struct btree_iter_level *l = &iter->l[0];
-        struct bkey_on_stack tmp, split;
-
-        bkey_on_stack_init(&tmp);
-        bkey_on_stack_init(&split);
-
-        if (!btree_node_old_extent_overwrite(l->b)) {
-                if (!bkey_whiteout(&insert->k) &&
-                    !bkey_cmp(k.k->p, insert->k.p)) {
-                        insert->k.needs_whiteout = k.k->needs_whiteout;
-                        k.k->needs_whiteout = false;
-                }
-        } else {
-                insert->k.needs_whiteout |= k.k->needs_whiteout;
-        }
-
-        switch (overlap) {
-        case BCH_EXTENT_OVERLAP_FRONT:
-                if (bkey_written(l->b, _k)) {
-                        bkey_on_stack_reassemble(&tmp, c, k.s_c);
-                        bch2_cut_front(insert->k.p, tmp.k);
-
-                        /*
-                         * needs_whiteout was propagated to new version of @k,
-                         * @tmp:
-                         */
-                        if (!btree_node_old_extent_overwrite(l->b))
-                                k.k->needs_whiteout = false;
-
-                        extent_drop(c, iter, _k, k);
-                        extent_bset_insert(c, iter, tmp.k);
-                } else {
-                        btree_keys_account_val_delta(l->b, _k,
-                                bch2_cut_front_s(insert->k.p, k));
-
-                        extent_save(l->b, _k, k.k);
-                        /*
-                         * No need to call bset_fix_invalidated_key, start of
-                         * extent changed but extents are indexed by where they
-                         * end
-                         */
-                        bch2_btree_iter_fix_key_modified(iter, l->b, _k);
-                }
-                break;
-        case BCH_EXTENT_OVERLAP_BACK:
-                if (bkey_written(l->b, _k)) {
-                        bkey_on_stack_reassemble(&tmp, c, k.s_c);
-                        bch2_cut_back(bkey_start_pos(&insert->k), tmp.k);
-
-                        /*
-                         * @tmp has different position than @k, needs_whiteout
-                         * should not be propagated:
-                         */
-                        if (!btree_node_old_extent_overwrite(l->b))
-                                tmp.k->k.needs_whiteout = false;
-
-                        extent_drop(c, iter, _k, k);
-                        extent_bset_insert(c, iter, tmp.k);
-                } else {
-                        /*
-                         * position of @k is changing, emit a whiteout if
-                         * needs_whiteout is set:
-                         */
-                        if (!btree_node_old_extent_overwrite(l->b) &&
-                            k.k->needs_whiteout) {
-                                pack_push_whiteout(c, l->b, k.k->p);
-                                k.k->needs_whiteout = false;
-                        }
-
-                        btree_keys_account_val_delta(l->b, _k,
-                                bch2_cut_back_s(bkey_start_pos(&insert->k), k));
-                        extent_save(l->b, _k, k.k);
-
-                        bch2_bset_fix_invalidated_key(l->b, _k);
-                        bch2_btree_node_iter_fix(iter, l->b, &l->iter,
-                                                 _k, _k->u64s, _k->u64s);
-                }
-                break;
-        case BCH_EXTENT_OVERLAP_ALL:
-                extent_drop(c, iter, _k, k);
-                break;
-        case BCH_EXTENT_OVERLAP_MIDDLE:
-                bkey_on_stack_reassemble(&split, c, k.s_c);
-                bch2_cut_back(bkey_start_pos(&insert->k), split.k);
-
-                if (!btree_node_old_extent_overwrite(l->b))
-                        split.k->k.needs_whiteout = false;
-
-                /* this is identical to BCH_EXTENT_OVERLAP_FRONT: */
-                if (bkey_written(l->b, _k)) {
-                        bkey_on_stack_reassemble(&tmp, c, k.s_c);
-                        bch2_cut_front(insert->k.p, tmp.k);
-
-                        if (!btree_node_old_extent_overwrite(l->b))
-                                k.k->needs_whiteout = false;
-
-                        extent_drop(c, iter, _k, k);
-                        extent_bset_insert(c, iter, tmp.k);
-                } else {
-                        btree_keys_account_val_delta(l->b, _k,
-                                bch2_cut_front_s(insert->k.p, k));
-
-                        extent_save(l->b, _k, k.k);
-                        bch2_btree_iter_fix_key_modified(iter, l->b, _k);
-                }
-
-                extent_bset_insert(c, iter, split.k);
-                break;
-        }
-
-        bkey_on_stack_exit(&split, c);
-        bkey_on_stack_exit(&tmp, c);
-}
+        _k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
+                                              KEY_TYPE_discard);
+        if (!_k)
+                return BTREE_INSERT_OK;
+
-/**
- * bch_extent_insert_fixup - insert a new extent and deal with overlaps
- *
- * this may result in not actually doing the insert, or inserting some subset
- * of the insert key. For cmpxchg operations this is where that logic lives.
- *
- * All subsets of @insert that need to be inserted are inserted using
- * bch2_btree_insert_and_journal(). If @b or @res fills up, this function
- * returns false, setting @iter->pos for the prefix of @insert that actually got
- * inserted.
- *
- * BSET INVARIANTS: this function is responsible for maintaining all the
- * invariants for bsets of extents in memory. things get really hairy with 0
- * size extents
- *
- * within one bset:
- *
- *      bkey_start_pos(bkey_next(k)) >= k
- * or   bkey_start_offset(bkey_next(k)) >= k->offset
- *
- * i.e. strict ordering, no overlapping extents.
- *
- * multiple bsets (i.e. full btree node):
- *
- * ∀ k, j
- *   k.size != 0 ∧ j.size != 0 →
- *     ¬ (k > bkey_start_pos(j) ∧ k < j)
- *
- * i.e. no two overlapping keys _of nonzero size_
- *
- * We can't realistically maintain this invariant for zero size keys because of
- * the key merging done in bch2_btree_insert_key() - for two mergeable keys k, j
- * there may be another 0 size key between them in another bset, and it will
- * thus overlap with the merged key.
- *
- * In addition, the end of iter->pos indicates how much has been processed.
- * If the end of iter->pos is not the same as the end of insert, then
- * key insertion needs to continue/be retried.
- */
-void bch2_insert_fixup_extent(struct btree_trans *trans,
-                              struct btree_iter *iter,
-                              struct bkey_i *insert)
-{
-        struct bch_fs *c = trans->c;
-        struct btree_iter_level *l = &iter->l[0];
-        struct btree_node_iter node_iter = l->iter;
-        bool do_update = !bkey_whiteout(&insert->k);
-        struct bkey_packed *_k;
-        struct bkey unpacked;
+        k = bkey_disassemble(l->b, _k, &unpacked);
-        EBUG_ON(iter->level);
-        EBUG_ON(!insert->k.size);
-        EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
+        /* Check if we're splitting a compressed extent: */
-        while ((_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b,
-                                                      KEY_TYPE_discard))) {
-                struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked);
-                enum bch_extent_overlap overlap =
-                        bch2_extent_overlap(&insert->k, k.k);
+        if (bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k)) > 0 &&
+            bkey_cmp(insert->k.p, k.k->p) < 0 &&
+            (sectors = bch2_bkey_sectors_compressed(k))) {
+                int flags = trans->flags & BTREE_INSERT_NOFAIL
+                        ? BCH_DISK_RESERVATION_NOFAIL : 0;
-                if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
+                switch (bch2_disk_reservation_add(trans->c, trans->disk_res,
+                                                  sectors, flags)) {
+                case 0:
                         break;
-
-                if (!bkey_whiteout(k.k))
-                        do_update = true;
-
-                if (!do_update) {
-                        struct bpos cur_end = bpos_min(insert->k.p, k.k->p);
-
-                        bch2_cut_front(cur_end, insert);
-                        bch2_btree_iter_set_pos_same_leaf(iter, cur_end);
-                } else {
-                        extent_squash(c, iter, insert, _k, k, overlap);
+                case -ENOSPC:
+                        return BTREE_INSERT_ENOSPC;
+                default:
+                        BUG();
                 }
-
-                node_iter = l->iter;
-
-                if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
-                    overlap == BCH_EXTENT_OVERLAP_MIDDLE)
-                        break;
         }
-        l->iter = node_iter;
-        bch2_btree_iter_set_pos_same_leaf(iter, insert->k.p);
-
-        if (do_update) {
-                if (insert->k.type == KEY_TYPE_deleted)
-                        insert->k.type = KEY_TYPE_discard;
-
-                if (!bkey_whiteout(&insert->k) ||
-                    btree_node_old_extent_overwrite(l->b))
-                        extent_bset_insert(c, iter, insert);
-
-                bch2_btree_journal_key(trans, iter, insert);
-        }
-
-        bch2_cut_front(insert->k.p, insert);
+        return BTREE_INSERT_OK;
 }
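
For reference, the one check that survives in the rewritten bch2_extent_can_insert() above has a simple shape: only a strict middle overlap forces a split, and splitting a compressed extent needs an up-front disk reservation, since the compressed payload itself can't be split: both halves keep referencing it, and accounted usage grows by its compressed size. A standalone sketch of that condition, under the same caveat as the sketch above (simplified stand-in types, not the kernel API; only the two comparisons mirror the patch):

#include <assert.h>
#include <stdbool.h>

/* [start, end) in sectors; compressed_sectors is 0 for uncompressed data */
struct extent { unsigned start, end, compressed_sectors; };

/*
 * Mirrors the patch's test:
 *   bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k)) > 0 &&
 *   bkey_cmp(insert->k.p, k.k->p) < 0
 * i.e. @ins lands strictly inside @k, so @k must be split in two.
 */
static bool splits_in_middle(struct extent ins, struct extent k)
{
        return ins.start > k.start && ins.end < k.end;
}

/* sectors to reserve before committing; 0 if no reservation is needed */
static unsigned reservation_needed(struct extent ins, struct extent k)
{
        return splits_in_middle(ins, k) ? k.compressed_sectors : 0;
}

int main(void)
{
        struct extent k = { 0, 128, 32 };  /* 128 sectors, compressed to 32 */

        /* overwriting 32..64 splits @k: both halves still pin the compressed
         * payload, so reserve its 32 sectors up front */
        assert(reservation_needed((struct extent){ 32, 64, 0 }, k) == 32);

        /* overwriting the front 0..64 only trims @k: no reservation */
        assert(reservation_needed((struct extent){ 0, 64, 0 }, k) == 0);

        return 0;
}

In the patch itself the reservation is taken with bch2_disk_reservation_add(), using BCH_DISK_RESERVATION_NOFAIL when the transaction has BTREE_INSERT_NOFAIL set, and -ENOSPC maps to BTREE_INSERT_ENOSPC.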