author     Kent Overstreet <kent.overstreet@gmail.com>   2019-04-15 21:58:00 +0300
committer  Kent Overstreet <kent.overstreet@linux.dev>   2023-10-23 00:08:20 +0300
commit     c6dd04f8f5644d92361bb2d6e47fa9b4d5af6d79
tree       1064fd24845685f95801d72c8211e978dcd94a64
parent     d07343561e263fcbbdb8042f35ca29a602190e18
bcachefs: Mark overwrites from journal replay in initial gc
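
Initial mark and sweep gc now walks the btree for each journal key and
marks the existing keys that key will overwrite, not just the key
itself. Journal replay can then insert those keys without repeating the
accounting (BTREE_INSERT_NOMARK, or BTREE_INSERT_NOMARK_OVERWRITES when
a compressed extent has to be split and remarked), so disk usage counts
stay consistent across replay.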
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--  fs/bcachefs/btree_gc.c           |  35
-rw-r--r--  fs/bcachefs/btree_update.h       |   4
-rw-r--r--  fs/bcachefs/btree_update_leaf.c  |  46
-rw-r--r--  fs/bcachefs/buckets.c            | 104
-rw-r--r--  fs/bcachefs/buckets.h            |   3
-rw-r--r--  fs/bcachefs/recovery.c           | 107
6 files changed, 192 insertions(+), 107 deletions(-)
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index cf0a2f4b22af..2650f60b7cd7 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -273,11 +273,40 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
(int) btree_id_to_gc_phase(r);
}
+static int mark_journal_key(struct bch_fs *c, enum btree_id id,
+ struct bkey_i *insert)
+{
+ struct btree_trans trans;
+ struct btree_iter *iter;
+ struct bkey_s_c k;
+ u8 max_stale;
+ int ret = 0;
+
+ ret = bch2_gc_mark_key(c, bkey_i_to_s_c(insert), &max_stale, true);
+ if (ret)
+ return ret;
+
+ bch2_trans_init(&trans, c);
+
+ for_each_btree_key(&trans, iter, id, bkey_start_pos(&insert->k),
+ BTREE_ITER_SLOTS, k) {
+ percpu_down_read(&c->mark_lock);
+ ret = bch2_mark_overwrite(&trans, iter, k, insert, NULL,
+ BCH_BUCKET_MARK_GC|
+ BCH_BUCKET_MARK_NOATOMIC);
+ percpu_up_read(&c->mark_lock);
+
+ if (!ret)
+ break;
+ }
+
+ return bch2_trans_exit(&trans);
+}
+
static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys,
bool initial, bool metadata_only)
{
enum btree_id ids[BTREE_ID_NR];
- u8 max_stale;
unsigned i;
for (i = 0; i < BTREE_ID_NR; i++)
@@ -299,9 +328,7 @@ static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys,
for_each_journal_key(*journal_keys, j)
if (j->btree_id == id) {
- ret = bch2_gc_mark_key(c,
- bkey_i_to_s_c(j->k),
- &max_stale, initial);
+ ret = mark_journal_key(c, id, j->k);
if (ret)
return ret;
}
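
Note: mark_journal_key() marks the journal key itself via
bch2_gc_mark_key(), then walks the existing keys it overlaps and marks
each overwritten range with BCH_BUCKET_MARK_GC|BCH_BUCKET_MARK_NOATOMIC.
bch2_mark_overwrite() returns true while the iterated key still overlaps
the new key, so the walk ends at the first old key wholly past it. A
minimal sketch of that termination test for extents, on a hypothetical
plain-integer model rather than the kernel's bkey types:

	#include <stdio.h>

	struct ext { long start, end; };	/* [start, end); end == k->p.offset */

	/* Mirrors "bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0 => stop":
	 * iteration ends once old starts at or past new's end. */
	static int still_overlaps(struct ext new, struct ext old)
	{
		return old.start < new.end;
	}

	int main(void)
	{
		struct ext new = { 10, 20 };

		printf("%d\n", still_overlaps(new, (struct ext){ 15, 30 }));	/* 1 */
		printf("%d\n", still_overlaps(new, (struct ext){ 20, 40 }));	/* 0 */
		return 0;
	}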
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 75ed02874767..7a638a76634f 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -43,6 +43,7 @@ enum {
__BTREE_INSERT_USE_ALLOC_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY,
__BTREE_INSERT_JOURNAL_RESERVED,
+ __BTREE_INSERT_NOMARK_OVERWRITES,
__BTREE_INSERT_NOMARK,
__BTREE_INSERT_NOWAIT,
__BTREE_INSERT_GC_LOCK_HELD,
@@ -76,6 +77,9 @@ enum {
#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED)
+/* Don't mark overwrites, just new key: */
+#define BTREE_INSERT_NOMARK_OVERWRITES (1 << __BTREE_INSERT_NOMARK_OVERWRITES)
+
/* Don't call bch2_mark_key: */
#define BTREE_INSERT_NOMARK (1 << __BTREE_INSERT_NOMARK)
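
Note: the two flags are related but distinct: BTREE_INSERT_NOMARK
suppresses all accounting for the update, while
BTREE_INSERT_NOMARK_OVERWRITES still marks the inserted key and only
skips the keys it overwrites. A compilable sketch of the semantics,
with hypothetical bit indices standing in for the enum above:

	#include <stdio.h>

	enum { __F_NOMARK_OVERWRITES, __F_NOMARK };	/* hypothetical indices */
	#define F_NOMARK_OVERWRITES	(1 << __F_NOMARK_OVERWRITES)
	#define F_NOMARK		(1 << __F_NOMARK)

	int main(void)
	{
		unsigned flags = F_NOMARK_OVERWRITES;

		printf("mark new key:    %d\n", !(flags & F_NOMARK));		/* 1 */
		printf("mark overwrites: %d\n",
		       !(flags & (F_NOMARK|F_NOMARK_OVERWRITES)));		/* 0 */
		return 0;
	}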
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 48d3be517471..2633a5452b13 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -542,20 +542,22 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
btree_trans_lock_write(c, trans);
- trans_for_each_update_iter(trans, i) {
- if (i->deferred ||
- !btree_node_type_needs_gc(i->iter->btree_id))
- continue;
+ if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) {
+ trans_for_each_update_iter(trans, i) {
+ if (i->deferred ||
+ !btree_node_type_needs_gc(i->iter->btree_id))
+ continue;
- if (!fs_usage) {
- percpu_down_read(&c->mark_lock);
- fs_usage = bch2_fs_usage_scratch_get(c);
- }
+ if (!fs_usage) {
+ percpu_down_read(&c->mark_lock);
+ fs_usage = bch2_fs_usage_scratch_get(c);
+ }
- if (!bch2_bkey_replicas_marked_locked(c,
- bkey_i_to_s_c(i->k), true)) {
- ret = BTREE_INSERT_NEED_MARK_REPLICAS;
- goto out;
+ if (!bch2_bkey_replicas_marked_locked(c,
+ bkey_i_to_s_c(i->k), true)) {
+ ret = BTREE_INSERT_NEED_MARK_REPLICAS;
+ goto out;
+ }
}
}
@@ -602,16 +604,18 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
linked->flags |= BTREE_ITER_NOUNLOCK;
}
- trans_for_each_update_iter(trans, i)
- bch2_mark_update(trans, i, fs_usage, 0);
- if (fs_usage)
- bch2_trans_fs_usage_apply(trans, fs_usage);
-
- if (unlikely(c->gc_pos.phase)) {
+ if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) {
trans_for_each_update_iter(trans, i)
- if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
- bch2_mark_update(trans, i, NULL,
- BCH_BUCKET_MARK_GC);
+ bch2_mark_update(trans, i, fs_usage, 0);
+ if (fs_usage)
+ bch2_trans_fs_usage_apply(trans, fs_usage);
+
+ if (unlikely(c->gc_pos.phase)) {
+ trans_for_each_update_iter(trans, i)
+ if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
+ bch2_mark_update(trans, i, NULL,
+ BCH_BUCKET_MARK_GC);
+ }
}
trans_for_each_update(trans, i)
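
Note: with BTREE_INSERT_NOMARK set, do_btree_insert_at() now skips both
the replicas-marked check and every bch2_mark_update() call, including
the gc pass. A caller that has already accounted for a key elsewhere,
as journal replay does after initial gc, would commit roughly like this
(sketch; flags taken from the recovery.c hunk below):

	unsigned flags = BTREE_INSERT_ATOMIC|
			 BTREE_INSERT_NOFAIL|
			 BTREE_INSERT_JOURNAL_REPLAY|
			 BTREE_INSERT_NOMARK;

	bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k));
	ret = bch2_trans_commit(&trans, NULL, NULL, flags);	/* no marking done */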
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 4fe66ee1f745..7a05ba5fd589 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -1035,6 +1035,56 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
return ret;
}
+inline bool bch2_mark_overwrite(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c old,
+ struct bkey_i *new,
+ struct bch_fs_usage *fs_usage,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct btree *b = iter->l[0].b;
+ s64 sectors = 0;
+
+ if (btree_node_is_extents(b)
+ ? bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0
+ : bkey_cmp(new->k.p, old.k->p))
+ return false;
+
+ if (btree_node_is_extents(b)) {
+ switch (bch2_extent_overlap(&new->k, old.k)) {
+ case BCH_EXTENT_OVERLAP_ALL:
+ sectors = -((s64) old.k->size);
+ break;
+ case BCH_EXTENT_OVERLAP_BACK:
+ sectors = bkey_start_offset(&new->k) -
+ old.k->p.offset;
+ break;
+ case BCH_EXTENT_OVERLAP_FRONT:
+ sectors = bkey_start_offset(old.k) -
+ new->k.p.offset;
+ break;
+ case BCH_EXTENT_OVERLAP_MIDDLE:
+ sectors = old.k->p.offset - new->k.p.offset;
+ BUG_ON(sectors <= 0);
+
+ bch2_mark_key_locked(c, old, true, sectors,
+ fs_usage, trans->journal_res.seq,
+ flags);
+
+ sectors = bkey_start_offset(&new->k) -
+ old.k->p.offset;
+ break;
+ }
+
+ BUG_ON(sectors >= 0);
+ }
+
+ bch2_mark_key_locked(c, old, false, sectors,
+ fs_usage, trans->journal_res.seq, flags);
+ return true;
+}
+
void bch2_mark_update(struct btree_trans *trans,
struct btree_insert_entry *insert,
struct bch_fs_usage *fs_usage,
@@ -1049,57 +1099,23 @@ void bch2_mark_update(struct btree_trans *trans,
if (!btree_node_type_needs_gc(iter->btree_id))
return;
- if (!(trans->flags & BTREE_INSERT_NOMARK))
- bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
- bpos_min(insert->k->k.p, b->key.k.p).offset -
- bkey_start_offset(&insert->k->k),
- fs_usage, trans->journal_res.seq, flags);
+ bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
+ bpos_min(insert->k->k.p, b->key.k.p).offset -
+ bkey_start_offset(&insert->k->k),
+ fs_usage, trans->journal_res.seq, flags);
+
+ if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES))
+ return;
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
KEY_TYPE_discard))) {
struct bkey unpacked;
- struct bkey_s_c k;
- s64 sectors = 0;
+ struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked);
- k = bkey_disassemble(b, _k, &unpacked);
-
- if (btree_node_is_extents(b)
- ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
- : bkey_cmp(insert->k->k.p, k.k->p))
+ if (!bch2_mark_overwrite(trans, iter, k, insert->k,
+ fs_usage, flags))
break;
- if (btree_node_is_extents(b)) {
- switch (bch2_extent_overlap(&insert->k->k, k.k)) {
- case BCH_EXTENT_OVERLAP_ALL:
- sectors = -((s64) k.k->size);
- break;
- case BCH_EXTENT_OVERLAP_BACK:
- sectors = bkey_start_offset(&insert->k->k) -
- k.k->p.offset;
- break;
- case BCH_EXTENT_OVERLAP_FRONT:
- sectors = bkey_start_offset(k.k) -
- insert->k->k.p.offset;
- break;
- case BCH_EXTENT_OVERLAP_MIDDLE:
- sectors = k.k->p.offset - insert->k->k.p.offset;
- BUG_ON(sectors <= 0);
-
- bch2_mark_key_locked(c, k, true, sectors,
- fs_usage, trans->journal_res.seq,
- flags);
-
- sectors = bkey_start_offset(&insert->k->k) -
- k.k->p.offset;
- break;
- }
-
- BUG_ON(sectors >= 0);
- }
-
- bch2_mark_key_locked(c, k, false, sectors,
- fs_usage, trans->journal_res.seq, flags);
-
bch2_btree_node_iter_advance(&node_iter, b);
}
}
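
Note: bch2_mark_overwrite() charges back the portion of the old key
that the new key shadows; only BCH_EXTENT_OVERLAP_MIDDLE needs two
calls, because the surviving tail of the old key is re-added first. A
standalone sketch of the arithmetic on a hypothetical plain-integer
extent model (not the kernel's bkey types):

	#include <stdio.h>

	/* bcachefs stores a key's end in k->p.offset and derives its
	 * start via bkey_start_offset(); model both as plain longs. */
	struct ext { long start, end; };		/* [start, end) */

	/* Net sector delta applied for "old" when "new" overwrites it
	 * (negative, as passed to bch2_mark_key_locked()). */
	static long overwrite_delta(struct ext new, struct ext old)
	{
		if (new.start <= old.start && new.end >= old.end)
			return -(old.end - old.start);	/* OVERLAP_ALL: whole key gone */
		if (new.start > old.start && new.end >= old.end)
			return new.start - old.end;	/* OVERLAP_BACK */
		if (new.start <= old.start)
			return old.start - new.end;	/* OVERLAP_FRONT */
		/*
		 * OVERLAP_MIDDLE: the kernel first re-marks old for the
		 * tail it keeps (old.end - new.end, positive), then
		 * subtracts new.start - old.end; the net change is
		 * -(new.end - new.start).
		 */
		return (old.end - new.end) + (new.start - old.end);
	}

	int main(void)
	{
		struct ext old = { 0, 100 };

		printf("%ld\n", overwrite_delta((struct ext){   0, 100 }, old)); /* -100 */
		printf("%ld\n", overwrite_delta((struct ext){  60, 120 }, old)); /* -40  */
		printf("%ld\n", overwrite_delta((struct ext){ -20,  30 }, old)); /* -30  */
		printf("%ld\n", overwrite_delta((struct ext){  40,  70 }, old)); /* -30  */
		return 0;
	}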
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 095015f17f76..90fffee1c289 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -254,6 +254,9 @@ int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
struct disk_reservation *);
+bool bch2_mark_overwrite(struct btree_trans *, struct btree_iter *,
+ struct bkey_s_c, struct bkey_i *,
+ struct bch_fs_usage *, unsigned);
void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
struct bch_fs_usage *, unsigned);
void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 5bfb38c4290f..d207ff7b98f4 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -203,63 +203,94 @@ static void replay_now_at(struct journal *j, u64 seq)
static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
{
struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter *iter, *split_iter;
/*
- * We might cause compressed extents to be
- * split, so we need to pass in a
- * disk_reservation:
+ * We might cause compressed extents to be split, so we need to pass in
+ * a disk_reservation:
*/
struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0);
- BKEY_PADDED(k) split;
+ struct bkey_i *split;
+ bool split_compressed = false;
+ unsigned flags = BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW|
+ BTREE_INSERT_JOURNAL_REPLAY|
+ BTREE_INSERT_NOMARK;
int ret;
bch2_trans_init(&trans, c);
+ bch2_trans_preload_iters(&trans);
+retry:
+ bch2_trans_begin(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
bkey_start_pos(&k->k),
BTREE_ITER_INTENT);
+
do {
ret = bch2_btree_iter_traverse(iter);
if (ret)
- break;
+ goto err;
- bkey_copy(&split.k, k);
- bch2_cut_front(iter->pos, &split.k);
- bch2_extent_trim_atomic(&split.k, iter);
-
- ret = bch2_disk_reservation_add(c, &disk_res,
- split.k.k.size *
- bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&split.k)),
- BCH_DISK_RESERVATION_NOFAIL);
- BUG_ON(ret);
-
- bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &split.k));
- ret = bch2_trans_commit(&trans, &disk_res, NULL,
- BTREE_INSERT_ATOMIC|
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_LAZY_RW|
- BTREE_INSERT_JOURNAL_REPLAY);
- } while ((!ret || ret == -EINTR) &&
- bkey_cmp(k->k.p, iter->pos));
+ split_iter = bch2_trans_copy_iter(&trans, iter);
+ ret = PTR_ERR_OR_ZERO(split_iter);
+ if (ret)
+ goto err;
- bch2_disk_reservation_put(c, &disk_res);
+ split = bch2_trans_kmalloc(&trans, bkey_bytes(&k->k));
+ ret = PTR_ERR_OR_ZERO(split);
+ if (ret)
+ goto err;
- /*
- * This isn't strictly correct - we should only be relying on the btree
- * node lock for synchronization with gc when we've got a write lock
- * held.
- *
- * but - there are other correctness issues if btree gc were to run
- * before journal replay finishes
- */
- BUG_ON(c->gc_pos.phase);
+ if (!split_compressed &&
+ bch2_extent_is_compressed(bkey_i_to_s_c(k)) &&
+ !bch2_extent_is_atomic(k, split_iter)) {
+ ret = bch2_disk_reservation_add(c, &disk_res,
+ k->k.size *
+ bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(k)),
+ BCH_DISK_RESERVATION_NOFAIL);
+ BUG_ON(ret);
+
+ flags &= ~BTREE_INSERT_JOURNAL_REPLAY;
+ flags &= ~BTREE_INSERT_NOMARK;
+ flags |= BTREE_INSERT_NOMARK_OVERWRITES;
+ split_compressed = true;
+ }
- bch2_mark_key(c, bkey_i_to_s_c(k), false, -((s64) k->k.size),
- NULL, 0, 0);
- bch2_trans_exit(&trans);
+ bkey_copy(split, k);
+ bch2_cut_front(split_iter->pos, split);
+ bch2_extent_trim_atomic(split, split_iter);
- return ret;
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(split_iter, split));
+ bch2_btree_iter_set_pos(iter, split->k.p);
+ } while (bkey_cmp(iter->pos, k->k.p) < 0);
+
+ ret = bch2_trans_commit(&trans, &disk_res, NULL, flags);
+ if (ret)
+ goto err;
+
+ if (split_compressed) {
+ /*
+ * This isn't strictly correct - we should only be relying on
+ * the btree node lock for synchronization with gc when we've
+ * got a write lock held.
+ *
+ * but - there are other correctness issues if btree gc were to
+ * run before journal replay finishes
+ */
+ BUG_ON(c->gc_pos.phase);
+
+ bch2_mark_key(c, bkey_i_to_s_c(k), false, -((s64) k->k.size),
+ NULL, 0, 0);
+ }
+err:
+ if (ret == -EINTR)
+ goto retry;
+
+ bch2_disk_reservation_put(c, &disk_res);
+
+ return bch2_trans_exit(&trans) ?: ret;
}
static int bch2_journal_replay(struct bch_fs *c,
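
Note: in the split_compressed path the flags drop BTREE_INSERT_NOMARK,
so every split the commit inserts is marked as a new key, and the same
key was already marked once, in full, by initial gc; the trailing
bch2_mark_key(..., -((s64) k->k.size), ...) subtracts that duplicate.
A toy sketch of the bookkeeping, with made-up sizes:

	#include <stdio.h>

	int main(void)
	{
		long S = 128;				/* key size marked by initial gc */
		long splits[] = { 32, 64, 32 };		/* hypothetical replay splits    */
		long usage = S;				/* counted once by initial gc    */

		for (unsigned i = 0; i < 3; i++)
			usage += splits[i];		/* replay marks each split       */
		usage += -S;				/* final bch2_mark_key(..., -S)  */

		printf("%ld\n", usage);			/* 128: counted exactly once     */
		return 0;
	}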