Diffstat (limited to 'fs/bcachefs/snapshot.c')
-rw-r--r--	fs/bcachefs/snapshot.c	274
1 file changed, 229 insertions, 45 deletions
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 39debe814bf3..629900a5e641 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -8,6 +8,7 @@
#include "errcode.h"
#include "error.h"
#include "fs.h"
+#include "recovery_passes.h"
#include "snapshot.h"
#include <linux/random.h>
@@ -31,7 +32,7 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
}
int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k,
- enum bkey_invalid_flags flags,
+ enum bch_validate_flags flags,
struct printbuf *err)
{
int ret = 0;
@@ -48,7 +49,7 @@ int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id,
struct bch_snapshot_tree *s)
{
int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id),
- BTREE_ITER_WITH_UPDATES, snapshot_tree, s);
+ BTREE_ITER_with_updates, snapshot_tree, s);
if (bch2_err_matches(ret, ENOENT))
ret = -BCH_ERR_ENOENT_snapshot_tree;
@@ -93,8 +94,10 @@ static int bch2_snapshot_tree_create(struct btree_trans *trans,
static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor)
{
- while (id && id < ancestor)
- id = __snapshot_t(t, id)->parent;
+ while (id && id < ancestor) {
+ const struct snapshot_t *s = __snapshot_t(t, id);
+ id = s ? s->parent : 0;
+ }
return id == ancestor;
}
@@ -110,6 +113,8 @@ static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
{
const struct snapshot_t *s = __snapshot_t(t, id);
+ if (!s)
+ return 0;
if (s->skip[2] <= ancestor)
return s->skip[2];
@@ -120,6 +125,15 @@ static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
return s->parent;
}
+static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor)
+{
+ const struct snapshot_t *s = __snapshot_t(t, id);
+ if (!s)
+ return false;
+
+ return test_bit(ancestor - id - 1, s->is_ancestor);
+}
+
bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
bool ret;
@@ -127,7 +141,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
rcu_read_lock();
struct snapshot_table *t = rcu_dereference(c->snapshots);
- if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) {
+ if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
goto out;
}
@@ -135,13 +149,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
while (id && id < ancestor - IS_ANCESTOR_BITMAP)
id = get_ancestor_below(t, id, ancestor);
-	if (id && id < ancestor) {
-		ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);
-		EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
-	} else {
-		ret = id == ancestor;
-	}
+	ret = id && id < ancestor
+		? test_ancestor_bitmap(t, id, ancestor)
+		: id == ancestor;
+	EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
out:
rcu_read_unlock();
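
For reference, a standalone userspace sketch of the lookup above, assuming a simplified snapshot_t layout: a parent id, three skip pointers, and a 128-bit ancestor bitmap (the names, sizes, and direct tbl[id] indexing are illustrative, not the kernel's exact layout). Parent ids are always greater than child ids, so the walk climbs via the farthest skip pointer that doesn't overshoot the target, then finishes with a single bitmap test:

#include <stdbool.h>
#include <stdint.h>

#define IS_ANCESTOR_BITMAP	128

struct snap_node {
	uint32_t parent;	/* 0 when this node is a root */
	uint32_t skip[3];	/* sampled ancestors; skip[2] is farthest */
	uint64_t is_ancestor[IS_ANCESTOR_BITMAP / 64];
};

static bool bitmap_test(const uint64_t *map, unsigned bit)
{
	return map[bit / 64] & (1ULL << (bit % 64));
}

static bool is_ancestor(const struct snap_node *tbl, uint32_t id, uint32_t ancestor)
{
	/* climb until @id is within bitmap range of @ancestor */
	while (id && id < ancestor - IS_ANCESTOR_BITMAP) {
		const struct snap_node *s = &tbl[id];

		id = s->skip[2] <= ancestor ? s->skip[2]
		   : s->skip[1] <= ancestor ? s->skip[1]
		   : s->skip[0] <= ancestor ? s->skip[0]
		   : s->parent;
	}

	return id && id < ancestor
		? bitmap_test(tbl[id].is_ancestor, ancestor - id - 1)
		: id == ancestor;
}
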
@@ -151,36 +163,39 @@ out:
static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
{
size_t idx = U32_MAX - id;
- size_t new_size;
struct snapshot_table *new, *old;
- new_size = max(16UL, roundup_pow_of_two(idx + 1));
+ size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
+ size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);
- new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
+ new = kvzalloc(new_bytes, GFP_KERNEL);
if (!new)
return NULL;
+ new->nr = new_size;
+
old = rcu_dereference_protected(c->snapshots, true);
if (old)
- memcpy(new->s,
- rcu_dereference_protected(c->snapshots, true)->s,
- sizeof(new->s[0]) * c->snapshot_table_size);
+ memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr);
rcu_assign_pointer(c->snapshots, new);
- c->snapshot_table_size = new_size;
- kvfree_rcu_mightsleep(old);
+ kvfree_rcu(old, rcu);
- return &rcu_dereference_protected(c->snapshots, true)->s[idx];
+ return &rcu_dereference_protected(c->snapshots,
+ lockdep_is_held(&c->snapshot_table_lock))->s[idx];
}
static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
{
size_t idx = U32_MAX - id;
+ struct snapshot_table *table =
+ rcu_dereference_protected(c->snapshots,
+ lockdep_is_held(&c->snapshot_table_lock));
lockdep_assert_held(&c->snapshot_table_lock);
- if (likely(idx < c->snapshot_table_size))
- return &rcu_dereference_protected(c->snapshots, true)->s[idx];
+ if (likely(table && idx < table->nr))
+ return &table->s[idx];
return __snapshot_t_mut(c, id);
}
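
A minimal userspace sketch of the resize scheme above, with calloc standing in for kvzalloc and a power-of-two round-up standing in for kmalloc_size_roundup() (both stand-ins are assumptions for illustration). The design point of the change: the table's length now lives inside the allocation itself as new->nr, so an RCU reader always sees a pointer and a length that belong together, rather than a separate c->snapshot_table_size field that could briefly disagree with the published pointer:

#include <stdlib.h>
#include <string.h>

struct snap_entry { unsigned parent; };

struct snap_table {
	size_t		  nr;	/* length travels with the allocation */
	struct snap_entry s[];
};

/* stand-in for kmalloc_size_roundup(): round up to the allocator bucket */
static size_t size_roundup(size_t n)
{
	size_t r = 16;

	while (r < n)
		r *= 2;
	return r;
}

/* grow the table so index @idx is valid; caller publishes the result and
 * RCU-frees the old table */
static struct snap_table *table_grow(const struct snap_table *old, size_t idx)
{
	size_t bytes = size_roundup(sizeof(struct snap_table) +
				    (idx + 1) * sizeof(struct snap_entry));
	struct snap_table *new = calloc(1, bytes);

	if (!new)
		return NULL;

	/* use everything the allocator handed back, not just the request */
	new->nr = (bytes - sizeof(*new)) / sizeof(new->s[0]);
	if (old)
		memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr);
	return new;
}
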
@@ -208,7 +223,7 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
}
int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k,
- enum bkey_invalid_flags flags,
+ enum bch_validate_flags flags,
struct printbuf *err)
{
struct bkey_s_c_snapshot s;
@@ -283,7 +298,7 @@ static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
static int __bch2_mark_snapshot(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
+ enum btree_iter_update_trigger_flags flags)
{
struct bch_fs *c = trans->c;
struct snapshot_t *t;
@@ -337,7 +352,7 @@ err:
int bch2_mark_snapshot(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s new,
- unsigned flags)
+ enum btree_iter_update_trigger_flags flags)
{
return __bch2_mark_snapshot(trans, btree, level, old, new.s_c, flags);
}
@@ -346,7 +361,7 @@ int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
struct bch_snapshot *s)
{
return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id),
- BTREE_ITER_WITH_UPDATES, snapshot, s);
+ BTREE_ITER_with_updates, snapshot, s);
}
static int bch2_snapshot_live(struct btree_trans *trans, u32 id)
@@ -567,6 +582,13 @@ static int check_snapshot_tree(struct btree_trans *trans,
u32 subvol_id;
ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
+ bch_err_fn(c, ret);
+
+ if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */
+ ret = 0;
+ goto err;
+ }
+
if (ret)
goto err;
@@ -596,7 +618,7 @@ int bch2_check_snapshot_trees(struct bch_fs *c)
int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter,
BTREE_ID_snapshot_trees, POS_MIN,
- BTREE_ITER_PREFETCH, k,
+ BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_snapshot_tree(trans, &iter, k)));
bch_err_fn(c, ret);
@@ -673,7 +695,7 @@ static int snapshot_tree_ptr_repair(struct btree_trans *trans,
root = bch2_bkey_get_iter_typed(trans, &root_iter,
BTREE_ID_snapshots, POS(0, root_id),
- BTREE_ITER_WITH_UPDATES, snapshot);
+ BTREE_ITER_with_updates, snapshot);
ret = bkey_err(root);
if (ret)
goto err;
@@ -724,7 +746,6 @@ static int check_snapshot(struct btree_trans *trans,
u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
u32 real_depth;
struct printbuf buf = PRINTBUF;
- bool should_have_subvol;
u32 i, id;
int ret = 0;
@@ -770,7 +791,7 @@ static int check_snapshot(struct btree_trans *trans,
}
}
- should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
+ bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
!BCH_SNAPSHOT_DELETED(&s);
if (should_have_subvol) {
@@ -865,13 +886,162 @@ int bch2_check_snapshots(struct bch_fs *c)
int ret = bch2_trans_run(c,
for_each_btree_key_reverse_commit(trans, iter,
BTREE_ID_snapshots, POS_MAX,
- BTREE_ITER_PREFETCH, k,
+ BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_snapshot(trans, &iter, k)));
bch_err_fn(c, ret);
return ret;
}
+static int check_snapshot_exists(struct btree_trans *trans, u32 id)
+{
+ struct bch_fs *c = trans->c;
+
+ if (bch2_snapshot_equiv(c, id))
+ return 0;
+
+ /* 0 is an invalid tree ID */
+ u32 tree_id = 0;
+ int ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
+ if (ret)
+ return ret;
+
+ struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot));
+ ret = PTR_ERR_OR_ZERO(snapshot);
+ if (ret)
+ return ret;
+
+ bkey_snapshot_init(&snapshot->k_i);
+ snapshot->k.p = POS(0, id);
+ snapshot->v.tree = cpu_to_le32(tree_id);
+ snapshot->v.btime.lo = cpu_to_le64(bch2_current_time(c));
+
+ return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?:
+ bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
+ bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?:
+ bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i));
+}
+
+/* Figure out which snapshot nodes belong in the same tree: */
+struct snapshot_tree_reconstruct {
+ enum btree_id btree;
+ struct bpos cur_pos;
+ snapshot_id_list cur_ids;
+ DARRAY(snapshot_id_list) trees;
+};
+
+static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r)
+{
+ darray_for_each(r->trees, i)
+ darray_exit(i);
+ darray_exit(&r->trees);
+ darray_exit(&r->cur_ids);
+}
+
+static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos)
+{
+ return r->btree == BTREE_ID_inodes
+ ? r->cur_pos.offset == pos.offset
+ : r->cur_pos.inode == pos.inode;
+}
+
+static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r)
+{
+ darray_for_each(*l, i)
+ if (snapshot_list_has_id(r, *i))
+ return true;
+ return false;
+}
+
+static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s)
+{
+ bool first = true;
+ darray_for_each(*s, i) {
+ if (!first)
+ prt_char(out, ' ');
+ first = false;
+ prt_printf(out, "%u", *i);
+ }
+}
+
+static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r)
+{
+ if (r->cur_ids.nr) {
+ darray_for_each(r->trees, i)
+ if (snapshot_id_lists_have_common(i, &r->cur_ids)) {
+ int ret = snapshot_list_merge(c, i, &r->cur_ids);
+ if (ret)
+ return ret;
+ goto out;
+ }
+ darray_push(&r->trees, r->cur_ids);
+ darray_init(&r->cur_ids);
+ }
+out:
+ r->cur_ids.nr = 0;
+ return 0;
+}
+
+static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos)
+{
+ if (!same_snapshot(r, pos))
+ snapshot_tree_reconstruct_next(c, r);
+ r->cur_pos = pos;
+ return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot);
+}
+
+int bch2_reconstruct_snapshots(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct printbuf buf = PRINTBUF;
+ struct snapshot_tree_reconstruct r = {};
+ int ret = 0;
+
+ for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
+ if (btree_type_has_snapshots(btree)) {
+ r.btree = btree;
+
+ ret = for_each_btree_key(trans, iter, btree, POS_MIN,
+ BTREE_ITER_all_snapshots|BTREE_ITER_prefetch, k, ({
+ get_snapshot_trees(c, &r, k.k->p);
+ }));
+ if (ret)
+ goto err;
+
+ snapshot_tree_reconstruct_next(c, &r);
+ }
+ }
+
+ darray_for_each(r.trees, t) {
+ printbuf_reset(&buf);
+ snapshot_id_list_to_text(&buf, t);
+
+ darray_for_each(*t, id) {
+ if (fsck_err_on(!bch2_snapshot_equiv(c, *id),
+ c, snapshot_node_missing,
+ "snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) {
+ if (t->nr > 1) {
+ bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ goto err;
+ }
+
+ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ check_snapshot_exists(trans, *id));
+ if (ret)
+ goto err;
+ }
+ }
+ }
+fsck_err:
+err:
+ bch2_trans_put(trans);
+ snapshot_tree_reconstruct_exit(&r);
+ printbuf_exit(&buf);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
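
A compacted sketch of the grouping step in snapshot_tree_reconstruct_next() above, using fixed-size arrays in place of darray/snapshot_id_list (the helper names here are hypothetical). Every key position is expected to carry one version per snapshot node of a single tree, so a position's id set that shares any member with an existing group is merged into that group; otherwise it seeds a new one:

#include <stdbool.h>
#include <stddef.h>

#define MAX_IDS 32

struct id_set {
	unsigned ids[MAX_IDS];
	size_t	 nr;
};

static bool set_has(const struct id_set *s, unsigned id)
{
	for (size_t i = 0; i < s->nr; i++)
		if (s->ids[i] == id)
			return true;
	return false;
}

static void set_merge(struct id_set *dst, const struct id_set *src)
{
	for (size_t i = 0; i < src->nr; i++)
		if (!set_has(dst, src->ids[i]) && dst->nr < MAX_IDS)
			dst->ids[dst->nr++] = src->ids[i];
}

/* fold @cur into the first overlapping group, else start a new group */
static void groups_add(struct id_set *groups, size_t *nr_groups,
		       const struct id_set *cur)
{
	for (size_t i = 0; i < *nr_groups; i++)
		for (size_t j = 0; j < cur->nr; j++)
			if (set_has(&groups[i], cur->ids[j])) {
				set_merge(&groups[i], cur);
				return;
			}
	groups[(*nr_groups)++] = *cur;	/* caller guarantees capacity */
}
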
/*
* Mark a snapshot as deleted, for future cleanup:
*/
@@ -921,7 +1091,7 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
int ret = 0;
s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
- BTREE_ITER_INTENT, snapshot);
+ BTREE_ITER_intent, snapshot);
ret = bkey_err(s);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
"missing snapshot %u", id);
@@ -1030,7 +1200,7 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
- POS_MIN, BTREE_ITER_INTENT);
+ POS_MIN, BTREE_ITER_intent);
k = bch2_btree_iter_peek(&iter);
ret = bkey_err(k);
if (ret)
@@ -1198,7 +1368,7 @@ static int snapshot_delete_key(struct btree_trans *trans,
if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
snapshot_list_has_id(equiv_seen, equiv)) {
return bch2_btree_delete_at(trans, iter,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ BTREE_UPDATE_internal_snapshot_node);
} else {
return snapshot_list_add(c, equiv_seen, equiv);
}
@@ -1235,15 +1405,15 @@ static int move_key_to_correct_snapshot(struct btree_trans *trans,
new->k.p.snapshot = equiv;
bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p,
- BTREE_ITER_ALL_SNAPSHOTS|
- BTREE_ITER_CACHED|
- BTREE_ITER_INTENT);
+ BTREE_ITER_all_snapshots|
+ BTREE_ITER_cached|
+ BTREE_ITER_intent);
ret = bch2_btree_iter_traverse(&new_iter) ?:
bch2_trans_update(trans, &new_iter, new,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+ BTREE_UPDATE_internal_snapshot_node) ?:
bch2_btree_delete_at(trans, iter,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ BTREE_UPDATE_internal_snapshot_node);
bch2_trans_iter_exit(trans, &new_iter);
if (ret)
return ret;
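
A pseudocode-level C sketch of the move above, with hypothetical stubs standing in for the btree update API: copy the key, repoint its snapshot field at the equivalence-class representative, insert the copy, then delete the original. It also shows the GNU "a ?: b" idiom this file leans on (as in check_snapshot_exists() earlier), which evaluates to a unless a is zero, so a chain of int-returning calls stops at the first nonzero error code:

#include <string.h>

/* hypothetical stand-ins for the btree update API */
struct key { unsigned snapshot; char payload[16]; };

static int insert_key(const struct key *k) { (void)k; return 0; }
static int delete_key(const struct key *k) { (void)k; return 0; }

static int move_to_snapshot(struct key *k, unsigned equiv)
{
	struct key copy = *k;

	copy.snapshot = equiv;			/* class representative */
	return insert_key(&copy) ?: delete_key(k);
}
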
@@ -1434,12 +1604,12 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
ret = for_each_btree_key_commit(trans, iter,
id, POS_MIN,
- BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
&res, NULL, BCH_TRANS_COMMIT_no_enospc,
snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?:
for_each_btree_key_commit(trans, iter,
id, POS_MIN,
- BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
&res, NULL, BCH_TRANS_COMMIT_no_enospc,
move_key_to_correct_snapshot(trans, &iter, k));
@@ -1474,7 +1644,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
* nodes some depth fields will be off:
*/
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
- BTREE_ITER_INTENT, k,
+ BTREE_ITER_intent, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior));
if (ret)
@@ -1530,8 +1700,8 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
int ret;
bch2_trans_iter_init(trans, &iter, id, pos,
- BTREE_ITER_NOT_EXTENTS|
- BTREE_ITER_ALL_SNAPSHOTS);
+ BTREE_ITER_not_extents|
+ BTREE_ITER_all_snapshots);
while (1) {
k = bch2_btree_iter_prev(&iter);
ret = bkey_err(k);
@@ -1583,7 +1753,7 @@ static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans,
pos.snapshot = leaf_id;
- bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT);
+ bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_intent);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret)
@@ -1682,6 +1852,20 @@ int bch2_snapshots_read(struct bch_fs *c)
POS_MIN, 0, k,
(set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
bch_err_fn(c, ret);
+
+ /*
+ * It's important that we check if we need to reconstruct snapshots
+ * before going RW, so we mark that pass as required in the superblock -
+ * otherwise, we could end up deleting keys with missing snapshot nodes
+ * instead
+ */
+ BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
+ test_bit(BCH_FS_may_go_rw, &c->flags));
+
+ if (bch2_err_matches(ret, EIO) ||
+ (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
+ ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);
+
return ret;
}