summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@gmail.com>, 2022-06-14 02:07:19 +0300
committer: Kent Overstreet <kent.overstreet@linux.dev>, 2023-10-23 00:09:34 +0300
commit: c501fef6deb1de13d45d22a3df32906adf17275b (patch)
tree: f514d7ff6fc2aca72cd22f381a6bace2d83bc3f9
parent: 30f0349d62429effd729ae9272c6fb57f47d1436 (diff)
download: linux-c501fef6deb1de13d45d22a3df32906adf17275b.tar.xz
bcachefs: Pull out data_update.c
This is the start of reorganizing the data IO paths. The plan is to also break apart io.c into data_read.c and data_write.c, and migrate_write will be renamed to the data_update path.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- fs/bcachefs/Makefile 1
-rw-r--r-- fs/bcachefs/data_update.c 383
-rw-r--r-- fs/bcachefs/data_update.h 48
-rw-r--r-- fs/bcachefs/ec.h 1
-rw-r--r-- fs/bcachefs/io.c 9
-rw-r--r-- fs/bcachefs/move.c 393
-rw-r--r-- fs/bcachefs/move.h 45
7 files changed, 452 insertions, 428 deletions
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index fada601c10db..95b990ad0196 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -25,6 +25,7 @@ bcachefs-y := \
debug.o \
dirent.o \
disk_groups.o \
+ data_update.o \
ec.o \
error.o \
extents.o \
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
new file mode 100644
index 000000000000..0161b0a9f36e
--- /dev/null
+++ b/fs/bcachefs/data_update.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_buf.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "data_update.h"
+#include "ec.h"
+#include "extents.h"
+#include "io.h"
+#include "keylist.h"
+#include "move.h"
+#include "subvolume.h"
+#include "trace.h"
+
+static int insert_snapshot_whiteouts(struct btree_trans *trans,
+ enum btree_id id,
+ struct bpos old_pos,
+ struct bpos new_pos)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter, update_iter;
+ struct bkey_s_c k;
+ struct snapshots_seen s;
+ int ret;
+
+ if (!btree_type_has_snapshots(id))
+ return 0;
+
+ snapshots_seen_init(&s);
+
+ if (!bkey_cmp(old_pos, new_pos))
+ return 0;
+
+ if (!snapshot_t(c, old_pos.snapshot)->children[0])
+ return 0;
+
+ bch2_trans_iter_init(trans, &iter, id, old_pos,
+ BTREE_ITER_NOT_EXTENTS|
+ BTREE_ITER_ALL_SNAPSHOTS);
+ while (1) {
+next:
+ k = bch2_btree_iter_prev(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ break;
+
+ if (bkey_cmp(old_pos, k.k->p))
+ break;
+
+ if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot)) {
+ struct bkey_i *update;
+ u32 *i;
+
+ darray_for_each(s.ids, i)
+ if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, *i))
+ goto next;
+
+ update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
+
+ ret = PTR_ERR_OR_ZERO(update);
+ if (ret)
+ break;
+
+ bkey_init(&update->k);
+ update->k.p = new_pos;
+ update->k.p.snapshot = k.k->p.snapshot;
+
+ bch2_trans_iter_init(trans, &update_iter, id, update->k.p,
+ BTREE_ITER_NOT_EXTENTS|
+ BTREE_ITER_ALL_SNAPSHOTS|
+ BTREE_ITER_INTENT);
+ ret = bch2_btree_iter_traverse(&update_iter) ?:
+ bch2_trans_update(trans, &update_iter, update,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ bch2_trans_iter_exit(trans, &update_iter);
+ if (ret)
+ break;
+
+ ret = snapshots_seen_add(c, &s, k.k->p.snapshot);
+ if (ret)
+ break;
+ }
+ }
+ bch2_trans_iter_exit(trans, &iter);
+ darray_exit(&s.ids);
+
+ return ret;
+}
+
+int bch2_data_update_index_update(struct bch_write_op *op)
+{
+ struct bch_fs *c = op->c;
+ struct btree_trans trans;
+ struct btree_iter iter;
+ struct data_update *m =
+ container_of(op, struct data_update, op);
+ struct open_bucket *ec_ob = ec_open_bucket(c, &op->open_buckets);
+ struct keylist *keys = &op->insert_keys;
+ struct bkey_buf _new, _insert;
+ int ret = 0;
+
+ bch2_bkey_buf_init(&_new);
+ bch2_bkey_buf_init(&_insert);
+ bch2_bkey_buf_realloc(&_insert, c, U8_MAX);
+
+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
+
+ bch2_trans_iter_init(&trans, &iter, m->btree_id,
+ bkey_start_pos(&bch2_keylist_front(keys)->k),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+ while (1) {
+ struct bkey_s_c k;
+ struct bkey_i *insert;
+ struct bkey_i_extent *new;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ struct bpos next_pos;
+ bool did_work = false;
+ bool extending = false, should_check_enospc;
+ s64 i_sectors_delta = 0, disk_sectors_delta = 0;
+
+ bch2_trans_begin(&trans);
+
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ new = bkey_i_to_extent(bch2_keylist_front(keys));
+
+ if (bversion_cmp(k.k->version, new->k.version) ||
+ !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
+ goto nomatch;
+
+ bkey_reassemble(_insert.k, k);
+ insert = _insert.k;
+
+ bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys));
+ new = bkey_i_to_extent(_new.k);
+ bch2_cut_front(iter.pos, &new->k_i);
+
+ bch2_cut_front(iter.pos, insert);
+ bch2_cut_back(new->k.p, insert);
+ bch2_cut_back(insert->k.p, &new->k_i);
+
+ if (m->data_cmd == DATA_REWRITE) {
+ struct bch_extent_ptr *new_ptr, *old_ptr = (void *)
+ bch2_bkey_has_device(bkey_i_to_s_c(insert),
+ m->data_opts.rewrite_dev);
+ if (!old_ptr)
+ goto nomatch;
+
+ if (old_ptr->cached)
+ extent_for_each_ptr(extent_i_to_s(new), new_ptr)
+ new_ptr->cached = true;
+
+ __bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
+ }
+
+ extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
+ if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) {
+ /*
+ * raced with another move op? extent already
+ * has a pointer to the device we just wrote
+ * data to
+ */
+ continue;
+ }
+
+ bch2_extent_ptr_decoded_append(insert, &p);
+ did_work = true;
+ }
+
+ if (!did_work)
+ goto nomatch;
+
+ bch2_bkey_narrow_crcs(insert,
+ (struct bch_extent_crc_unpacked) { 0 });
+ bch2_extent_normalize(c, bkey_i_to_s(insert));
+ bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert),
+ op->opts.background_target,
+ op->opts.data_replicas);
+
+ ret = bch2_sum_sector_overwrites(&trans, &iter, insert,
+ &extending,
+ &should_check_enospc,
+ &i_sectors_delta,
+ &disk_sectors_delta);
+ if (ret)
+ goto err;
+
+ if (disk_sectors_delta > (s64) op->res.sectors) {
+ ret = bch2_disk_reservation_add(c, &op->res,
+ disk_sectors_delta - op->res.sectors,
+ !should_check_enospc
+ ? BCH_DISK_RESERVATION_NOFAIL : 0);
+ if (ret)
+ goto out;
+ }
+
+ next_pos = insert->k.p;
+
+ ret = insert_snapshot_whiteouts(&trans, m->btree_id,
+ k.k->p, insert->k.p) ?:
+ bch2_trans_update(&trans, &iter, insert,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+ bch2_trans_commit(&trans, &op->res,
+ op_journal_seq(op),
+ BTREE_INSERT_NOFAIL|
+ m->data_opts.btree_insert_flags);
+ if (!ret) {
+ bch2_btree_iter_set_pos(&iter, next_pos);
+ atomic_long_inc(&c->extent_migrate_done);
+ if (ec_ob)
+ bch2_ob_add_backpointer(c, ec_ob, &insert->k);
+ }
+err:
+ if (ret == -EINTR)
+ ret = 0;
+ if (ret)
+ break;
+next:
+ while (bkey_cmp(iter.pos, bch2_keylist_front(keys)->k.p) >= 0) {
+ bch2_keylist_pop_front(keys);
+ if (bch2_keylist_empty(keys))
+ goto out;
+ }
+ continue;
+nomatch:
+ if (m->ctxt) {
+ BUG_ON(k.k->p.offset <= iter.pos.offset);
+ atomic64_inc(&m->ctxt->stats->keys_raced);
+ atomic64_add(k.k->p.offset - iter.pos.offset,
+ &m->ctxt->stats->sectors_raced);
+ }
+ atomic_long_inc(&c->extent_migrate_raced);
+ trace_move_race(&new->k);
+ bch2_btree_iter_advance(&iter);
+ goto next;
+ }
+out:
+ bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_exit(&trans);
+ bch2_bkey_buf_exit(&_insert, c);
+ bch2_bkey_buf_exit(&_new, c);
+ BUG_ON(ret == -EINTR);
+ return ret;
+}
+
+void bch2_data_update_read_done(struct data_update *m, struct bch_read_bio *rbio)
+{
+ /* write bio must own pages: */
+ BUG_ON(!m->op.wbio.bio.bi_vcnt);
+
+ m->ptr = rbio->pick.ptr;
+ m->offset = rbio->data_pos.offset - rbio->pick.crc.offset;
+ m->op.devs_have = rbio->devs_have;
+ m->op.pos = rbio->data_pos;
+ m->op.version = rbio->version;
+ m->op.crc = rbio->pick.crc;
+ m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
+
+ if (m->data_cmd == DATA_REWRITE)
+ bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev);
+}
+
+int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
+ struct write_point_specifier wp,
+ struct bch_io_opts io_opts,
+ enum data_cmd data_cmd,
+ struct data_opts data_opts,
+ enum btree_id btree_id,
+ struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct bch_extent_crc_unpacked crc;
+ struct extent_ptr_decoded p;
+ int ret;
+
+ m->btree_id = btree_id;
+ m->data_cmd = data_cmd;
+ m->data_opts = data_opts;
+ m->nr_ptrs_reserved = 0;
+
+ bch2_write_op_init(&m->op, c, io_opts);
+
+ if (!bch2_bkey_is_incompressible(k))
+ m->op.compression_type =
+ bch2_compression_opt_to_type[io_opts.background_compression ?:
+ io_opts.compression];
+ else
+ m->op.incompressible = true;
+
+ m->op.target = data_opts.target,
+ m->op.write_point = wp;
+
+ /*
+ * op->csum_type is normally initialized from the fs/file's current
+ * options - but if an extent is encrypted, we require that it stays
+ * encrypted:
+ */
+ bkey_for_each_crc(k.k, ptrs, crc, entry)
+ if (bch2_csum_type_is_encryption(crc.csum_type)) {
+ m->op.nonce = crc.nonce + crc.offset;
+ m->op.csum_type = crc.csum_type;
+ break;
+ }
+
+ if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) {
+ m->op.alloc_reserve = RESERVE_movinggc;
+ } else {
+ /* XXX: this should probably be passed in */
+ m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;
+ }
+
+ m->op.flags |= BCH_WRITE_PAGES_STABLE|
+ BCH_WRITE_PAGES_OWNED|
+ BCH_WRITE_DATA_ENCODED|
+ BCH_WRITE_FROM_INTERNAL|
+ BCH_WRITE_MOVE;
+
+ m->op.nr_replicas = data_opts.nr_replicas;
+ m->op.nr_replicas_required = data_opts.nr_replicas;
+
+ switch (data_cmd) {
+ case DATA_ADD_REPLICAS: {
+ /*
+ * DATA_ADD_REPLICAS is used for moving data to a different
+ * device in the background, and due to compression the new copy
+ * might take up more space than the old copy:
+ */
+#if 0
+ int nr = (int) io_opts.data_replicas -
+ bch2_bkey_nr_ptrs_allocated(k);
+#endif
+ int nr = (int) io_opts.data_replicas;
+
+ if (nr > 0) {
+ m->op.nr_replicas = m->nr_ptrs_reserved = nr;
+
+ ret = bch2_disk_reservation_get(c, &m->op.res,
+ k.k->size, m->op.nr_replicas, 0);
+ if (ret)
+ return ret;
+ }
+ break;
+ }
+ case DATA_REWRITE: {
+ unsigned compressed_sectors = 0;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ if (p.ptr.dev == data_opts.rewrite_dev) {
+ if (p.ptr.cached)
+ m->op.flags |= BCH_WRITE_CACHED;
+
+ if (!p.ptr.cached &&
+ crc_is_compressed(p.crc))
+ compressed_sectors += p.crc.compressed_size;
+ }
+
+ if (compressed_sectors) {
+ ret = bch2_disk_reservation_add(c, &m->op.res,
+ k.k->size * m->op.nr_replicas,
+ BCH_DISK_RESERVATION_NOFAIL);
+ if (ret)
+ return ret;
+ }
+ break;
+ }
+ case DATA_PROMOTE:
+ m->op.flags |= BCH_WRITE_ALLOC_NOWAIT;
+ m->op.flags |= BCH_WRITE_CACHED;
+ break;
+ default:
+ BUG();
+ }
+
+ return 0;
+}
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
new file mode 100644
index 000000000000..03b4ca5a4ee8
--- /dev/null
+++ b/fs/bcachefs/data_update.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BCACHEFS_DATA_UPDATE_H
+#define _BCACHEFS_DATA_UPDATE_H
+
+#include "io_types.h"
+
+enum data_cmd {
+ DATA_SKIP,
+ DATA_SCRUB,
+ DATA_ADD_REPLICAS,
+ DATA_REWRITE,
+ DATA_PROMOTE,
+};
+
+struct data_opts {
+ u16 target;
+ u8 rewrite_dev;
+ u8 nr_replicas;
+ int btree_insert_flags;
+};
+
+struct data_update {
+ enum btree_id btree_id;
+ enum data_cmd data_cmd;
+ struct data_opts data_opts;
+
+ unsigned nr_ptrs_reserved;
+
+ struct moving_context *ctxt;
+
+ /* what we read: */
+ struct bch_extent_ptr ptr;
+ u64 offset;
+
+ struct bch_write_op op;
+};
+
+int bch2_data_update_index_update(struct bch_write_op *);
+
+void bch2_data_update_read_done(struct data_update *, struct bch_read_bio *);
+int bch2_data_update_init(struct bch_fs *, struct data_update *,
+ struct write_point_specifier,
+ struct bch_io_opts,
+ enum data_cmd, struct data_opts,
+ enum btree_id, struct bkey_s_c);
+
+#endif /* _BCACHEFS_DATA_UPDATE_H */
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index a4c13d61af10..c53187df4651 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -4,6 +4,7 @@
#include "ec_types.h"
#include "buckets_types.h"
+#include "extents_types.h"
#include "keylist_types.h"
int bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c,
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index ca72a31da502..743449ed7fae 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -16,6 +16,7 @@
#include "checksum.h"
#include "compress.h"
#include "clock.h"
+#include "data_update.h"
#include "debug.h"
#include "disk_groups.h"
#include "ec.h"
@@ -660,7 +661,7 @@ static void __bch2_write_index(struct bch_write_op *op)
ret = !(op->flags & BCH_WRITE_MOVE)
? bch2_write_index_default(op)
- : bch2_migrate_index_update(op);
+ : bch2_data_update_index_update(op);
BUG_ON(ret == -EINTR);
BUG_ON(keylist_sectors(keys) && !ret);
@@ -1433,7 +1434,7 @@ struct promote_op {
struct rhash_head hash;
struct bpos pos;
- struct migrate_write write;
+ struct data_update write;
struct bio_vec bi_inline_vecs[0]; /* must be last */
};
@@ -1508,7 +1509,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
- bch2_migrate_read_done(&op->write, rbio);
+ bch2_data_update_read_done(&op->write, rbio);
closure_call(&op->write.op.cl, bch2_write, c->btree_update_wq, NULL);
}
@@ -1565,7 +1566,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
bio = &op->write.op.wbio.bio;
bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
- ret = bch2_migrate_write_init(c, &op->write,
+ ret = bch2_data_update_init(c, &op->write,
writepoint_hashed((unsigned long) current),
opts,
DATA_PROMOTE,
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 08fb8c71893f..a3a486cff28e 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -6,7 +6,6 @@
#include "btree_gc.h"
#include "btree_update.h"
#include "btree_update_interior.h"
-#include "buckets.h"
#include "disk_groups.h"
#include "ec.h"
#include "inode.h"
@@ -15,7 +14,6 @@
#include "keylist.h"
#include "move.h"
#include "replicas.h"
-#include "subvolume.h"
#include "super-io.h"
#include "trace.h"
@@ -32,394 +30,11 @@ struct moving_io {
struct bch_read_bio rbio;
- struct migrate_write write;
+ struct data_update write;
/* Must be last since it is variable size */
struct bio_vec bi_inline_vecs[0];
};
-struct moving_context {
- /* Closure for waiting on all reads and writes to complete */
- struct closure cl;
-
- struct bch_move_stats *stats;
-
- struct list_head reads;
-
- /* in flight sectors: */
- atomic_t read_sectors;
- atomic_t write_sectors;
-
- wait_queue_head_t wait;
-};
-
-static int insert_snapshot_whiteouts(struct btree_trans *trans,
- enum btree_id id,
- struct bpos old_pos,
- struct bpos new_pos)
-{
- struct bch_fs *c = trans->c;
- struct btree_iter iter, update_iter;
- struct bkey_s_c k;
- struct snapshots_seen s;
- int ret;
-
- if (!btree_type_has_snapshots(id))
- return 0;
-
- snapshots_seen_init(&s);
-
- if (!bkey_cmp(old_pos, new_pos))
- return 0;
-
- if (!snapshot_t(c, old_pos.snapshot)->children[0])
- return 0;
-
- bch2_trans_iter_init(trans, &iter, id, old_pos,
- BTREE_ITER_NOT_EXTENTS|
- BTREE_ITER_ALL_SNAPSHOTS);
- while (1) {
-next:
- k = bch2_btree_iter_prev(&iter);
- ret = bkey_err(k);
- if (ret)
- break;
-
- if (bkey_cmp(old_pos, k.k->p))
- break;
-
- if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot)) {
- struct bkey_i *update;
- u32 *i;
-
- darray_for_each(s.ids, i)
- if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, *i))
- goto next;
-
- update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
-
- ret = PTR_ERR_OR_ZERO(update);
- if (ret)
- break;
-
- bkey_init(&update->k);
- update->k.p = new_pos;
- update->k.p.snapshot = k.k->p.snapshot;
-
- bch2_trans_iter_init(trans, &update_iter, id, update->k.p,
- BTREE_ITER_NOT_EXTENTS|
- BTREE_ITER_ALL_SNAPSHOTS|
- BTREE_ITER_INTENT);
- ret = bch2_btree_iter_traverse(&update_iter) ?:
- bch2_trans_update(trans, &update_iter, update,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
- bch2_trans_iter_exit(trans, &update_iter);
- if (ret)
- break;
-
- ret = snapshots_seen_add(c, &s, k.k->p.snapshot);
- if (ret)
- break;
- }
- }
- bch2_trans_iter_exit(trans, &iter);
- darray_exit(&s.ids);
-
- return ret;
-}
-
-int bch2_migrate_index_update(struct bch_write_op *op)
-{
- struct bch_fs *c = op->c;
- struct btree_trans trans;
- struct btree_iter iter;
- struct migrate_write *m =
- container_of(op, struct migrate_write, op);
- struct open_bucket *ec_ob = ec_open_bucket(c, &op->open_buckets);
- struct keylist *keys = &op->insert_keys;
- struct bkey_buf _new, _insert;
- int ret = 0;
-
- bch2_bkey_buf_init(&_new);
- bch2_bkey_buf_init(&_insert);
- bch2_bkey_buf_realloc(&_insert, c, U8_MAX);
-
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
-
- bch2_trans_iter_init(&trans, &iter, m->btree_id,
- bkey_start_pos(&bch2_keylist_front(keys)->k),
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-
- while (1) {
- struct bkey_s_c k;
- struct bkey_i *insert;
- struct bkey_i_extent *new;
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- struct bpos next_pos;
- bool did_work = false;
- bool extending = false, should_check_enospc;
- s64 i_sectors_delta = 0, disk_sectors_delta = 0;
-
- bch2_trans_begin(&trans);
-
- k = bch2_btree_iter_peek_slot(&iter);
- ret = bkey_err(k);
- if (ret)
- goto err;
-
- new = bkey_i_to_extent(bch2_keylist_front(keys));
-
- if (bversion_cmp(k.k->version, new->k.version) ||
- !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
- goto nomatch;
-
- bkey_reassemble(_insert.k, k);
- insert = _insert.k;
-
- bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys));
- new = bkey_i_to_extent(_new.k);
- bch2_cut_front(iter.pos, &new->k_i);
-
- bch2_cut_front(iter.pos, insert);
- bch2_cut_back(new->k.p, insert);
- bch2_cut_back(insert->k.p, &new->k_i);
-
- if (m->data_cmd == DATA_REWRITE) {
- struct bch_extent_ptr *new_ptr, *old_ptr = (void *)
- bch2_bkey_has_device(bkey_i_to_s_c(insert),
- m->data_opts.rewrite_dev);
- if (!old_ptr)
- goto nomatch;
-
- if (old_ptr->cached)
- extent_for_each_ptr(extent_i_to_s(new), new_ptr)
- new_ptr->cached = true;
-
- __bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
- }
-
- extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
- if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) {
- /*
- * raced with another move op? extent already
- * has a pointer to the device we just wrote
- * data to
- */
- continue;
- }
-
- bch2_extent_ptr_decoded_append(insert, &p);
- did_work = true;
- }
-
- if (!did_work)
- goto nomatch;
-
- bch2_bkey_narrow_crcs(insert,
- (struct bch_extent_crc_unpacked) { 0 });
- bch2_extent_normalize(c, bkey_i_to_s(insert));
- bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert),
- op->opts.background_target,
- op->opts.data_replicas);
-
- ret = bch2_sum_sector_overwrites(&trans, &iter, insert,
- &extending,
- &should_check_enospc,
- &i_sectors_delta,
- &disk_sectors_delta);
- if (ret)
- goto err;
-
- if (disk_sectors_delta > (s64) op->res.sectors) {
- ret = bch2_disk_reservation_add(c, &op->res,
- disk_sectors_delta - op->res.sectors,
- !should_check_enospc
- ? BCH_DISK_RESERVATION_NOFAIL : 0);
- if (ret)
- goto out;
- }
-
- next_pos = insert->k.p;
-
- ret = insert_snapshot_whiteouts(&trans, m->btree_id,
- k.k->p, insert->k.p) ?:
- bch2_trans_update(&trans, &iter, insert,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
- bch2_trans_commit(&trans, &op->res,
- op_journal_seq(op),
- BTREE_INSERT_NOFAIL|
- m->data_opts.btree_insert_flags);
- if (!ret) {
- bch2_btree_iter_set_pos(&iter, next_pos);
- atomic_long_inc(&c->extent_migrate_done);
- if (ec_ob)
- bch2_ob_add_backpointer(c, ec_ob, &insert->k);
- }
-err:
- if (ret == -EINTR)
- ret = 0;
- if (ret)
- break;
-next:
- while (bkey_cmp(iter.pos, bch2_keylist_front(keys)->k.p) >= 0) {
- bch2_keylist_pop_front(keys);
- if (bch2_keylist_empty(keys))
- goto out;
- }
- continue;
-nomatch:
- if (m->ctxt) {
- BUG_ON(k.k->p.offset <= iter.pos.offset);
- atomic64_inc(&m->ctxt->stats->keys_raced);
- atomic64_add(k.k->p.offset - iter.pos.offset,
- &m->ctxt->stats->sectors_raced);
- }
- atomic_long_inc(&c->extent_migrate_raced);
- trace_move_race(&new->k);
- bch2_btree_iter_advance(&iter);
- goto next;
- }
-out:
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
- bch2_bkey_buf_exit(&_insert, c);
- bch2_bkey_buf_exit(&_new, c);
- BUG_ON(ret == -EINTR);
- return ret;
-}
-
-void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio)
-{
- /* write bio must own pages: */
- BUG_ON(!m->op.wbio.bio.bi_vcnt);
-
- m->ptr = rbio->pick.ptr;
- m->offset = rbio->data_pos.offset - rbio->pick.crc.offset;
- m->op.devs_have = rbio->devs_have;
- m->op.pos = rbio->data_pos;
- m->op.version = rbio->version;
- m->op.crc = rbio->pick.crc;
- m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
-
- if (m->data_cmd == DATA_REWRITE)
- bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev);
-}
-
-int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
- struct write_point_specifier wp,
- struct bch_io_opts io_opts,
- enum data_cmd data_cmd,
- struct data_opts data_opts,
- enum btree_id btree_id,
- struct bkey_s_c k)
-{
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- const union bch_extent_entry *entry;
- struct bch_extent_crc_unpacked crc;
- struct extent_ptr_decoded p;
- int ret;
-
- m->btree_id = btree_id;
- m->data_cmd = data_cmd;
- m->data_opts = data_opts;
- m->nr_ptrs_reserved = 0;
-
- bch2_write_op_init(&m->op, c, io_opts);
-
- if (!bch2_bkey_is_incompressible(k))
- m->op.compression_type =
- bch2_compression_opt_to_type[io_opts.background_compression ?:
- io_opts.compression];
- else
- m->op.incompressible = true;
-
- m->op.target = data_opts.target,
- m->op.write_point = wp;
-
- /*
- * op->csum_type is normally initialized from the fs/file's current
- * options - but if an extent is encrypted, we require that it stays
- * encrypted:
- */
- bkey_for_each_crc(k.k, ptrs, crc, entry)
- if (bch2_csum_type_is_encryption(crc.csum_type)) {
- m->op.nonce = crc.nonce + crc.offset;
- m->op.csum_type = crc.csum_type;
- break;
- }
-
- if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) {
- m->op.alloc_reserve = RESERVE_movinggc;
- } else {
- /* XXX: this should probably be passed in */
- m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;
- }
-
- m->op.flags |= BCH_WRITE_PAGES_STABLE|
- BCH_WRITE_PAGES_OWNED|
- BCH_WRITE_DATA_ENCODED|
- BCH_WRITE_FROM_INTERNAL|
- BCH_WRITE_MOVE;
-
- m->op.nr_replicas = data_opts.nr_replicas;
- m->op.nr_replicas_required = data_opts.nr_replicas;
-
- switch (data_cmd) {
- case DATA_ADD_REPLICAS: {
- /*
- * DATA_ADD_REPLICAS is used for moving data to a different
- * device in the background, and due to compression the new copy
- * might take up more space than the old copy:
- */
-#if 0
- int nr = (int) io_opts.data_replicas -
- bch2_bkey_nr_ptrs_allocated(k);
-#endif
- int nr = (int) io_opts.data_replicas;
-
- if (nr > 0) {
- m->op.nr_replicas = m->nr_ptrs_reserved = nr;
-
- ret = bch2_disk_reservation_get(c, &m->op.res,
- k.k->size, m->op.nr_replicas, 0);
- if (ret)
- return ret;
- }
- break;
- }
- case DATA_REWRITE: {
- unsigned compressed_sectors = 0;
-
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- if (p.ptr.dev == data_opts.rewrite_dev) {
- if (p.ptr.cached)
- m->op.flags |= BCH_WRITE_CACHED;
-
- if (!p.ptr.cached &&
- crc_is_compressed(p.crc))
- compressed_sectors += p.crc.compressed_size;
- }
-
- if (compressed_sectors) {
- ret = bch2_disk_reservation_add(c, &m->op.res,
- k.k->size * m->op.nr_replicas,
- BCH_DISK_RESERVATION_NOFAIL);
- if (ret)
- return ret;
- }
- break;
- }
- case DATA_PROMOTE:
- m->op.flags |= BCH_WRITE_ALLOC_NOWAIT;
- m->op.flags |= BCH_WRITE_CACHED;
- break;
- default:
- BUG();
- }
-
- return 0;
-}
-
static void move_free(struct moving_io *io)
{
struct moving_context *ctxt = io->write.ctxt;
@@ -457,7 +72,7 @@ static void move_write(struct moving_io *io)
closure_get(&io->write.ctxt->cl);
atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
- bch2_migrate_read_done(&io->write, &io->rbio);
+ bch2_data_update_read_done(&io->write, &io->rbio);
closure_call(&io->write.op.cl, bch2_write, NULL, NULL);
}
@@ -564,8 +179,8 @@ static int bch2_move_extent(struct btree_trans *trans,
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
io->rbio.bio.bi_end_io = move_read_endio;
- ret = bch2_migrate_write_init(c, &io->write, wp, io_opts,
- data_cmd, data_opts, btree_id, k);
+ ret = bch2_data_update_init(c, &io->write, wp, io_opts,
+ data_cmd, data_opts, btree_id, k);
if (ret)
goto err_free_pages;
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index 901d8f875946..6d273f67a82c 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -4,51 +4,26 @@
#include "btree_iter.h"
#include "buckets.h"
-#include "io_types.h"
+#include "data_update.h"
#include "move_types.h"
struct bch_read_bio;
-struct moving_context;
-enum data_cmd {
- DATA_SKIP,
- DATA_SCRUB,
- DATA_ADD_REPLICAS,
- DATA_REWRITE,
- DATA_PROMOTE,
-};
-
-struct data_opts {
- u16 target;
- u8 rewrite_dev;
- u8 nr_replicas;
- int btree_insert_flags;
-};
+struct moving_context {
+ /* Closure for waiting on all reads and writes to complete */
+ struct closure cl;
-struct migrate_write {
- enum btree_id btree_id;
- enum data_cmd data_cmd;
- struct data_opts data_opts;
+ struct bch_move_stats *stats;
- unsigned nr_ptrs_reserved;
+ struct list_head reads;
- struct moving_context *ctxt;
+ /* in flight sectors: */
+ atomic_t read_sectors;
+ atomic_t write_sectors;
- /* what we read: */
- struct bch_extent_ptr ptr;
- u64 offset;
-
- struct bch_write_op op;
+ wait_queue_head_t wait;
};
-int bch2_migrate_index_update(struct bch_write_op *);
-void bch2_migrate_read_done(struct migrate_write *, struct bch_read_bio *);
-int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *,
- struct write_point_specifier,
- struct bch_io_opts,
- enum data_cmd, struct data_opts,
- enum btree_id, struct bkey_s_c);
-
typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
struct bkey_s_c,
struct bch_io_opts *, struct data_opts *);