-rw-r--r--  fs/bcachefs/data_update.c | 209
-rw-r--r--  fs/bcachefs/data_update.h |  39
-rw-r--r--  fs/bcachefs/extents.c     |  83
-rw-r--r--  fs/bcachefs/extents.h     |   8
-rw-r--r--  fs/bcachefs/io.c          |  13
-rw-r--r--  fs/bcachefs/move.c        | 140
-rw-r--r--  fs/bcachefs/move.h        |   5
-rw-r--r--  fs/bcachefs/movinggc.c    |  63
-rw-r--r--  fs/bcachefs/rebalance.c   |  84
9 files changed, 304 insertions, 340 deletions
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 0161b0a9f36e..f7bce89f84ed 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -89,6 +89,16 @@ next:
return ret;
}
+static void bch2_bkey_mark_dev_cached(struct bkey_s k, unsigned dev)
+{
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+ struct bch_extent_ptr *ptr;
+
+ bkey_for_each_ptr(ptrs, ptr)
+ if (ptr->dev == dev)
+ ptr->cached = true;
+}
+
int bch2_data_update_index_update(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
@@ -113,6 +123,7 @@ int bch2_data_update_index_update(struct bch_write_op *op)
while (1) {
struct bkey_s_c k;
+ struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
struct bkey_i *insert;
struct bkey_i_extent *new;
const union bch_extent_entry *entry;
@@ -121,6 +132,7 @@ int bch2_data_update_index_update(struct bch_write_op *op)
bool did_work = false;
bool extending = false, should_check_enospc;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
+ unsigned i;
bch2_trans_begin(&trans);
@@ -131,8 +143,7 @@ int bch2_data_update_index_update(struct bch_write_op *op)
new = bkey_i_to_extent(bch2_keylist_front(keys));
- if (bversion_cmp(k.k->version, new->k.version) ||
- !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
+ if (!bch2_extents_match(k, old))
goto nomatch;
bkey_reassemble(_insert.k, k);
@@ -146,20 +157,34 @@ int bch2_data_update_index_update(struct bch_write_op *op)
bch2_cut_back(new->k.p, insert);
bch2_cut_back(insert->k.p, &new->k_i);
- if (m->data_cmd == DATA_REWRITE) {
- struct bch_extent_ptr *new_ptr, *old_ptr = (void *)
- bch2_bkey_has_device(bkey_i_to_s_c(insert),
- m->data_opts.rewrite_dev);
- if (!old_ptr)
- goto nomatch;
-
- if (old_ptr->cached)
- extent_for_each_ptr(extent_i_to_s(new), new_ptr)
- new_ptr->cached = true;
-
- __bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
+ /*
+ * @old: extent that we read from
+ * @insert: key that we're going to update, initialized from
+ * extent currently in btree - same as @old unless we raced with
+ * other updates
+ * @new: extent with new pointers that we'll be adding to @insert
+ *
+ * First, drop rewrite_ptrs from @insert (or mark them cached):
+ */
+ i = 0;
+ bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) {
+ if (((1U << i) & m->data_opts.rewrite_ptrs) &&
+ bch2_extent_has_ptr(old, p, bkey_i_to_s_c(insert))) {
+ /*
+ * If we're going to be adding a pointer to the
+ * same device, we have to drop the old one -
+ * otherwise, we can just mark it cached:
+ */
+ if (bch2_bkey_has_device(bkey_i_to_s_c(&new->k_i), p.ptr.dev))
+ bch2_bkey_drop_device_noerror(bkey_i_to_s(insert), p.ptr.dev);
+ else
+ bch2_bkey_mark_dev_cached(bkey_i_to_s(insert), p.ptr.dev);
+ }
+ i++;
}
+
+ /* Add new ptrs: */
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) {
/*
@@ -177,12 +202,8 @@ int bch2_data_update_index_update(struct bch_write_op *op)
if (!did_work)
goto nomatch;
- bch2_bkey_narrow_crcs(insert,
- (struct bch_extent_crc_unpacked) { 0 });
+ bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 });
bch2_extent_normalize(c, bkey_i_to_s(insert));
- bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert),
- op->opts.background_target,
- op->opts.data_replicas);
ret = bch2_sum_sector_overwrites(&trans, &iter, insert,
&extending,
@@ -250,134 +271,100 @@ out:
return ret;
}
-void bch2_data_update_read_done(struct data_update *m, struct bch_read_bio *rbio)
+void bch2_data_update_read_done(struct data_update *m,
+ struct bch_extent_crc_unpacked crc)
{
/* write bio must own pages: */
BUG_ON(!m->op.wbio.bio.bi_vcnt);
- m->ptr = rbio->pick.ptr;
- m->offset = rbio->data_pos.offset - rbio->pick.crc.offset;
- m->op.devs_have = rbio->devs_have;
- m->op.pos = rbio->data_pos;
- m->op.version = rbio->version;
- m->op.crc = rbio->pick.crc;
- m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
+ m->op.crc = crc;
+ m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9;
- if (m->data_cmd == DATA_REWRITE)
- bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev);
+ closure_call(&m->op.cl, bch2_write, NULL, NULL);
+}
+
+void bch2_data_update_exit(struct data_update *update)
+{
+ struct bch_fs *c = update->op.c;
+
+ bch2_bkey_buf_exit(&update->k, c);
+ bch2_disk_reservation_put(c, &update->op.res);
+ bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
}
int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
struct write_point_specifier wp,
struct bch_io_opts io_opts,
- enum data_cmd data_cmd,
- struct data_opts data_opts,
+ struct data_update_opts data_opts,
enum btree_id btree_id,
struct bkey_s_c k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
- struct bch_extent_crc_unpacked crc;
struct extent_ptr_decoded p;
+ unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
int ret;
+ bch2_bkey_buf_init(&m->k);
+ bch2_bkey_buf_reassemble(&m->k, c, k);
m->btree_id = btree_id;
- m->data_cmd = data_cmd;
m->data_opts = data_opts;
- m->nr_ptrs_reserved = 0;
bch2_write_op_init(&m->op, c, io_opts);
-
- if (!bch2_bkey_is_incompressible(k))
- m->op.compression_type =
- bch2_compression_opt_to_type[io_opts.background_compression ?:
- io_opts.compression];
- else
- m->op.incompressible = true;
-
+ m->op.pos = bkey_start_pos(k.k);
+ m->op.version = k.k->version;
m->op.target = data_opts.target,
m->op.write_point = wp;
-
- /*
- * op->csum_type is normally initialized from the fs/file's current
- * options - but if an extent is encrypted, we require that it stays
- * encrypted:
- */
- bkey_for_each_crc(k.k, ptrs, crc, entry)
- if (bch2_csum_type_is_encryption(crc.csum_type)) {
- m->op.nonce = crc.nonce + crc.offset;
- m->op.csum_type = crc.csum_type;
- break;
- }
-
- if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) {
- m->op.alloc_reserve = RESERVE_movinggc;
- } else {
- /* XXX: this should probably be passed in */
- m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;
- }
-
- m->op.flags |= BCH_WRITE_PAGES_STABLE|
+ m->op.flags |= BCH_WRITE_PAGES_STABLE|
BCH_WRITE_PAGES_OWNED|
BCH_WRITE_DATA_ENCODED|
BCH_WRITE_FROM_INTERNAL|
- BCH_WRITE_MOVE;
+ BCH_WRITE_MOVE|
+ m->data_opts.write_flags;
+ m->op.compression_type =
+ bch2_compression_opt_to_type[io_opts.background_compression ?:
+ io_opts.compression];
+ if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE)
+ m->op.alloc_reserve = RESERVE_movinggc;
- m->op.nr_replicas = data_opts.nr_replicas;
- m->op.nr_replicas_required = data_opts.nr_replicas;
+ i = 0;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if (p.ptr.cached)
+ m->data_opts.rewrite_ptrs &= ~(1U << i);
- switch (data_cmd) {
- case DATA_ADD_REPLICAS: {
- /*
- * DATA_ADD_REPLICAS is used for moving data to a different
- * device in the background, and due to compression the new copy
- * might take up more space than the old copy:
- */
-#if 0
- int nr = (int) io_opts.data_replicas -
- bch2_bkey_nr_ptrs_allocated(k);
-#endif
- int nr = (int) io_opts.data_replicas;
+ if (!((1U << i) & m->data_opts.rewrite_ptrs))
+ bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
- if (nr > 0) {
- m->op.nr_replicas = m->nr_ptrs_reserved = nr;
+ if (((1U << i) & m->data_opts.rewrite_ptrs) &&
+ crc_is_compressed(p.crc))
+ reserve_sectors += k.k->size;
- ret = bch2_disk_reservation_get(c, &m->op.res,
- k.k->size, m->op.nr_replicas, 0);
- if (ret)
- return ret;
+ /*
+ * op->csum_type is normally initialized from the fs/file's
+ * current options - but if an extent is encrypted, we require
+ * that it stays encrypted:
+ */
+ if (bch2_csum_type_is_encryption(p.crc.csum_type)) {
+ m->op.nonce = p.crc.nonce + p.crc.offset;
+ m->op.csum_type = p.crc.csum_type;
}
- break;
- }
- case DATA_REWRITE: {
- unsigned compressed_sectors = 0;
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- if (p.ptr.dev == data_opts.rewrite_dev) {
- if (p.ptr.cached)
- m->op.flags |= BCH_WRITE_CACHED;
-
- if (!p.ptr.cached &&
- crc_is_compressed(p.crc))
- compressed_sectors += p.crc.compressed_size;
- }
+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
+ m->op.incompressible = true;
- if (compressed_sectors) {
- ret = bch2_disk_reservation_add(c, &m->op.res,
- k.k->size * m->op.nr_replicas,
- BCH_DISK_RESERVATION_NOFAIL);
- if (ret)
- return ret;
- }
- break;
+ i++;
}
- case DATA_PROMOTE:
- m->op.flags |= BCH_WRITE_ALLOC_NOWAIT;
- m->op.flags |= BCH_WRITE_CACHED;
- break;
- default:
- BUG();
+
+ if (reserve_sectors) {
+ ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
+ m->data_opts.extra_replicas
+ ? 0
+ : BCH_DISK_RESERVATION_NOFAIL);
+ if (ret)
+ return ret;
}
+ m->op.nr_replicas = m->op.nr_replicas_required =
+ hweight32(m->data_opts.rewrite_ptrs) + m->data_opts.extra_replicas;
return 0;
}
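
To make the new reservation logic in bch2_data_update_init() concrete, a worked example with invented numbers: a 128-sector extent, rewrite_ptrs = 0b011 with one of the two selected pointers compressed, and extra_replicas = 1:

/*
 * reserve_sectors = 128 * 1    (k.k->size * extra_replicas)
 *                 + 128        (one compressed pointer being rewritten)
 *                 = 256
 *
 * m->op.nr_replicas = m->op.nr_replicas_required
 *                   = hweight32(0b011) + 1 = 3
 *
 * and because extra_replicas is nonzero, the reservation is allowed to
 * fail instead of being taken with BCH_DISK_RESERVATION_NOFAIL.
 */
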
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 03b4ca5a4ee8..ee38bd655af1 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -3,46 +3,37 @@
#ifndef _BCACHEFS_DATA_UPDATE_H
#define _BCACHEFS_DATA_UPDATE_H
+#include "bkey_buf.h"
#include "io_types.h"
-enum data_cmd {
- DATA_SKIP,
- DATA_SCRUB,
- DATA_ADD_REPLICAS,
- DATA_REWRITE,
- DATA_PROMOTE,
-};
+struct moving_context;
-struct data_opts {
+struct data_update_opts {
+ unsigned rewrite_ptrs;
u16 target;
- u8 rewrite_dev;
- u8 nr_replicas;
- int btree_insert_flags;
+ u8 extra_replicas;
+ unsigned btree_insert_flags;
+ unsigned write_flags;
};
struct data_update {
+ /* extent being updated: */
enum btree_id btree_id;
- enum data_cmd data_cmd;
- struct data_opts data_opts;
-
- unsigned nr_ptrs_reserved;
-
+ struct bkey_buf k;
+ struct data_update_opts data_opts;
struct moving_context *ctxt;
-
- /* what we read: */
- struct bch_extent_ptr ptr;
- u64 offset;
-
struct bch_write_op op;
};
int bch2_data_update_index_update(struct bch_write_op *);
-void bch2_data_update_read_done(struct data_update *, struct bch_read_bio *);
+void bch2_data_update_read_done(struct data_update *,
+ struct bch_extent_crc_unpacked);
+
+void bch2_data_update_exit(struct data_update *);
int bch2_data_update_init(struct bch_fs *, struct data_update *,
struct write_point_specifier,
- struct bch_io_opts,
- enum data_cmd, struct data_opts,
+ struct bch_io_opts, struct data_update_opts,
enum btree_id, struct bkey_s_c);
#endif /* _BCACHEFS_DATA_UPDATE_H */
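
For callers, the old enum data_cmd plus per-command fields collapse into the single options struct above. A minimal sketch of what a DATA_REWRITE-style caller looks like now; the surrounding variables (update, wp, io_opts) are illustrative, not from this patch:

/* bit i of rewrite_ptrs selects the i'th pointer, in bkey_for_each_ptr() order: */
struct data_update_opts data_opts = {
	.rewrite_ptrs		= 1U << 0,	/* rewrite just the first pointer */
	.target			= 0,
	.extra_replicas		= 0,
	.btree_insert_flags	= 0,
};

ret = bch2_data_update_init(c, &update, wp, io_opts, data_opts, btree_id, k);
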
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 4e44234a2b2c..38836c1990aa 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -25,6 +25,8 @@
#include "trace.h"
#include "util.h"
+static union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *);
+
static unsigned bch2_crc_field_size_max[] = {
[BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX,
[BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX,
@@ -687,37 +689,6 @@ unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
return durability;
}
-void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k,
- unsigned target,
- unsigned nr_desired_replicas)
-{
- struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
- union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- int extra = bch2_bkey_durability(c, k.s_c) - nr_desired_replicas;
-
- if (target && extra > 0)
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- int n = bch2_extent_ptr_durability(c, p);
-
- if (n && n <= extra &&
- !bch2_dev_in_target(c, p.ptr.dev, target)) {
- entry->ptr.cached = true;
- extra -= n;
- }
- }
-
- if (extra > 0)
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- int n = bch2_extent_ptr_durability(c, p);
-
- if (n && n <= extra) {
- entry->ptr.cached = true;
- extra -= n;
- }
- }
-}
-
void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry)
{
union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
@@ -821,8 +792,8 @@ static void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry)
/*
* Returns pointer to the next entry after the one being dropped:
*/
-union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k,
- struct bch_extent_ptr *ptr)
+static union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k,
+ struct bch_extent_ptr *ptr)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry = to_entry(ptr), *next;
@@ -894,6 +865,14 @@ void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev);
}
+void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev)
+{
+ struct bch_extent_ptr *ptr = (void *) bch2_bkey_has_device(k.s_c, dev);
+
+ if (ptr)
+ __bch2_bkey_drop_ptr(k, ptr);
+}
+
const struct bch_extent_ptr *
bch2_bkey_has_device(struct bkey_s_c k, unsigned dev)
{
@@ -939,6 +918,44 @@ bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k,
}
/*
+ * Returns true if two extents refer to the same data:
+ */
+bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
+{
+ struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(k1);
+ struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2);
+ const union bch_extent_entry *entry1, *entry2;
+ struct extent_ptr_decoded p1, p2;
+
+ bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1)
+ bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
+ if (p1.ptr.dev == p2.ptr.dev &&
+ p1.ptr.gen == p2.ptr.gen &&
+ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
+ (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+ return true;
+
+ return false;
+}
+
+bool bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1,
+ struct bkey_s_c k2)
+{
+ struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2);
+ const union bch_extent_entry *entry2;
+ struct extent_ptr_decoded p2;
+
+ bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
+ if (p1.ptr.dev == p2.ptr.dev &&
+ p1.ptr.gen == p2.ptr.gen &&
+ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
+ (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+ return true;
+
+ return false;
+}
+
+/*
* bch_extent_normalize - clean up an extent, dropping stale pointers etc.
*
* Returns true if @k should be dropped entirely
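
The offset arithmetic in bch2_extents_match() and bch2_extent_has_ptr() normalizes each pointer to where the key's logical data starts on disk, so a key trimmed by a racing overwrite still matches. A worked example with invented values:

/*
 * k1 covers sectors 100..128; k2 is the same extent trimmed to
 * 104..128 by a racing update.  For a pointer at device offset 1000:
 *
 *   k1: 1000 + crc.offset 0 - bkey_start_offset 100 = 900
 *   k2: 1000 + crc.offset 4 - bkey_start_offset 104 = 900
 *
 * Both normalize to 900, so bch2_extents_match() returns true - which
 * is what lets bch2_data_update_index_update() replace the old
 * version-number check and still detect whether it raced.
 */
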
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 4f41f0fd6cb1..3c17b81130bb 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -577,15 +577,10 @@ unsigned bch2_bkey_sectors_compressed(struct bkey_s_c);
unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c);
unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
-void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s,
- unsigned, unsigned);
-
void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *);
void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr);
void bch2_extent_ptr_decoded_append(struct bkey_i *,
struct extent_ptr_decoded *);
-union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s,
- struct bch_extent_ptr *);
union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s,
struct bch_extent_ptr *);
@@ -607,11 +602,14 @@ do { \
} while (0)
void bch2_bkey_drop_device(struct bkey_s, unsigned);
+void bch2_bkey_drop_device_noerror(struct bkey_s, unsigned);
const struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s_c, unsigned);
bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned);
bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
struct bch_extent_ptr, u64);
+bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
+bool bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s_c);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 743449ed7fae..c22ce1eb6b8b 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -1490,13 +1490,12 @@ static void promote_done(struct bch_write_op *wop)
bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
op->start_time);
- bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio);
+ bch2_data_update_exit(&op->write);
promote_free(c, op);
}
static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
{
- struct bch_fs *c = rbio->c;
struct bio *bio = &op->write.op.wbio.bio;
trace_promote(&rbio->bio);
@@ -1509,9 +1508,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
- bch2_data_update_read_done(&op->write, rbio);
-
- closure_call(&op->write.op.cl, bch2_write, c->btree_update_wq, NULL);
+ bch2_data_update_read_done(&op->write, rbio->pick.crc);
}
static struct promote_op *__promote_alloc(struct bch_fs *c,
@@ -1569,10 +1566,10 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
ret = bch2_data_update_init(c, &op->write,
writepoint_hashed((unsigned long) current),
opts,
- DATA_PROMOTE,
- (struct data_opts) {
+ (struct data_update_opts) {
.target = opts.promote_target,
- .nr_replicas = 1,
+ .extra_replicas = 1,
+ .write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
},
btree_id, k);
BUG_ON(ret);
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index a3a486cff28e..4060678cf716 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -38,17 +38,9 @@ struct moving_io {
static void move_free(struct moving_io *io)
{
struct moving_context *ctxt = io->write.ctxt;
- struct bvec_iter_all iter;
- struct bio_vec *bv;
-
- bch2_disk_reservation_put(io->write.op.c, &io->write.op.res);
-
- bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter)
- if (bv->bv_page)
- __free_page(bv->bv_page);
+ bch2_data_update_exit(&io->write);
wake_up(&ctxt->wait);
-
kfree(io);
}
@@ -72,8 +64,7 @@ static void move_write(struct moving_io *io)
closure_get(&io->write.ctxt->cl);
atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
- bch2_data_update_read_done(&io->write, &io->rbio);
- closure_call(&io->write.op.cl, bch2_write, NULL, NULL);
+ bch2_data_update_read_done(&io->write, io->rbio.pick.crc);
}
static inline struct moving_io *next_pending_write(struct moving_context *ctxt)
@@ -135,8 +126,7 @@ static int bch2_move_extent(struct btree_trans *trans,
struct bch_io_opts io_opts,
enum btree_id btree_id,
struct bkey_s_c k,
- enum data_cmd data_cmd,
- struct data_opts data_opts)
+ struct data_update_opts data_opts)
{
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -180,10 +170,11 @@ static int bch2_move_extent(struct btree_trans *trans,
io->rbio.bio.bi_end_io = move_read_endio;
ret = bch2_data_update_init(c, &io->write, wp, io_opts,
- data_cmd, data_opts, btree_id, k);
+ data_opts, btree_id, k);
if (ret)
goto err_free_pages;
+ io->write.ctxt = ctxt;
io->write.op.end_io = move_write_done;
atomic64_inc(&ctxt->stats->keys_moved);
@@ -262,8 +253,7 @@ static int __bch2_move_data(struct bch_fs *c,
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
- struct data_opts data_opts;
- enum data_cmd data_cmd;
+ struct data_update_opts data_opts;
u64 delay, cur_inum = U64_MAX;
int ret = 0, ret2;
@@ -350,18 +340,9 @@ static int __bch2_move_data(struct bch_fs *c,
cur_inum = k.k->p.inode;
}
- switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) {
- case DATA_SKIP:
+ memset(&data_opts, 0, sizeof(data_opts));
+ if (!pred(c, arg, k, &io_opts, &data_opts))
goto next;
- case DATA_SCRUB:
- BUG();
- case DATA_ADD_REPLICAS:
- case DATA_REWRITE:
- case DATA_PROMOTE:
- break;
- default:
- BUG();
- }
/*
* The iterator gets unlocked by __bch2_read_extent - need to
@@ -370,8 +351,8 @@ static int __bch2_move_data(struct bch_fs *c,
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
- data_cmd, data_opts);
+ ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts,
+ btree_id, k, data_opts);
if (ret2) {
if (ret2 == -EINTR)
continue;
@@ -476,9 +457,9 @@ int bch2_move_data(struct bch_fs *c,
return ret;
}
-typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *,
- struct btree *, struct bch_io_opts *,
- struct data_opts *);
+typedef bool (*move_btree_pred)(struct bch_fs *, void *,
+ struct btree *, struct bch_io_opts *,
+ struct data_update_opts *);
static int bch2_move_btree(struct bch_fs *c,
enum btree_id start_btree_id, struct bpos start_pos,
@@ -492,8 +473,7 @@ static int bch2_move_btree(struct bch_fs *c,
struct btree_iter iter;
struct btree *b;
enum btree_id id;
- struct data_opts data_opts;
- enum data_cmd cmd;
+ struct data_update_opts data_opts;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
@@ -522,17 +502,8 @@ retry:
stats->pos = iter.pos;
- switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) {
- case DATA_SKIP:
+ if (!pred(c, arg, b, &io_opts, &data_opts))
goto next;
- case DATA_SCRUB:
- BUG();
- case DATA_ADD_REPLICAS:
- case DATA_REWRITE:
- break;
- default:
- BUG();
- }
ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret;
if (ret == -EINTR)
@@ -562,20 +533,10 @@ next:
return ret;
}
-#if 0
-static enum data_cmd scrub_pred(struct bch_fs *c, void *arg,
- struct bkey_s_c k,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
-{
- return DATA_SCRUB;
-}
-#endif
-
-static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
- struct bkey_s_c k,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
+static bool rereplicate_pred(struct bch_fs *c, void *arg,
+ struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
unsigned nr_good = bch2_bkey_durability(c, k);
unsigned replicas = bkey_is_btree_ptr(k.k)
@@ -583,43 +544,50 @@ static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
: io_opts->data_replicas;
if (!nr_good || nr_good >= replicas)
- return DATA_SKIP;
+ return false;
data_opts->target = 0;
- data_opts->nr_replicas = 1;
+ data_opts->extra_replicas = replicas - nr_good;
data_opts->btree_insert_flags = 0;
- return DATA_ADD_REPLICAS;
+ return true;
}
-static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
- struct bkey_s_c k,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
+static bool migrate_pred(struct bch_fs *c, void *arg,
+ struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const struct bch_extent_ptr *ptr;
struct bch_ioctl_data *op = arg;
+ unsigned i = 0;
- if (!bch2_bkey_has_device(k, op->migrate.dev))
- return DATA_SKIP;
-
+ data_opts->rewrite_ptrs = 0;
data_opts->target = 0;
- data_opts->nr_replicas = 1;
+ data_opts->extra_replicas = 0;
data_opts->btree_insert_flags = 0;
- data_opts->rewrite_dev = op->migrate.dev;
- return DATA_REWRITE;
+
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (ptr->dev == op->migrate.dev)
+ data_opts->rewrite_ptrs |= 1U << i;
+ i++;
+ }
+
+ return data_opts->rewrite_ptrs != 0;
}
-static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg,
- struct btree *b,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
+static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}
-static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg,
- struct btree *b,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
+static bool migrate_btree_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}
@@ -648,21 +616,21 @@ static bool bformat_needs_redo(struct bkey_format *f)
return false;
}
-static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
- struct btree *b,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
+static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
if (b->version_ondisk != c->sb.version ||
btree_node_need_rewrite(b) ||
bformat_needs_redo(&b->format)) {
data_opts->target = 0;
- data_opts->nr_replicas = 1;
+ data_opts->extra_replicas = 0;
data_opts->btree_insert_flags = 0;
- return DATA_REWRITE;
+ return true;
}
- return DATA_SKIP;
+ return false;
}
int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
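
Predicates now follow one convention: the caller zeroes struct data_update_opts, the predicate sets whichever rewrite_ptrs bits (plus target and flags) it wants, and returns whether the extent needs work at all. A hypothetical predicate in that style, not part of this patch, that would move one copy of each extent onto the foreground target (assuming the foreground_target io option):

static bool move_one_copy_pred(struct bch_fs *c, void *arg,
			       struct bkey_s_c k,
			       struct bch_io_opts *io_opts,
			       struct data_update_opts *data_opts)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const struct bch_extent_ptr *ptr;
	unsigned i = 0;

	bkey_for_each_ptr(ptrs, ptr) {
		if (!ptr->cached &&
		    !bch2_dev_in_target(c, ptr->dev, io_opts->foreground_target)) {
			/* select just this pointer for rewrite: */
			data_opts->rewrite_ptrs = 1U << i;
			data_opts->target = io_opts->foreground_target;
			return true;
		}
		i++;
	}

	return false;
}
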
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index 6d273f67a82c..fd5562909382 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -24,9 +24,8 @@ struct moving_context {
wait_queue_head_t wait;
};
-typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
- struct bkey_s_c,
- struct bch_io_opts *, struct data_opts *);
+typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c,
+ struct bch_io_opts *, struct data_update_opts *);
int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 1e2de1e818c1..d63b9fea4f05 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -39,15 +39,32 @@ static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
cmp_int(l->offset, r->offset);
}
-static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
- struct bkey_s_c k,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
+static bool copygc_pred(struct bch_fs *c, void *arg,
+ struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
copygc_heap *h = &c->copygc_heap;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p = { 0 };
+ unsigned i = 0;
+
+ /*
+ * We need to use the journal reserve here, because
+ * - journal reclaim depends on btree key cache
+ * flushing to make forward progress,
+ * - which has to make forward progress when the
+ * journal is pre-reservation full,
+ * - and depends on allocation - meaning allocator and
+ * copygc
+ */
+
+ data_opts->rewrite_ptrs = 0;
+ data_opts->target = io_opts->background_target;
+ data_opts->extra_replicas = 0;
+ data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE|
+ JOURNAL_WATERMARK_copygc;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
@@ -55,12 +72,12 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
.dev = p.ptr.dev,
.offset = p.ptr.offset,
};
- ssize_t i;
+ ssize_t eytz;
if (p.ptr.cached)
continue;
- i = eytzinger0_find_le(h->data, h->used,
+ eytz = eytzinger0_find_le(h->data, h->used,
sizeof(h->data[0]),
bucket_offset_cmp, &search);
#if 0
@@ -74,34 +91,16 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
BUG_ON(i != j);
#endif
- if (i >= 0 &&
- p.ptr.dev == h->data[i].dev &&
- p.ptr.offset < h->data[i].offset + ca->mi.bucket_size &&
- p.ptr.gen == h->data[i].gen) {
- /*
- * We need to use the journal reserve here, because
- * - journal reclaim depends on btree key cache
- * flushing to make forward progress,
- * - which has to make forward progress when the
- * journal is pre-reservation full,
- * - and depends on allocation - meaning allocator and
- * copygc
- */
-
- data_opts->target = io_opts->background_target;
- data_opts->nr_replicas = 1;
- data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE|
- JOURNAL_WATERMARK_copygc;
- data_opts->rewrite_dev = p.ptr.dev;
-
- if (p.has_ec)
- data_opts->nr_replicas += p.ec.redundancy;
-
- return DATA_REWRITE;
- }
+ if (eytz >= 0 &&
+ p.ptr.dev == h->data[eytz].dev &&
+ p.ptr.offset < h->data[eytz].offset + ca->mi.bucket_size &&
+ p.ptr.gen == h->data[eytz].gen)
+ data_opts->rewrite_ptrs |= 1U << i;
+
+ i++;
}
- return DATA_SKIP;
+ return data_opts->rewrite_ptrs != 0;
}
static inline int fragmentation_cmp(copygc_heap *heap,
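
copygc_pred() now tags every pointer whose bucket is being evacuated instead of returning on the first match, which is why the eytzinger index was renamed to free up i for the pointer bitmask. A worked example of the bucket-membership test with invented numbers:

/*
 * With ca->mi.bucket_size = 512 and a heap entry
 * { .dev = 0, .offset = 4096, .gen = 3 }, a pointer
 * { .dev = 0, .offset = 4200, .gen = 3 } passes all four checks
 * (eytz >= 0, devices match, 4200 < 4096 + 512, generations match),
 * so its bit is set in rewrite_ptrs; pointers in buckets not being
 * evacuated are left alone.
 */
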
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 1724ae36c0f4..63b24dc9c917 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -22,62 +22,70 @@
* returns -1 if it should not be moved, or
* device of pointer that should be moved, if known, or INT_MAX if unknown
*/
-static int __bch2_rebalance_pred(struct bch_fs *c,
- struct bkey_s_c k,
- struct bch_io_opts *io_opts)
+static bool rebalance_pred(struct bch_fs *c, void *arg,
+ struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
+ unsigned i;
+
+ data_opts->rewrite_ptrs = 0;
+ data_opts->target = io_opts->background_target;
+ data_opts->extra_replicas = 0;
+ data_opts->btree_insert_flags = 0;
if (io_opts->background_compression &&
- !bch2_bkey_is_incompressible(k))
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ !bch2_bkey_is_incompressible(k)) {
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+
+ i = 0;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
if (!p.ptr.cached &&
p.crc.compression_type !=
bch2_compression_opt_to_type[io_opts->background_compression])
- return p.ptr.dev;
+ data_opts->rewrite_ptrs |= 1U << i;
+ i++;
+ }
+ }
- if (io_opts->background_target)
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- if (!p.ptr.cached &&
- !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target))
- return p.ptr.dev;
+ if (io_opts->background_target) {
+ const struct bch_extent_ptr *ptr;
- return -1;
+ i = 0;
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (!ptr->cached &&
+ !bch2_dev_in_target(c, ptr->dev, io_opts->background_target))
+ data_opts->rewrite_ptrs |= 1U << i;
+ i++;
+ }
+ }
+
+ return data_opts->rewrite_ptrs != 0;
}
void bch2_rebalance_add_key(struct bch_fs *c,
struct bkey_s_c k,
struct bch_io_opts *io_opts)
{
- atomic64_t *counter;
- int dev;
+ struct data_update_opts update_opts = { 0 };
+ struct bkey_ptrs_c ptrs;
+ const struct bch_extent_ptr *ptr;
+ unsigned i;
- dev = __bch2_rebalance_pred(c, k, io_opts);
- if (dev < 0)
+ if (!rebalance_pred(c, NULL, k, io_opts, &update_opts))
return;
- counter = dev < INT_MAX
- ? &bch_dev_bkey_exists(c, dev)->rebalance_work
- : &c->rebalance.work_unknown_dev;
-
- if (atomic64_add_return(k.k->size, counter) == k.k->size)
- rebalance_wakeup(c);
-}
-
-static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
- struct bkey_s_c k,
- struct bch_io_opts *io_opts,
- struct data_opts *data_opts)
-{
- if (__bch2_rebalance_pred(c, k, io_opts) >= 0) {
- data_opts->target = io_opts->background_target;
- data_opts->nr_replicas = 1;
- data_opts->btree_insert_flags = 0;
- return DATA_ADD_REPLICAS;
- } else {
- return DATA_SKIP;
+ i = 0;
+ ptrs = bch2_bkey_ptrs_c(k);
+ bkey_for_each_ptr(ptrs, ptr) {
+ if ((1U << i) & update_opts.rewrite_ptrs)
+ if (atomic64_add_return(k.k->size,
+ &bch_dev_bkey_exists(c, ptr->dev)->rebalance_work) ==
+ k.k->size)
+ rebalance_wakeup(c);
+ i++;
}
}