Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/data_update.c | 209
-rw-r--r-- | fs/bcachefs/data_update.h | 39
-rw-r--r-- | fs/bcachefs/extents.c | 83
-rw-r--r-- | fs/bcachefs/extents.h | 8
-rw-r--r-- | fs/bcachefs/io.c | 13
-rw-r--r-- | fs/bcachefs/move.c | 140
-rw-r--r-- | fs/bcachefs/move.h | 5
-rw-r--r-- | fs/bcachefs/movinggc.c | 63
-rw-r--r-- | fs/bcachefs/rebalance.c | 84
9 files changed, 304 insertions, 340 deletions
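
The heart of this refactor is visible in the diff below: the old enum data_cmd / struct data_opts pair is replaced by struct data_update_opts, whose rewrite_ptrs field is a bitmask over an extent's pointers - bit i set means "rewrite the i'th pointer", counted in bkey pointer iteration order. A minimal userspace sketch of that convention (the struct and values here are made-up stand-ins for illustration, not the kernel types):

#include <stdio.h>

/* Hypothetical stand-in for struct bch_extent_ptr: just a device index. */
struct ptr { unsigned dev; };

int main(void)
{
	/* An extent with three pointers, on devices 0, 2 and 3: */
	struct ptr ptrs[] = { { 0 }, { 2 }, { 3 } };
	/* Rewrite the first and third pointers (bits 0 and 2): */
	unsigned rewrite_ptrs = (1U << 0) | (1U << 2);
	unsigned i;

	for (i = 0; i < sizeof(ptrs) / sizeof(ptrs[0]); i++)
		printf("%s ptr %u (dev %u)\n",
		       (rewrite_ptrs & (1U << i)) ? "rewrite" : "keep",
		       i, ptrs[i].dev);
	return 0;
}

Predicates such as migrate_pred and copygc_pred below build this mask, and bch2_data_update_init() consumes it; bits for cached pointers are cleared, since those can simply be dropped.
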
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 0161b0a9f36e..f7bce89f84ed 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -89,6 +89,16 @@ next:
 	return ret;
 }
 
+static void bch2_bkey_mark_dev_cached(struct bkey_s k, unsigned dev)
+{
+	struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+	struct bch_extent_ptr *ptr;
+
+	bkey_for_each_ptr(ptrs, ptr)
+		if (ptr->dev == dev)
+			ptr->cached = true;
+}
+
 int bch2_data_update_index_update(struct bch_write_op *op)
 {
 	struct bch_fs *c = op->c;
@@ -113,6 +123,7 @@ int bch2_data_update_index_update(struct bch_write_op *op)
 
 	while (1) {
 		struct bkey_s_c k;
+		struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
 		struct bkey_i *insert;
 		struct bkey_i_extent *new;
 		const union bch_extent_entry *entry;
@@ -121,6 +132,7 @@ int bch2_data_update_index_update(struct bch_write_op *op)
 		bool did_work = false;
 		bool extending = false, should_check_enospc;
 		s64 i_sectors_delta = 0, disk_sectors_delta = 0;
+		unsigned i;
 
 		bch2_trans_begin(&trans);
 
@@ -131,8 +143,7 @@ int bch2_data_update_index_update(struct bch_write_op *op)
 
 		new = bkey_i_to_extent(bch2_keylist_front(keys));
 
-		if (bversion_cmp(k.k->version, new->k.version) ||
-		    !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
+		if (!bch2_extents_match(k, old))
 			goto nomatch;
 
 		bkey_reassemble(_insert.k, k);
@@ -146,20 +157,34 @@ int bch2_data_update_index_update(struct bch_write_op *op)
 		bch2_cut_back(new->k.p, insert);
 		bch2_cut_back(insert->k.p, &new->k_i);
 
-		if (m->data_cmd == DATA_REWRITE) {
-			struct bch_extent_ptr *new_ptr, *old_ptr = (void *)
-				bch2_bkey_has_device(bkey_i_to_s_c(insert),
-						     m->data_opts.rewrite_dev);
-			if (!old_ptr)
-				goto nomatch;
-
-			if (old_ptr->cached)
-				extent_for_each_ptr(extent_i_to_s(new), new_ptr)
-					new_ptr->cached = true;
-
-			__bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
+		/*
+		 * @old: extent that we read from
+		 * @insert: key that we're going to update, initialized from
+		 * extent currently in btree - same as @old unless we raced with
+		 * other updates
+		 * @new: extent with new pointers that we'll be adding to @insert
+		 *
+		 * First, drop rewrite_ptrs from @new:
+		 */
+		i = 0;
+		bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) {
+			if (((1U << i) & m->data_opts.rewrite_ptrs) &&
+			    bch2_extent_has_ptr(old, p, bkey_i_to_s_c(insert))) {
+				/*
+				 * If we're going to be adding a pointer to the
+				 * same device, we have to drop the old one -
+				 * otherwise, we can just mark it cached:
+				 */
+				if (bch2_bkey_has_device(bkey_i_to_s_c(&new->k_i), p.ptr.dev))
+					bch2_bkey_drop_device_noerror(bkey_i_to_s(insert), p.ptr.dev);
+				else
+					bch2_bkey_mark_dev_cached(bkey_i_to_s(insert), p.ptr.dev);
+			}
+			i++;
 		}
 
+		/* Add new ptrs: */
 		extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
 			if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) {
 				/*
@@ -177,12 +202,8 @@ int bch2_data_update_index_update(struct bch_write_op *op)
 		if (!did_work)
 			goto nomatch;
 
-		bch2_bkey_narrow_crcs(insert,
-				(struct bch_extent_crc_unpacked) { 0 });
+		bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 });
 		bch2_extent_normalize(c, bkey_i_to_s(insert));
-		bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert),
-					       op->opts.background_target,
-					       op->opts.data_replicas);
 
 		ret = bch2_sum_sector_overwrites(&trans, &iter, insert,
 						 &extending,
@@ -250,134 +271,100 @@ out:
 	return ret;
 }
 
-void bch2_data_update_read_done(struct data_update *m, struct bch_read_bio *rbio)
+void bch2_data_update_read_done(struct data_update *m,
+				struct bch_extent_crc_unpacked crc)
 {
 	/* write bio must own pages: */
 	BUG_ON(!m->op.wbio.bio.bi_vcnt);
 
-	m->ptr		= rbio->pick.ptr;
-	m->offset	= rbio->data_pos.offset - rbio->pick.crc.offset;
-	m->op.devs_have	= rbio->devs_have;
-	m->op.pos	= rbio->data_pos;
-	m->op.version	= rbio->version;
-	m->op.crc	= rbio->pick.crc;
-	m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
+	m->op.crc = crc;
+	m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9;
 
-	if (m->data_cmd == DATA_REWRITE)
-		bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev);
+	closure_call(&m->op.cl, bch2_write, NULL, NULL);
+}
+
+void bch2_data_update_exit(struct data_update *update)
+{
+	struct bch_fs *c = update->op.c;
+
+	bch2_bkey_buf_exit(&update->k, c);
+	bch2_disk_reservation_put(c, &update->op.res);
+	bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
 }
 
 int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
 			  struct write_point_specifier wp,
 			  struct bch_io_opts io_opts,
-			  enum data_cmd data_cmd,
-			  struct data_opts data_opts,
+			  struct data_update_opts data_opts,
 			  enum btree_id btree_id,
 			  struct bkey_s_c k)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 	const union bch_extent_entry *entry;
-	struct bch_extent_crc_unpacked crc;
 	struct extent_ptr_decoded p;
+	unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
 	int ret;
 
+	bch2_bkey_buf_init(&m->k);
+	bch2_bkey_buf_reassemble(&m->k, c, k);
 	m->btree_id	= btree_id;
-	m->data_cmd	= data_cmd;
 	m->data_opts	= data_opts;
-	m->nr_ptrs_reserved = 0;
 
 	bch2_write_op_init(&m->op, c, io_opts);
-
-	if (!bch2_bkey_is_incompressible(k))
-		m->op.compression_type =
-			bch2_compression_opt_to_type[io_opts.background_compression ?:
-						     io_opts.compression];
-	else
-		m->op.incompressible = true;
-
+	m->op.pos	= bkey_start_pos(k.k);
+	m->op.version	= k.k->version;
 	m->op.target	= data_opts.target,
 	m->op.write_point = wp;
-
-	/*
-	 * op->csum_type is normally initialized from the fs/file's current
-	 * options - but if an extent is encrypted, we require that it stays
-	 * encrypted:
-	 */
-	bkey_for_each_crc(k.k, ptrs, crc, entry)
-		if (bch2_csum_type_is_encryption(crc.csum_type)) {
-			m->op.nonce	= crc.nonce + crc.offset;
-			m->op.csum_type = crc.csum_type;
-			break;
-		}
-
-	if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) {
-		m->op.alloc_reserve = RESERVE_movinggc;
-	} else {
-		/* XXX: this should probably be passed in */
-		m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;
-	}
-
-	m->op.flags |= BCH_WRITE_PAGES_STABLE|
+	m->op.flags	|= BCH_WRITE_PAGES_STABLE|
 		BCH_WRITE_PAGES_OWNED|
 		BCH_WRITE_DATA_ENCODED|
 		BCH_WRITE_FROM_INTERNAL|
-		BCH_WRITE_MOVE;
+		BCH_WRITE_MOVE|
+		m->data_opts.write_flags;
+	m->op.compression_type =
+		bch2_compression_opt_to_type[io_opts.background_compression ?:
					     io_opts.compression];
 
+	if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE)
+		m->op.alloc_reserve = RESERVE_movinggc;
 
-	m->op.nr_replicas	= data_opts.nr_replicas;
-	m->op.nr_replicas_required = data_opts.nr_replicas;
+	i = 0;
+	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+		if (p.ptr.cached)
+			m->data_opts.rewrite_ptrs &= ~(1U << i);
 
-	switch (data_cmd) {
-	case DATA_ADD_REPLICAS: {
-		/*
-		 * DATA_ADD_REPLICAS is used for moving data to a different
-		 * device in the background, and due to compression the new copy
-		 * might take up more space than the old copy:
-		 */
-#if 0
-		int nr = (int) io_opts.data_replicas -
-			bch2_bkey_nr_ptrs_allocated(k);
-#endif
-		int nr = (int) io_opts.data_replicas;
+		if (!((1U << i) & m->data_opts.rewrite_ptrs))
+			bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
 
-		if (nr > 0) {
-			m->op.nr_replicas = m->nr_ptrs_reserved = nr;
+		if (((1U << i) & m->data_opts.rewrite_ptrs) &&
+		    crc_is_compressed(p.crc))
+			reserve_sectors += k.k->size;
 
-			ret = bch2_disk_reservation_get(c, &m->op.res,
-					k.k->size, m->op.nr_replicas, 0);
-			if (ret)
-				return ret;
+		/*
+		 * op->csum_type is normally initialized from the fs/file's
+		 * current options - but if an extent is encrypted, we require
+		 * that it stays encrypted:
+		 */
+		if (bch2_csum_type_is_encryption(p.crc.csum_type)) {
+			m->op.nonce	= p.crc.nonce + p.crc.offset;
+			m->op.csum_type = p.crc.csum_type;
 		}
-		break;
-	}
-	case DATA_REWRITE: {
-		unsigned compressed_sectors = 0;
 
-		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-			if (p.ptr.dev == data_opts.rewrite_dev) {
-				if (p.ptr.cached)
-					m->op.flags |= BCH_WRITE_CACHED;
-
-				if (!p.ptr.cached &&
-				    crc_is_compressed(p.crc))
-					compressed_sectors += p.crc.compressed_size;
-			}
+		if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
+			m->op.incompressible = true;
 
-		if (compressed_sectors) {
-			ret = bch2_disk_reservation_add(c, &m->op.res,
-					k.k->size * m->op.nr_replicas,
-					BCH_DISK_RESERVATION_NOFAIL);
-			if (ret)
-				return ret;
-		}
-		break;
+		i++;
 	}
-	case DATA_PROMOTE:
-		m->op.flags	|= BCH_WRITE_ALLOC_NOWAIT;
-		m->op.flags	|= BCH_WRITE_CACHED;
-		break;
-	default:
-		BUG();
+
+	if (reserve_sectors) {
+		ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
+						m->data_opts.extra_replicas
+						? 0
+						: BCH_DISK_RESERVATION_NOFAIL);
+		if (ret)
+			return ret;
 	}
 
+	m->op.nr_replicas = m->op.nr_replicas_required =
+		hweight32(m->data_opts.rewrite_ptrs) + m->data_opts.extra_replicas;
 	return 0;
 }
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 03b4ca5a4ee8..ee38bd655af1 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -3,46 +3,37 @@
 #ifndef _BCACHEFS_DATA_UPDATE_H
 #define _BCACHEFS_DATA_UPDATE_H
 
+#include "bkey_buf.h"
 #include "io_types.h"
 
-enum data_cmd {
-	DATA_SKIP,
-	DATA_SCRUB,
-	DATA_ADD_REPLICAS,
-	DATA_REWRITE,
-	DATA_PROMOTE,
-};
+struct moving_context;
 
-struct data_opts {
+struct data_update_opts {
+	unsigned	rewrite_ptrs;
 	u16		target;
-	u8		rewrite_dev;
-	u8		nr_replicas;
-	int		btree_insert_flags;
+	u8		extra_replicas;
+	unsigned	btree_insert_flags;
+	unsigned	write_flags;
 };
 
 struct data_update {
+	/* extent being updated: */
 	enum btree_id		btree_id;
-	enum data_cmd		data_cmd;
-	struct data_opts	data_opts;
-
-	unsigned		nr_ptrs_reserved;
-
+	struct bkey_buf		k;
+	struct data_update_opts	data_opts;
 	struct moving_context	*ctxt;
-
-	/* what we read: */
-	struct bch_extent_ptr	ptr;
-	u64			offset;
-
 	struct bch_write_op	op;
 };
 
 int bch2_data_update_index_update(struct bch_write_op *);
 
-void bch2_data_update_read_done(struct data_update *, struct bch_read_bio *);
+void bch2_data_update_read_done(struct data_update *,
+				struct bch_extent_crc_unpacked);
+
+void bch2_data_update_exit(struct data_update *);
 
 int bch2_data_update_init(struct bch_fs *, struct data_update *,
 			  struct write_point_specifier,
-			  struct bch_io_opts,
-			  enum data_cmd, struct data_opts,
+			  struct bch_io_opts, struct data_update_opts,
 			  enum btree_id, struct bkey_s_c);
 
 #endif /* _BCACHEFS_DATA_UPDATE_H */
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 4e44234a2b2c..38836c1990aa 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -25,6 +25,8 @@
 #include "trace.h"
 #include "util.h"
 
+static union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *);
+
 static unsigned bch2_crc_field_size_max[] = {
 	[BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX,
 	[BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX,
@@ -687,37 +689,6 @@ unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
 	return durability;
 }
 
-void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k,
-				    unsigned target,
-				    unsigned nr_desired_replicas)
-{
-	struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
-	union bch_extent_entry *entry;
-	struct extent_ptr_decoded p;
-	int extra = bch2_bkey_durability(c, k.s_c) - nr_desired_replicas;
-
-	if (target && extra > 0)
-		bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-			int n = bch2_extent_ptr_durability(c, p);
-
-			if (n && n <= extra &&
-			    !bch2_dev_in_target(c, p.ptr.dev, target)) {
-				entry->ptr.cached = true;
-				extra -= n;
-			}
-		}
-
-	if (extra > 0)
-		bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-			int n = bch2_extent_ptr_durability(c, p);
-
-			if (n && n <= extra) {
-				entry->ptr.cached = true;
-				extra -= n;
-			}
-		}
-}
-
 void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry)
 {
 	union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
@@ -821,8 +792,8 @@ static void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry)
 /*
  * Returns pointer to the next entry after the one being dropped:
  */
-union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k,
-					     struct bch_extent_ptr *ptr)
+static union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k,
+						    struct bch_extent_ptr *ptr)
 {
 	struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
 	union bch_extent_entry *entry = to_entry(ptr), *next;
@@ -894,6 +865,14 @@ void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
 	bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev);
 }
 
+void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev)
+{
+	struct bch_extent_ptr *ptr = (void *) bch2_bkey_has_device(k.s_c, dev);
+
+	if (ptr)
+		__bch2_bkey_drop_ptr(k, ptr);
+}
+
 const struct bch_extent_ptr *
 bch2_bkey_has_device(struct bkey_s_c k, unsigned dev)
 {
@@ -939,6 +918,44 @@ bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k,
 }
 
 /*
+ * Returns true if two extents refer to the same data:
+ */
+bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
+{
+	struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(k1);
+	struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2);
+	const union bch_extent_entry *entry1, *entry2;
+	struct extent_ptr_decoded p1, p2;
+
+	bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1)
+		bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
+			if (p1.ptr.dev == p2.ptr.dev &&
+			    p1.ptr.gen == p2.ptr.gen &&
+			    (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
+			    (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+				return true;
+
+	return false;
+}
+
+bool bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1,
+			 struct bkey_s_c k2)
+{
+	struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2);
+	const union bch_extent_entry *entry2;
+	struct extent_ptr_decoded p2;
+
+	bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
+		if (p1.ptr.dev == p2.ptr.dev &&
+		    p1.ptr.gen == p2.ptr.gen &&
+		    (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
+		    (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+			return true;
+
+	return false;
+}
+
+/*
  * bch_extent_normalize - clean up an extent, dropping stale pointers etc.
  *
  * Returns true if @k should be dropped entirely
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 4f41f0fd6cb1..3c17b81130bb 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -577,15 +577,10 @@ unsigned bch2_bkey_sectors_compressed(struct bkey_s_c);
 unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c);
 unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
 
-void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s,
-				    unsigned, unsigned);
-
 void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *);
 void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr);
 void bch2_extent_ptr_decoded_append(struct bkey_i *, struct extent_ptr_decoded *);
-union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s,
-					     struct bch_extent_ptr *);
 union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s,
 					   struct bch_extent_ptr *);
 
@@ -607,11 +602,14 @@ do {
 } while (0)
 
 void bch2_bkey_drop_device(struct bkey_s, unsigned);
+void bch2_bkey_drop_device_noerror(struct bkey_s, unsigned);
 const struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s_c, unsigned);
 bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned);
 
 bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
 			   struct bch_extent_ptr, u64);
+bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
+bool bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s_c);
 
 bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
 void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 743449ed7fae..c22ce1eb6b8b 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -1490,13 +1490,12 @@ static void promote_done(struct bch_write_op *wop)
 	bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
 			       op->start_time);
 
-	bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio);
+	bch2_data_update_exit(&op->write);
 	promote_free(c, op);
 }
 
 static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
 {
-	struct bch_fs *c = rbio->c;
 	struct bio *bio = &op->write.op.wbio.bio;
 
 	trace_promote(&rbio->bio);
@@ -1509,9 +1508,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
 	       sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
 	swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
 
-	bch2_data_update_read_done(&op->write, rbio);
-
-	closure_call(&op->write.op.cl, bch2_write, c->btree_update_wq, NULL);
+	bch2_data_update_read_done(&op->write, rbio->pick.crc);
 }
 
 static struct promote_op *__promote_alloc(struct bch_fs *c,
@@ -1569,10 +1566,10 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
 	ret = bch2_data_update_init(c, &op->write,
 			writepoint_hashed((unsigned long) current),
 			opts,
-			DATA_PROMOTE,
-			(struct data_opts) {
+			(struct data_update_opts) {
 				.target		= opts.promote_target,
-				.nr_replicas	= 1,
+				.extra_replicas	= 1,
+				.write_flags	= BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
 			},
 			btree_id, k);
 	BUG_ON(ret);
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index a3a486cff28e..4060678cf716 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -38,17 +38,9 @@ struct moving_io {
 static void move_free(struct moving_io *io)
 {
 	struct moving_context *ctxt = io->write.ctxt;
-	struct bvec_iter_all iter;
-	struct bio_vec *bv;
-
-	bch2_disk_reservation_put(io->write.op.c, &io->write.op.res);
-
-	bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter)
-		if (bv->bv_page)
-			__free_page(bv->bv_page);
 
+	bch2_data_update_exit(&io->write);
 	wake_up(&ctxt->wait);
-
 	kfree(io);
 }
 
@@ -72,8 +64,7 @@ static void move_write(struct moving_io *io)
 	closure_get(&io->write.ctxt->cl);
 	atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
 
-	bch2_data_update_read_done(&io->write, &io->rbio);
-	closure_call(&io->write.op.cl, bch2_write, NULL, NULL);
+	bch2_data_update_read_done(&io->write, io->rbio.pick.crc);
 }
 
 static inline struct moving_io *next_pending_write(struct moving_context *ctxt)
@@ -135,8 +126,7 @@ static int bch2_move_extent(struct btree_trans *trans,
 			    struct bch_io_opts io_opts,
 			    enum btree_id btree_id,
 			    struct bkey_s_c k,
-			    enum data_cmd data_cmd,
-			    struct data_opts data_opts)
+			    struct data_update_opts data_opts)
 {
 	struct bch_fs *c = trans->c;
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -180,10 +170,11 @@ static int bch2_move_extent(struct btree_trans *trans,
 	io->rbio.bio.bi_end_io		= move_read_endio;
 
 	ret = bch2_data_update_init(c, &io->write, wp, io_opts,
-				    data_cmd, data_opts, btree_id, k);
+				    data_opts, btree_id, k);
 	if (ret)
 		goto err_free_pages;
 
+	io->write.ctxt = ctxt;
 	io->write.op.end_io = move_write_done;
 
 	atomic64_inc(&ctxt->stats->keys_moved);
@@ -262,8 +253,7 @@ static int __bch2_move_data(struct bch_fs *c,
 	struct btree_trans trans;
 	struct btree_iter iter;
 	struct bkey_s_c k;
-	struct data_opts data_opts;
-	enum data_cmd data_cmd;
+	struct data_update_opts data_opts;
 	u64 delay, cur_inum = U64_MAX;
 	int ret = 0, ret2;
 
@@ -350,18 +340,9 @@ static int __bch2_move_data(struct bch_fs *c,
 			cur_inum = k.k->p.inode;
 		}
 
-		switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) {
-		case DATA_SKIP:
+		memset(&data_opts, 0, sizeof(data_opts));
+		if (!pred(c, arg, k, &io_opts, &data_opts))
 			goto next;
-		case DATA_SCRUB:
-			BUG();
-		case DATA_ADD_REPLICAS:
-		case DATA_REWRITE:
-		case DATA_PROMOTE:
-			break;
-		default:
-			BUG();
-		}
 
 		/*
 		 * The iterator gets unlocked by __bch2_read_extent - need to
@@ -370,8 +351,8 @@ static int __bch2_move_data(struct bch_fs *c,
 		bch2_bkey_buf_reassemble(&sk, c, k);
 		k = bkey_i_to_s_c(sk.k);
 
-		ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
-					data_cmd, data_opts);
+		ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts,
+					btree_id, k, data_opts);
 		if (ret2) {
 			if (ret2 == -EINTR)
 				continue;
@@ -476,9 +457,9 @@ int bch2_move_data(struct bch_fs *c,
 	return ret;
 }
 
-typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *,
-					 struct btree *, struct bch_io_opts *,
-					 struct data_opts *);
+typedef bool (*move_btree_pred)(struct bch_fs *, void *,
+				struct btree *, struct bch_io_opts *,
+				struct data_update_opts *);
 
 static int bch2_move_btree(struct bch_fs *c,
 			   enum btree_id start_btree_id, struct bpos start_pos,
@@ -492,8 +473,7 @@ static int bch2_move_btree(struct bch_fs *c,
 	struct btree_iter iter;
 	struct btree *b;
 	enum btree_id id;
-	struct data_opts data_opts;
-	enum data_cmd cmd;
+	struct data_update_opts data_opts;
 	int ret = 0;
 
 	bch2_trans_init(&trans, c, 0, 0);
@@ -522,17 +502,8 @@ retry:
 
 		stats->pos = iter.pos;
 
-		switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) {
-		case DATA_SKIP:
+		if (!pred(c, arg, b, &io_opts, &data_opts))
 			goto next;
-		case DATA_SCRUB:
-			BUG();
-		case DATA_ADD_REPLICAS:
-		case DATA_REWRITE:
-			break;
-		default:
-			BUG();
-		}
 
 		ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret;
 		if (ret == -EINTR)
@@ -562,20 +533,10 @@ next:
 	return ret;
 }
 
-#if 0
-static enum data_cmd scrub_pred(struct bch_fs *c, void *arg,
-				struct bkey_s_c k,
-				struct bch_io_opts *io_opts,
-				struct data_opts *data_opts)
-{
-	return DATA_SCRUB;
-}
-#endif
-
-static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
-				      struct bkey_s_c k,
-				      struct bch_io_opts *io_opts,
-				      struct data_opts *data_opts)
+static bool rereplicate_pred(struct bch_fs *c, void *arg,
+			     struct bkey_s_c k,
+			     struct bch_io_opts *io_opts,
+			     struct data_update_opts *data_opts)
 {
 	unsigned nr_good = bch2_bkey_durability(c, k);
 	unsigned replicas = bkey_is_btree_ptr(k.k)
@@ -583,43 +544,50 @@ static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
 		: io_opts->data_replicas;
 
 	if (!nr_good || nr_good >= replicas)
-		return DATA_SKIP;
+		return false;
 
 	data_opts->target		= 0;
-	data_opts->nr_replicas		= 1;
+	data_opts->extra_replicas	= replicas - nr_good;
 	data_opts->btree_insert_flags	= 0;
-	return DATA_ADD_REPLICAS;
+	return true;
 }
 
-static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
-				  struct bkey_s_c k,
-				  struct bch_io_opts *io_opts,
-				  struct data_opts *data_opts)
+static bool migrate_pred(struct bch_fs *c, void *arg,
+			 struct bkey_s_c k,
+			 struct bch_io_opts *io_opts,
+			 struct data_update_opts *data_opts)
 {
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	const struct bch_extent_ptr *ptr;
 	struct bch_ioctl_data *op = arg;
+	unsigned i = 0;
 
-	if (!bch2_bkey_has_device(k, op->migrate.dev))
-		return DATA_SKIP;
-
+	data_opts->rewrite_ptrs		= 0;
 	data_opts->target		= 0;
-	data_opts->nr_replicas		= 1;
+	data_opts->extra_replicas	= 0;
 	data_opts->btree_insert_flags	= 0;
-	data_opts->rewrite_dev		= op->migrate.dev;
-	return DATA_REWRITE;
+
+	bkey_for_each_ptr(ptrs, ptr) {
+		if (ptr->dev == op->migrate.dev)
+			data_opts->rewrite_ptrs |= 1U << i;
+		i++;
+	}
+
+	return data_opts->rewrite_ptrs != 0;
 }
 
-static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg,
-					    struct btree *b,
-					    struct bch_io_opts *io_opts,
-					    struct data_opts *data_opts)
+static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
+				   struct btree *b,
+				   struct bch_io_opts *io_opts,
+				   struct data_update_opts *data_opts)
 {
 	return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
 }
 
-static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg,
-					struct btree *b,
-					struct bch_io_opts *io_opts,
-					struct data_opts *data_opts)
+static bool migrate_btree_pred(struct bch_fs *c, void *arg,
+			       struct btree *b,
+			       struct bch_io_opts *io_opts,
+			       struct data_update_opts *data_opts)
 {
 	return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
 }
@@ -648,21 +616,21 @@ static bool bformat_needs_redo(struct bkey_format *f)
 	return false;
 }
 
-static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
-					    struct btree *b,
-					    struct bch_io_opts *io_opts,
-					    struct data_opts *data_opts)
+static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
+				   struct btree *b,
+				   struct bch_io_opts *io_opts,
+				   struct data_update_opts *data_opts)
 {
 	if (b->version_ondisk != c->sb.version ||
 	    btree_node_need_rewrite(b) ||
 	    bformat_needs_redo(&b->format)) {
 		data_opts->target		= 0;
-		data_opts->nr_replicas		= 1;
+		data_opts->extra_replicas	= 0;
 		data_opts->btree_insert_flags	= 0;
-		return DATA_REWRITE;
+		return true;
 	}
 
-	return DATA_SKIP;
+	return false;
 }
 
 int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index 6d273f67a82c..fd5562909382 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -24,9 +24,8 @@ struct moving_context {
 	wait_queue_head_t	wait;
 };
 
-typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
-				      struct bkey_s_c,
-				      struct bch_io_opts *, struct data_opts *);
+typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c,
+			     struct bch_io_opts *, struct data_update_opts *);
 
 int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);
 
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 1e2de1e818c1..d63b9fea4f05 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -39,15 +39,32 @@ static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
 		cmp_int(l->offset, r->offset);
 }
 
-static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
-				 struct bkey_s_c k,
-				 struct bch_io_opts *io_opts,
-				 struct data_opts *data_opts)
+static bool copygc_pred(struct bch_fs *c, void *arg,
+			struct bkey_s_c k,
+			struct bch_io_opts *io_opts,
+			struct data_update_opts *data_opts)
 {
 	copygc_heap *h = &c->copygc_heap;
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 	const union bch_extent_entry *entry;
 	struct extent_ptr_decoded p = { 0 };
+	unsigned i = 0;
+
+	/*
+	 * We need to use the journal reserve here, because
+	 *  - journal reclaim depends on btree key cache
+	 *    flushing to make forward progress,
+	 *  - which has to make forward progress when the
+	 *    journal is pre-reservation full,
+	 *  - and depends on allocation - meaning allocator and
+	 *    copygc
+	 */
+
+	data_opts->rewrite_ptrs		= 0;
+	data_opts->target		= io_opts->background_target;
+	data_opts->extra_replicas	= 0;
+	data_opts->btree_insert_flags	= BTREE_INSERT_USE_RESERVE|
+		JOURNAL_WATERMARK_copygc;
 
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
 		struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
@@ -55,12 +72,12 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
 			.dev	= p.ptr.dev,
 			.offset	= p.ptr.offset,
 		};
-		ssize_t i;
+		ssize_t eytz;
 
 		if (p.ptr.cached)
 			continue;
 
-		i = eytzinger0_find_le(h->data, h->used,
+		eytz = eytzinger0_find_le(h->data, h->used,
 				       sizeof(h->data[0]),
 				       bucket_offset_cmp, &search);
 #if 0
@@ -74,34 +91,16 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
 		BUG_ON(i != j);
 #endif
-		if (i >= 0 &&
-		    p.ptr.dev == h->data[i].dev &&
-		    p.ptr.offset < h->data[i].offset + ca->mi.bucket_size &&
-		    p.ptr.gen == h->data[i].gen) {
-			/*
-			 * We need to use the journal reserve here, because
-			 *  - journal reclaim depends on btree key cache
-			 *    flushing to make forward progress,
-			 *  - which has to make forward progress when the
-			 *    journal is pre-reservation full,
-			 *  - and depends on allocation - meaning allocator and
-			 *    copygc
-			 */
-
-			data_opts->target		= io_opts->background_target;
-			data_opts->nr_replicas		= 1;
-			data_opts->btree_insert_flags	= BTREE_INSERT_USE_RESERVE|
-				JOURNAL_WATERMARK_copygc;
-			data_opts->rewrite_dev		= p.ptr.dev;
-
-			if (p.has_ec)
-				data_opts->nr_replicas += p.ec.redundancy;
-
-			return DATA_REWRITE;
-		}
+		if (eytz >= 0 &&
+		    p.ptr.dev == h->data[eytz].dev &&
+		    p.ptr.offset < h->data[eytz].offset + ca->mi.bucket_size &&
+		    p.ptr.gen == h->data[eytz].gen)
+			data_opts->rewrite_ptrs |= 1U << i;
+
+		i++;
 	}
 
-	return DATA_SKIP;
+	return data_opts->rewrite_ptrs != 0;
 }
 
 static inline int fragmentation_cmp(copygc_heap *heap,
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 1724ae36c0f4..63b24dc9c917 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -22,62 +22,70 @@
  * returns -1 if it should not be moved, or
  * device of pointer that should be moved, if known, or INT_MAX if unknown
  */
-static int __bch2_rebalance_pred(struct bch_fs *c,
-				 struct bkey_s_c k,
-				 struct bch_io_opts *io_opts)
+static bool rebalance_pred(struct bch_fs *c, void *arg,
+			   struct bkey_s_c k,
+			   struct bch_io_opts *io_opts,
+			   struct data_update_opts *data_opts)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-	const union bch_extent_entry *entry;
-	struct extent_ptr_decoded p;
+	unsigned i;
+
+	data_opts->rewrite_ptrs		= 0;
+	data_opts->target		= io_opts->background_target;
+	data_opts->extra_replicas	= 0;
+	data_opts->btree_insert_flags	= 0;
 
 	if (io_opts->background_compression &&
-	    !bch2_bkey_is_incompressible(k))
-		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+	    !bch2_bkey_is_incompressible(k)) {
+		const union bch_extent_entry *entry;
+		struct extent_ptr_decoded p;
+
+		i = 0;
+		bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
 			if (!p.ptr.cached &&
 			    p.crc.compression_type !=
 			    bch2_compression_opt_to_type[io_opts->background_compression])
-				return p.ptr.dev;
+				data_opts->rewrite_ptrs |= 1U << i;
+			i++;
+		}
+	}
 
-	if (io_opts->background_target)
-		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-			if (!p.ptr.cached &&
-			    !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target))
-				return p.ptr.dev;
+	if (io_opts->background_target) {
+		const struct bch_extent_ptr *ptr;
 
-	return -1;
+		i = 0;
+		bkey_for_each_ptr(ptrs, ptr) {
+			if (!ptr->cached &&
+			    !bch2_dev_in_target(c, ptr->dev, io_opts->background_target))
+				data_opts->rewrite_ptrs |= 1U << i;
+			i++;
+		}
+	}
+
+	return data_opts->rewrite_ptrs != 0;
 }
 
 void bch2_rebalance_add_key(struct bch_fs *c,
 			    struct bkey_s_c k,
 			    struct bch_io_opts *io_opts)
 {
-	atomic64_t *counter;
-	int dev;
+	struct data_update_opts update_opts = { 0 };
+	struct bkey_ptrs_c ptrs;
+	const struct bch_extent_ptr *ptr;
+	unsigned i;
 
-	dev = __bch2_rebalance_pred(c, k, io_opts);
-	if (dev < 0)
+	if (!rebalance_pred(c, NULL, k, io_opts, &update_opts))
 		return;
 
-	counter = dev < INT_MAX
-		? &bch_dev_bkey_exists(c, dev)->rebalance_work
-		: &c->rebalance.work_unknown_dev;
-
-	if (atomic64_add_return(k.k->size, counter) == k.k->size)
-		rebalance_wakeup(c);
-}
-
-static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
-				    struct bkey_s_c k,
-				    struct bch_io_opts *io_opts,
-				    struct data_opts *data_opts)
-{
-	if (__bch2_rebalance_pred(c, k, io_opts) >= 0) {
-		data_opts->target		= io_opts->background_target;
-		data_opts->nr_replicas		= 1;
-		data_opts->btree_insert_flags	= 0;
-		return DATA_ADD_REPLICAS;
-	} else {
-		return DATA_SKIP;
+	i = 0;
+	ptrs = bch2_bkey_ptrs_c(k);
+	bkey_for_each_ptr(ptrs, ptr) {
+		if ((1U << i) & update_opts.rewrite_ptrs)
+			if (atomic64_add_return(k.k->size,
+					&bch_dev_bkey_exists(c, ptr->dev)->rebalance_work) ==
+			    k.k->size)
+				rebalance_wakeup(c);
+		i++;
 	}
 }
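
A worked example of the new replica accounting at the end of bch2_data_update_init() above, where nr_replicas becomes hweight32(rewrite_ptrs) + extra_replicas and the reservation starts at size * extra_replicas, growing by the extent's size again for each rewritten pointer holding compressed data. This is a standalone sketch with made-up numbers; __builtin_popcount() stands in for the kernel's hweight32():

#include <stdio.h>

int main(void)
{
	unsigned extent_sectors      = 128;  /* k.k->size */
	unsigned rewrite_ptrs        = 0x5;  /* rewriting ptrs 0 and 2 */
	unsigned extra_replicas      = 1;
	unsigned compressed_rewrites = 1;    /* one rewritten ptr is compressed */

	/* m->op.nr_replicas = hweight32(rewrite_ptrs) + extra_replicas: */
	unsigned nr_replicas = __builtin_popcount(rewrite_ptrs) + extra_replicas;

	/*
	 * reserve_sectors starts at k.k->size * extra_replicas, and grows by
	 * k.k->size for each rewritten pointer whose crc is compressed:
	 */
	unsigned reserve_sectors = extent_sectors * extra_replicas +
				   extent_sectors * compressed_rewrites;

	printf("nr_replicas = %u, reserve_sectors = %u\n",
	       nr_replicas, reserve_sectors);  /* prints 3 and 256 */
	return 0;
}

Note that the reservation is only allowed to fail when extra_replicas is set: a pure rewrite (extra_replicas == 0) passes BCH_DISK_RESERVATION_NOFAIL, since the data is moving rather than growing.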