From b40901b0f7182557851c8e9af31bacfbbd76b1ec Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 13 Mar 2023 22:01:47 -0400 Subject: bcachefs: New erasure coding shutdown path This implements a new shutdown path for erasure coding, which is needed for the upcoming BCH_WRITE_WAIT_FOR_EC write path. The process is: - Cancel new stripes being built up - Close out/cancel open buckets on write points or the partial list that are for stripes - Shut down rebalance/copygc - Then wait for in-flight new stripes to finish With BCH_WRITE_WAIT_FOR_EC, move ops will be waiting on stripes to fill up before they complete; the new ec shutdown path is needed for shutting down copygc/rebalance without deadlocking. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 39 +---------------- fs/bcachefs/alloc_foreground.c | 96 ++++++++++++++++++++++++++++++++---------- fs/bcachefs/alloc_foreground.h | 6 +-- fs/bcachefs/bcachefs.h | 7 ++- fs/bcachefs/data_update.c | 1 + fs/bcachefs/ec.c | 54 +++++++++++++++++++++--- fs/bcachefs/ec.h | 4 +- fs/bcachefs/io.c | 10 ++++- fs/bcachefs/move.c | 6 --- fs/bcachefs/super.c | 12 ++---- 10 files changed, 141 insertions(+), 94 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index e5abe6406afe..17bcebbd1f2a 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -2158,44 +2158,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca) */ bch2_recalc_capacity(c); - /* Next, close write points that point to this device...
*/ - for (i = 0; i < ARRAY_SIZE(c->write_points); i++) - bch2_writepoint_stop(c, ca, &c->write_points[i]); - - bch2_writepoint_stop(c, ca, &c->copygc_write_point); - bch2_writepoint_stop(c, ca, &c->rebalance_write_point); - bch2_writepoint_stop(c, ca, &c->btree_write_point); - - mutex_lock(&c->btree_reserve_cache_lock); - while (c->btree_reserve_cache_nr) { - struct btree_alloc *a = - &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; - - bch2_open_buckets_put(c, &a->ob); - } - mutex_unlock(&c->btree_reserve_cache_lock); - - spin_lock(&c->freelist_lock); - i = 0; - while (i < c->open_buckets_partial_nr) { - struct open_bucket *ob = - c->open_buckets + c->open_buckets_partial[i]; - - if (ob->dev == ca->dev_idx) { - --c->open_buckets_partial_nr; - swap(c->open_buckets_partial[i], - c->open_buckets_partial[c->open_buckets_partial_nr]); - ob->on_partial_list = false; - spin_unlock(&c->freelist_lock); - bch2_open_bucket_put(c, ob); - spin_lock(&c->freelist_lock); - } else { - i++; - } - } - spin_unlock(&c->freelist_lock); - - bch2_ec_stop_dev(c, ca); + bch2_open_buckets_stop(c, ca, false); /* * Wake up threads that were blocked on allocation, so they can notice diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 7c81189bcd62..20c64882104e 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1023,45 +1023,96 @@ static int open_bucket_add_buckets(struct btree_trans *trans, return ret < 0 ? 
ret : 0; } -void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca, - struct open_buckets *obs) +static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c, + struct bch_dev *ca, bool ec) { - struct open_buckets ptrs = { .nr = 0 }; - struct open_bucket *ob, *ob2; - unsigned i, j; - - open_bucket_for_each(c, obs, ob, i) { - bool drop = !ca || ob->dev == ca->dev_idx; + if (ec) { + return ob->ec != NULL; + } else if (ca) { + bool drop = ob->dev == ca->dev_idx; + struct open_bucket *ob2; + unsigned i; if (!drop && ob->ec) { mutex_lock(&ob->ec->lock); - for (j = 0; j < ob->ec->new_stripe.key.v.nr_blocks; j++) { - if (!ob->ec->blocks[j]) + for (i = 0; i < ob->ec->new_stripe.key.v.nr_blocks; i++) { + if (!ob->ec->blocks[i]) continue; - ob2 = c->open_buckets + ob->ec->blocks[j]; + ob2 = c->open_buckets + ob->ec->blocks[i]; drop |= ob2->dev == ca->dev_idx; } mutex_unlock(&ob->ec->lock); } - if (drop) - bch2_open_bucket_put(c, ob); - else - ob_push(c, &ptrs, ob); + return drop; + } else { + return true; } - - *obs = ptrs; } -void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca, - struct write_point *wp) +static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca, + bool ec, struct write_point *wp) { + struct open_buckets ptrs = { .nr = 0 }; + struct open_bucket *ob; + unsigned i; + mutex_lock(&wp->lock); - bch2_open_buckets_stop_dev(c, ca, &wp->ptrs); + open_bucket_for_each(c, &wp->ptrs, ob, i) + if (should_drop_bucket(ob, c, ca, ec)) + bch2_open_bucket_put(c, ob); + else + ob_push(c, &ptrs, ob); + wp->ptrs = ptrs; mutex_unlock(&wp->lock); } +void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca, + bool ec) +{ + unsigned i; + + /* Next, close write points that point to this device... 
*/ + for (i = 0; i < ARRAY_SIZE(c->write_points); i++) + bch2_writepoint_stop(c, ca, ec, &c->write_points[i]); + + bch2_writepoint_stop(c, ca, ec, &c->copygc_write_point); + bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point); + bch2_writepoint_stop(c, ca, ec, &c->btree_write_point); + + mutex_lock(&c->btree_reserve_cache_lock); + while (c->btree_reserve_cache_nr) { + struct btree_alloc *a = + &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; + + bch2_open_buckets_put(c, &a->ob); + } + mutex_unlock(&c->btree_reserve_cache_lock); + + spin_lock(&c->freelist_lock); + i = 0; + while (i < c->open_buckets_partial_nr) { + struct open_bucket *ob = + c->open_buckets + c->open_buckets_partial[i]; + + if (should_drop_bucket(ob, c, ca, ec)) { + --c->open_buckets_partial_nr; + swap(c->open_buckets_partial[i], + c->open_buckets_partial[c->open_buckets_partial_nr]); + ob->on_partial_list = false; + spin_unlock(&c->freelist_lock); + bch2_open_bucket_put(c, ob); + spin_lock(&c->freelist_lock); + } else { + i++; + } + } + spin_unlock(&c->freelist_lock); + + bch2_ec_stop_dev(c, ca); +} + static inline struct hlist_head *writepoint_hash(struct bch_fs *c, unsigned long write_point) { @@ -1107,8 +1158,7 @@ static bool try_increase_writepoints(struct bch_fs *c) return true; } -static bool try_decrease_writepoints(struct bch_fs *c, - unsigned old_nr) +static bool try_decrease_writepoints(struct bch_fs *c, unsigned old_nr) { struct write_point *wp; @@ -1129,7 +1179,7 @@ static bool try_decrease_writepoints(struct bch_fs *c, hlist_del_rcu(&wp->node); mutex_unlock(&c->write_points_hash_lock); - bch2_writepoint_stop(c, NULL, wp); + bch2_writepoint_stop(c, NULL, false, wp); return true; } diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 1fa96f8c6879..8a1cf425091b 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -202,11 +202,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *, struct 
bkey_i *, unsigned, bool); void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *); -void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *, - struct open_buckets *); - -void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *, - struct write_point *); +void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *, bool); static inline struct write_point_specifier writepoint_hashed(unsigned long v) { diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 8be65ebb34ad..05fc0f7434dd 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -655,7 +655,6 @@ typedef struct { x(fallocate) \ x(discard) \ x(invalidate) \ - x(move) \ x(delete_dead_snapshots) \ x(snapshot_delete_pagecache) \ x(sysfs) @@ -958,14 +957,14 @@ struct bch_fs { struct list_head ec_stripe_new_list; struct mutex ec_stripe_new_lock; + wait_queue_head_t ec_stripe_new_wait; struct work_struct ec_stripe_create_work; u64 ec_stripe_hint; - struct bio_set ec_bioset; - struct work_struct ec_stripe_delete_work; - struct llist_head ec_stripe_delete_list; + + struct bio_set ec_bioset; /* REFLINK */ u64 reflink_hint; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 447863825a89..5ec884a222f8 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -252,6 +252,7 @@ restart_drop_extra_replicas: BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: bch2_trans_commit(trans, &op->res, NULL, + BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| m->data_opts.btree_insert_flags); if (!ret) { diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index af3a72acc67f..1e621dcc1d37 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -989,6 +989,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b while (1) { ret = commit_do(trans, NULL, NULL, + BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL, ec_stripe_update_extent(trans, bucket_pos, bucket.gen, s, &bp_offset)); @@ -1127,7 +1128,9 @@ static void ec_stripe_create(struct 
ec_stripe_new *s) goto err; } - ret = bch2_trans_do(c, &s->res, NULL, BTREE_INSERT_NOFAIL, + ret = bch2_trans_do(c, &s->res, NULL, + BTREE_INSERT_NOCHECK_RW| + BTREE_INSERT_NOFAIL, ec_stripe_key_update(&trans, &s->new_stripe.key, !s->have_existing_stripe)); if (ret) { @@ -1409,6 +1412,11 @@ struct ec_stripe_head *__bch2_ec_stripe_head_get(struct btree_trans *trans, if (ret) return ERR_PTR(ret); + if (test_bit(BCH_FS_GOING_RO, &c->flags)) { + h = ERR_PTR(-EROFS); + goto found; + } + list_for_each_entry(h, &c->ec_stripe_head_list, list) if (h->target == target && h->algo == algo && @@ -1753,7 +1761,7 @@ err: return ERR_PTR(ret); } -void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) +static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca) { struct ec_stripe_head *h; struct open_bucket *ob; @@ -1761,11 +1769,13 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) mutex_lock(&c->ec_stripe_head_lock); list_for_each_entry(h, &c->ec_stripe_head_list, list) { - mutex_lock(&h->lock); if (!h->s) goto unlock; + if (!ca) + goto found; + for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++) { if (!h->s->blocks[i]) continue; @@ -1784,6 +1794,32 @@ unlock: mutex_unlock(&c->ec_stripe_head_lock); } +void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) +{ + __bch2_ec_stop(c, ca); +} + +void bch2_fs_ec_stop(struct bch_fs *c) +{ + __bch2_ec_stop(c, NULL); +} + +static bool bch2_fs_ec_flush_done(struct bch_fs *c) +{ + bool ret; + + mutex_lock(&c->ec_stripe_new_lock); + ret = list_empty(&c->ec_stripe_new_list); + mutex_unlock(&c->ec_stripe_new_lock); + + return ret; +} + +void bch2_fs_ec_flush(struct bch_fs *c) +{ + wait_event(c->ec_stripe_new_wait, bch2_fs_ec_flush_done(c)); +} + int bch2_stripes_read(struct bch_fs *c) { struct btree_trans trans; @@ -1915,14 +1951,22 @@ void bch2_fs_ec_exit(struct bch_fs *c) void bch2_fs_ec_init_early(struct bch_fs *c) { + spin_lock_init(&c->ec_stripes_new_lock); + mutex_init(&c->ec_stripes_heap_lock); + + 
INIT_LIST_HEAD(&c->ec_stripe_head_list); + mutex_init(&c->ec_stripe_head_lock); + + INIT_LIST_HEAD(&c->ec_stripe_new_list); + mutex_init(&c->ec_stripe_new_lock); + init_waitqueue_head(&c->ec_stripe_new_wait); + INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work); INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work); } int bch2_fs_ec_init(struct bch_fs *c) { - spin_lock_init(&c->ec_stripes_new_lock); - return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio), BIOSET_NEED_BVECS); } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 8f777a37e43d..7c08a49d7419 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -245,8 +245,8 @@ static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s, } void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); - -void bch2_ec_flush_new_stripes(struct bch_fs *); +void bch2_fs_ec_stop(struct bch_fs *); +void bch2_fs_ec_flush(struct bch_fs *); int bch2_stripes_read(struct bch_fs *); diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 1b093650ff9a..e82da496b3f8 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -705,7 +705,8 @@ static void bch2_write_done(struct closure *cl) struct bch_fs *c = op->c; bch2_disk_reservation_put(c, &op->res); - bch2_write_ref_put(c, BCH_WRITE_REF_write); + if (!(op->flags & BCH_WRITE_MOVE)) + bch2_write_ref_put(c, BCH_WRITE_REF_write); bch2_keylist_free(&op->insert_keys, op->inline_keys); bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time); @@ -1842,7 +1843,12 @@ void bch2_write(struct closure *cl) goto err; } - if (c->opts.nochanges || + if (c->opts.nochanges) { + op->error = -BCH_ERR_erofs_no_writes; + goto err; + } + + if (!(op->flags & BCH_WRITE_MOVE) && !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) { op->error = -BCH_ERR_erofs_no_writes; goto err; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index f74ef947cac5..4a9ffca7be62 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -59,7 +59,6 @@ struct moving_io 
{ static void move_free(struct moving_io *io) { struct moving_context *ctxt = io->write.ctxt; - struct bch_fs *c = ctxt->c; if (io->b) atomic_dec(&io->b->count); @@ -71,7 +70,6 @@ static void move_free(struct moving_io *io) wake_up(&ctxt->wait); mutex_unlock(&ctxt->lock); - bch2_write_ref_put(c, BCH_WRITE_REF_move); kfree(io); } @@ -280,9 +278,6 @@ static int bch2_move_extent(struct btree_trans *trans, return 0; } - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_move)) - return -BCH_ERR_erofs_no_writes; - /* * Before memory allocations & taking nocow locks in * bch2_data_update_init(): @@ -378,7 +373,6 @@ err_free_pages: err_free: kfree(io); err: - bch2_write_ref_put(c, BCH_WRITE_REF_move); trace_and_count(c, move_extent_alloc_mem_fail, k.k); return ret; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index bf3aabdb0fc9..278f8f19a230 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -205,9 +205,12 @@ static void __bch2_fs_read_only(struct bch_fs *c) unsigned i, clean_passes = 0; u64 seq = 0; + bch2_fs_ec_stop(c); + bch2_open_buckets_stop(c, NULL, true); bch2_rebalance_stop(c); bch2_copygc_stop(c); bch2_gc_thread_stop(c); + bch2_fs_ec_flush(c); bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu", journal_cur_seq(&c->journal)); @@ -700,15 +703,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) INIT_LIST_HEAD(&c->fsck_errors); mutex_init(&c->fsck_error_lock); - INIT_LIST_HEAD(&c->ec_stripe_head_list); - mutex_init(&c->ec_stripe_head_lock); - - INIT_LIST_HEAD(&c->ec_stripe_new_list); - mutex_init(&c->ec_stripe_new_lock); - - - mutex_init(&c->ec_stripes_heap_lock); - seqcount_init(&c->gc_pos_lock); seqcount_init(&c->usage_lock); -- cgit v1.2.3