author    Daniel Hill <daniel@gluo.nz> 2022-06-15 17:06:43 +0300
committer Kent Overstreet <kent.overstreet@linux.dev> 2023-10-23 00:09:35 +0300
commit    c91996c50a9ad6569cf9cb52e79c171f0d34814d (patch)
tree      5676408c15b5ed709c1a551e7e0ac533c21981cf
parent    7f5c5d20f01483ba53233e3e2c54848e0b2d9ecd (diff)
download  linux-c91996c50a9ad6569cf9cb52e79c171f0d34814d.tar.xz
bcachefs: data jobs, including rebalance, wait for copygc
move_ratelimit() now takes a bool that specifies whether we want to wait for copygc to finish. When copygc is running, we're probably low on free buckets; rather than consuming the remaining buckets, we want to wait for copygc to finish. This should help with performance and with runaway bucket fragmentation.

Signed-off-by: Daniel Hill <daniel@gluo.nz>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- fs/bcachefs/bcachefs.h  |   2
-rw-r--r-- fs/bcachefs/move.c      | 110
-rw-r--r-- fs/bcachefs/move.h      |   3
-rw-r--r-- fs/bcachefs/movinggc.c  |  15
-rw-r--r-- fs/bcachefs/rebalance.c |   2
5 files changed, 80 insertions, 52 deletions
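For context, the synchronization this patch introduces is a simple flag-plus-waitqueue handshake: the copygc thread sets c->copygc_running around each pass and wakes c->copygc_running_wq when done, while movers that pass wait_on_copygc sleep until the flag clears (the kernel version also bails out if kthread_should_stop(), making the wait killable). Below is a minimal userspace sketch of that pattern; pthread mutex/condvar stand in for the kernel's wait_queue_head_t and wait_event_killable()/wake_up(), the names mirror the patch, and the surrounding scaffolding is purely illustrative.

/*
 * Sketch of the copygc handshake added by this patch, in userspace C.
 * pthread primitives stand in for the kernel waitqueue; the flag and
 * waitqueue names are from the patch, everything else is illustrative.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  copygc_running_wq = PTHREAD_COND_INITIALIZER;
static bool copygc_running;

/* copygc side: set the flag around the work, then wake all waiters */
static void *copygc_thread(void *arg)
{
	pthread_mutex_lock(&lock);
	copygc_running = true;
	pthread_mutex_unlock(&lock);

	sleep(1);				/* stand-in for bch2_copygc() */

	pthread_mutex_lock(&lock);
	copygc_running = false;
	pthread_cond_broadcast(&copygc_running_wq);	/* wake_up() */
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* mover side: the wait_on_copygc branch at the top of move_ratelimit() */
static void move_ratelimit(bool wait_on_copygc)
{
	if (wait_on_copygc) {
		pthread_mutex_lock(&lock);
		while (copygc_running)		/* wait_event_killable() */
			pthread_cond_wait(&copygc_running_wq, &lock);
		pthread_mutex_unlock(&lock);
	}
	/* ...rate limiting and in-flight sector accounting follow... */
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, copygc_thread, NULL);
	usleep(1000);			/* let copygc start first */
	move_ratelimit(true);		/* rebalance/data jobs pass true */
	puts("copygc finished, mover may proceed");
	pthread_join(t, NULL);
	return 0;
}

Note the asymmetry the patch relies on: copygc itself calls bch2_move_data() with wait_on_copygc = false (it must never wait on itself), while rebalance and the BCH_DATA_OP_* jobs pass true.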
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 127323b677df..c07ea9af561d 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -825,6 +825,8 @@ mempool_t bio_bounce_pages;
copygc_heap copygc_heap;
struct write_point copygc_write_point;
s64 copygc_wait;
+ bool copygc_running;
+ wait_queue_head_t copygc_running_wq;
/* DATA PROGRESS STATS */
struct list_head data_progress_list;
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 4060678cf716..fad15ba7d239 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -237,24 +237,72 @@ err:
return ret;
}
+static int move_ratelimit(struct btree_trans *trans,
+ struct moving_context *ctxt,
+ struct bch_ratelimit *rate,
+ bool wait_on_copygc)
+{
+ struct bch_fs *c = trans->c;
+ u64 delay;
+
+ if (wait_on_copygc) {
+ bch2_trans_unlock(trans);
+ wait_event_killable(c->copygc_running_wq,
+ !c->copygc_running ||
+ kthread_should_stop());
+ }
+
+ do {
+ delay = rate ? bch2_ratelimit_delay(rate) : 0;
+
+ if (delay) {
+ bch2_trans_unlock(trans);
+ set_current_state(TASK_INTERRUPTIBLE);
+ }
+
+ if ((current->flags & PF_KTHREAD) && kthread_should_stop()) {
+ __set_current_state(TASK_RUNNING);
+ return 1;
+ }
+
+ if (delay)
+ schedule_timeout(delay);
+
+ if (unlikely(freezing(current))) {
+ move_ctxt_wait_event(ctxt, trans, list_empty(&ctxt->reads));
+ try_to_freeze();
+ }
+ } while (delay);
+
+ move_ctxt_wait_event(ctxt, trans,
+ atomic_read(&ctxt->write_sectors) <
+ c->opts.move_bytes_in_flight >> 9);
+
+ move_ctxt_wait_event(ctxt, trans,
+ atomic_read(&ctxt->read_sectors) <
+ c->opts.move_bytes_in_flight >> 9);
+
+ return 0;
+}
+
static int __bch2_move_data(struct bch_fs *c,
- struct moving_context *ctxt,
- struct bch_ratelimit *rate,
- struct write_point_specifier wp,
- struct bpos start,
- struct bpos end,
- move_pred_fn pred, void *arg,
- struct bch_move_stats *stats,
- enum btree_id btree_id)
+ struct moving_context *ctxt,
+ struct bch_ratelimit *rate,
+ struct write_point_specifier wp,
+ struct bpos start,
+ struct bpos end,
+ move_pred_fn pred, void *arg,
+ struct bch_move_stats *stats,
+ enum btree_id btree_id,
+ bool wait_on_copygc)
{
- bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct bkey_buf sk;
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct data_update_opts data_opts;
- u64 delay, cur_inum = U64_MAX;
+ u64 cur_inum = U64_MAX;
int ret = 0, ret2;
bch2_bkey_buf_init(&sk);
@@ -271,37 +319,7 @@ static int __bch2_move_data(struct bch_fs *c,
if (rate)
bch2_ratelimit_reset(rate);
- while (1) {
- do {
- delay = rate ? bch2_ratelimit_delay(rate) : 0;
-
- if (delay) {
- bch2_trans_unlock(&trans);
- set_current_state(TASK_INTERRUPTIBLE);
- }
-
- if (kthread && (ret = kthread_should_stop())) {
- __set_current_state(TASK_RUNNING);
- goto out;
- }
-
- if (delay)
- schedule_timeout(delay);
-
- if (unlikely(freezing(current))) {
- move_ctxt_wait_event(ctxt, &trans, list_empty(&ctxt->reads));
- try_to_freeze();
- }
- } while (delay);
-
- move_ctxt_wait_event(ctxt, &trans,
- atomic_read(&ctxt->write_sectors) <
- c->opts.move_bytes_in_flight >> 9);
-
- move_ctxt_wait_event(ctxt, &trans,
- atomic_read(&ctxt->read_sectors) <
- c->opts.move_bytes_in_flight >> 9);
-
+ while (!move_ratelimit(&trans, ctxt, rate, wait_on_copygc)) {
bch2_trans_begin(&trans);
k = bch2_btree_iter_peek(&iter);
@@ -374,7 +392,6 @@ next:
next_nondata:
bch2_btree_iter_advance(&iter);
}
-out:
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
@@ -413,7 +430,8 @@ int bch2_move_data(struct bch_fs *c,
struct bch_ratelimit *rate,
struct write_point_specifier wp,
move_pred_fn pred, void *arg,
- struct bch_move_stats *stats)
+ struct bch_move_stats *stats,
+ bool wait_on_copygc)
{
struct moving_context ctxt = { .stats = stats };
enum btree_id id;
@@ -438,7 +456,7 @@ int bch2_move_data(struct bch_fs *c,
ret = __bch2_move_data(c, &ctxt, rate, wp,
id == start_btree_id ? start_pos : POS_MIN,
id == end_btree_id ? end_pos : POS_MAX,
- pred, arg, stats, id);
+ pred, arg, stats, id, wait_on_copygc);
if (ret)
break;
}
@@ -675,7 +693,7 @@ int bch2_data_job(struct bch_fs *c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
NULL, writepoint_hashed((unsigned long) current),
- rereplicate_pred, c, stats) ?: ret;
+ rereplicate_pred, c, stats, true) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;
break;
case BCH_DATA_OP_MIGRATE:
@@ -696,7 +714,7 @@ int bch2_data_job(struct bch_fs *c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
NULL, writepoint_hashed((unsigned long) current),
- migrate_pred, &op, stats) ?: ret;
+ migrate_pred, &op, stats, true) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;
break;
case BCH_DATA_OP_REWRITE_OLD_NODES:
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index fd5562909382..d362cb545c0b 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -35,7 +35,8 @@ int bch2_move_data(struct bch_fs *,
struct bch_ratelimit *,
struct write_point_specifier,
move_pred_fn, void *,
- struct bch_move_stats *);
+ struct bch_move_stats *,
+ bool);
int bch2_data_job(struct bch_fs *,
struct bch_move_stats *,
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index d63b9fea4f05..8b6ad9ec72af 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -316,7 +316,8 @@ static int bch2_copygc(struct bch_fs *c)
NULL,
writepoint_ptr(&c->copygc_write_point),
copygc_pred, NULL,
- &move_stats);
+ &move_stats,
+ false);
if (ret < 0)
bch_err(c, "error %i from bch2_move_data() in copygc", ret);
if (ret)
@@ -381,10 +382,11 @@ static int bch2_copygc_thread(void *arg)
struct bch_fs *c = arg;
struct io_clock *clock = &c->io_clock[WRITE];
u64 last, wait;
+ int ret = 0;
set_freezable();
- while (!kthread_should_stop()) {
+ while (!ret && !kthread_should_stop()) {
cond_resched();
if (kthread_wait_freezable(c->copy_gc_enabled))
@@ -403,8 +405,11 @@ static int bch2_copygc_thread(void *arg)
c->copygc_wait = 0;
- if (bch2_copygc(c))
- break;
+ c->copygc_running = true;
+ ret = bch2_copygc(c);
+ c->copygc_running = false;
+
+ wake_up(&c->copygc_running_wq);
}
return 0;
@@ -448,4 +453,6 @@ int bch2_copygc_start(struct bch_fs *c)
void bch2_fs_copygc_init(struct bch_fs *c)
{
+ init_waitqueue_head(&c->copygc_running_wq);
+ c->copygc_running = false;
}
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 63b24dc9c917..57082260fc00 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -255,7 +255,7 @@ static int bch2_rebalance_thread(void *arg)
NULL, /* &r->pd.rate, */
writepoint_ptr(&c->rebalance_write_point),
rebalance_pred, NULL,
- &move_stats);
+ &move_stats, true);
}
return 0;