author		Kent Overstreet <kent.overstreet@gmail.com>	2018-11-19 05:35:59 +0300
committer	Kent Overstreet <kent.overstreet@linux.dev>	2023-10-23 00:08:11 +0300
commit		f1a79365a7416c5046f88d0db025e1d84c32b252 (patch)
tree		b7e0d9522c89d74404b97cacad82a172931bf06d /fs/bcachefs
parent		129550c4d08fcc518c7cbe747657ed18470f712a (diff)
bcachefs: Don't block on journal reservation with btree locks held

Fixes a deadlock between the allocator thread, when it first starts up,
and journal replay.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
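The core of the fix is in do_btree_insert_at(): take the journal reservation
nonblocking while btree locks are held; if that fails, drop the locks, wait
until a reservation would succeed, retake the locks and retry. A condensed
sketch of the loop from the btree_update_leaf.c hunk below (tracing and the
surrounding transaction code trimmed):

	while ((ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
				u64s, JOURNAL_RES_GET_NONBLOCK)) == -EAGAIN) {
		/* Never sleep on the journal while holding btree locks: */
		bch2_btree_iter_unlock(iter);

		/* Block until a reservation would succeed, without taking one: */
		ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
					   u64s, JOURNAL_RES_GET_CHECK);
		if (ret)
			return ret;

		/* Retake the locks; if that fails, restart the transaction: */
		if (!bch2_btree_iter_relock(iter))
			return -EINTR;
	}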
Diffstat (limited to 'fs/bcachefs')
 -rw-r--r--  fs/bcachefs/btree_update_interior.c |  6
 -rw-r--r--  fs/bcachefs/btree_update_leaf.c     | 38
 -rw-r--r--  fs/bcachefs/journal.c               | 84
 -rw-r--r--  fs/bcachefs/journal.h               | 39
 4 files changed, 108 insertions(+), 59 deletions(-)
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 01e476d72595..af31819c88c7 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -638,12 +638,12 @@ static void btree_update_wait_on_journal(struct closure *cl)
int ret;
ret = bch2_journal_open_seq_async(&c->journal, as->journal_seq, cl);
- if (ret < 0)
- goto err;
- if (!ret) {
+ if (ret == -EAGAIN) {
continue_at(cl, btree_update_wait_on_journal, system_wq);
return;
}
+ if (ret < 0)
+ goto err;
bch2_journal_flush_seq_async(&c->journal, as->journal_seq, cl);
err:
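The reordered checks above follow from the new return convention of
bch2_journal_open_seq_async(); a summary, with the old convention inferred
from the removed lines:

	/*
	 * Old: 1 = seq already open/written, 0 = blocked (closure queued),
	 *      <0 = hard error
	 * New: 0 = success, -EAGAIN = blocked (closure queued on
	 *      j->async_wait), other <0 = hard error (e.g. -EROFS)
	 *
	 * -EAGAIN is itself negative, so it has to be tested before the
	 * generic ret < 0 check - hence the swap in
	 * btree_update_wait_on_journal().
	 */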
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 093e480977c7..41691bebf679 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -344,19 +344,35 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
trans_for_each_entry(trans, i)
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
- u64s = 0;
- trans_for_each_entry(trans, i)
- u64s += jset_u64s(i->k->k.u64s);
-
memset(&trans->journal_res, 0, sizeof(trans->journal_res));
- ret = !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)
- ? bch2_journal_res_get(&c->journal,
- &trans->journal_res,
- u64s, u64s)
- : 0;
- if (ret)
- return ret;
+ if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
+ u64s = 0;
+ trans_for_each_entry(trans, i)
+ u64s += jset_u64s(i->k->k.u64s);
+
+ while ((ret = bch2_journal_res_get(&c->journal,
+ &trans->journal_res, u64s,
+ JOURNAL_RES_GET_NONBLOCK)) == -EAGAIN) {
+ struct btree_iter *iter = trans->entries[0].iter;
+
+ bch2_btree_iter_unlock(iter);
+
+ ret = bch2_journal_res_get(&c->journal,
+ &trans->journal_res, u64s,
+ JOURNAL_RES_GET_CHECK);
+ if (ret)
+ return ret;
+
+ if (!bch2_btree_iter_relock(iter)) {
+ trans_restart(" (iter relock after journal res get blocked)");
+ return -EINTR;
+ }
+ }
+
+ if (ret)
+ return ret;
+ }
multi_lock_write(c, trans);
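Taken together with the journal.c and journal.h changes below, the reworked
bch2_journal_res_get() has three modes. A summary, as the patch reads (the
CHECK caveat follows from journal_res_get_fast() returning before the
cmpxchg):

	/*
	 * bch2_journal_res_get(j, res, u64s, flags):
	 *
	 *   flags == 0     block until the reservation succeeds (or the
	 *                  journal goes read-only)
	 *   GET_NONBLOCK   never sleep; -EAGAIN if there is no room in the
	 *                  current entry right now
	 *   GET_CHECK      may block, but only probes: returns success once
	 *                  a reservation would succeed, without taking one.
	 *                  res->ref is never set, so there is nothing to
	 *                  bch2_journal_res_put().
	 */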
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index b4d037664628..5db0a469ac24 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -335,15 +335,14 @@ u64 bch2_inode_journal_seq(struct journal *j, u64 inode)
}
static int __journal_res_get(struct journal *j, struct journal_res *res,
- unsigned u64s_min, unsigned u64s_max)
+ unsigned flags)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_buf *buf;
int ret;
retry:
- ret = journal_res_get_fast(j, res, u64s_min, u64s_max);
- if (ret)
- return ret;
+ if (journal_res_get_fast(j, res, flags))
+ return 0;
spin_lock(&j->lock);
/*
@@ -351,10 +350,9 @@ retry:
* that just did journal_entry_open() and call journal_entry_close()
* unnecessarily
*/
- ret = journal_res_get_fast(j, res, u64s_min, u64s_max);
- if (ret) {
+ if (journal_res_get_fast(j, res, flags)) {
spin_unlock(&j->lock);
- return 1;
+ return 0;
}
/*
@@ -377,7 +375,12 @@ retry:
spin_unlock(&j->lock);
return -EROFS;
case JOURNAL_ENTRY_INUSE:
- /* haven't finished writing out the previous one: */
+ /*
+ * The current journal entry is still open, but we failed to get
+ * a journal reservation because there's not enough space in it,
+ * and we can't close it and start another because we haven't
+ * finished writing out the previous entry:
+ */
spin_unlock(&j->lock);
trace_journal_entry_full(c);
goto blocked;
@@ -408,7 +411,7 @@ retry:
blocked:
if (!j->res_get_blocked_start)
j->res_get_blocked_start = local_clock() ?: 1;
- return 0;
+ return -EAGAIN;
}
/*
@@ -422,14 +425,14 @@ blocked:
* btree node write locks.
*/
int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
- unsigned u64s_min, unsigned u64s_max)
+ unsigned flags)
{
int ret;
wait_event(j->wait,
- (ret = __journal_res_get(j, res, u64s_min,
- u64s_max)));
- return ret < 0 ? ret : 0;
+ (ret = __journal_res_get(j, res, flags)) != -EAGAIN ||
+ (flags & JOURNAL_RES_GET_NONBLOCK));
+ return ret;
}
u64 bch2_journal_last_unwritten_seq(struct journal *j)
@@ -453,28 +456,55 @@ u64 bch2_journal_last_unwritten_seq(struct journal *j)
* btree root - every journal entry contains the roots of all the btrees, so it
* doesn't need to bother with getting a journal reservation
*/
-int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *parent)
+int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl)
{
- int ret;
-
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ bool need_reclaim = false;
+retry:
spin_lock(&j->lock);
- BUG_ON(seq > journal_cur_seq(j));
if (seq < journal_cur_seq(j) ||
journal_entry_is_open(j)) {
spin_unlock(&j->lock);
- return 1;
+ return 0;
+ }
+
+ if (journal_cur_seq(j) < seq) {
+ switch (journal_buf_switch(j, false)) {
+ case JOURNAL_ENTRY_ERROR:
+ spin_unlock(&j->lock);
+ return -EROFS;
+ case JOURNAL_ENTRY_INUSE:
+ /* haven't finished writing out the previous one: */
+ trace_journal_entry_full(c);
+ goto blocked;
+ case JOURNAL_ENTRY_CLOSED:
+ break;
+ case JOURNAL_UNLOCKED:
+ goto retry;
+ }
+ }
+
+ BUG_ON(journal_cur_seq(j) < seq);
+
+ if (!journal_entry_open(j)) {
+ need_reclaim = true;
+ goto blocked;
}
- ret = journal_entry_open(j);
- if (!ret)
- closure_wait(&j->async_wait, parent);
spin_unlock(&j->lock);
- if (!ret)
- bch2_journal_reclaim_work(&j->reclaim_work.work);
+ return 0;
+blocked:
+ if (!j->res_get_blocked_start)
+ j->res_get_blocked_start = local_clock() ?: 1;
- return ret;
+ closure_wait(&j->async_wait, cl);
+ spin_unlock(&j->lock);
+
+ if (need_reclaim)
+ bch2_journal_reclaim_work(&j->reclaim_work.work);
+ return -EAGAIN;
}
static int journal_seq_error(struct journal *j, u64 seq)
@@ -594,11 +624,10 @@ int bch2_journal_flush_seq(struct journal *j, u64 seq)
void bch2_journal_meta_async(struct journal *j, struct closure *parent)
{
struct journal_res res;
- unsigned u64s = jset_u64s(0);
memset(&res, 0, sizeof(res));
- bch2_journal_res_get(j, &res, u64s, u64s);
+ bch2_journal_res_get(j, &res, jset_u64s(0), 0);
bch2_journal_res_put(j, &res);
bch2_journal_flush_seq_async(j, res.seq, parent);
@@ -607,12 +636,11 @@ void bch2_journal_meta_async(struct journal *j, struct closure *parent)
int bch2_journal_meta(struct journal *j)
{
struct journal_res res;
- unsigned u64s = jset_u64s(0);
int ret;
memset(&res, 0, sizeof(res));
- ret = bch2_journal_res_get(j, &res, u64s, u64s);
+ ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
if (ret)
return ret;
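Blocking callers shrink accordingly: the old (u64s_min, u64s_max) pair
collapses to a single size plus flags == 0, as in bch2_journal_meta() above.
A minimal sketch of such a caller (the code that fills in the entry is
elided):

	struct journal_res res;
	int ret;

	memset(&res, 0, sizeof(res));

	/* flags == 0: wait in the slowpath until the reservation succeeds */
	ret = bch2_journal_res_get(&c->journal, &res, jset_u64s(0), 0);
	if (ret)
		return ret;

	/* ... write the entry at res.offset in the journal buffer ... */

	bch2_journal_res_put(&c->journal, &res);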
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 77cf39cc64ff..d9c094ba2ca0 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -272,12 +272,14 @@ static inline void bch2_journal_res_put(struct journal *j,
}
int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
- unsigned, unsigned);
+ unsigned);
+
+#define JOURNAL_RES_GET_NONBLOCK (1 << 0)
+#define JOURNAL_RES_GET_CHECK (1 << 1)
static inline int journal_res_get_fast(struct journal *j,
struct journal_res *res,
- unsigned u64s_min,
- unsigned u64s_max)
+ unsigned flags)
{
union journal_res_state old, new;
u64 v = atomic64_read(&j->reservations.counter);
@@ -289,42 +291,45 @@ static inline int journal_res_get_fast(struct journal *j,
* Check if there is still room in the current journal
* entry:
*/
- if (old.cur_entry_offset + u64s_min > j->cur_entry_u64s)
+ if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s)
return 0;
- res->offset = old.cur_entry_offset;
- res->u64s = min(u64s_max, j->cur_entry_u64s -
- old.cur_entry_offset);
+ if (flags & JOURNAL_RES_GET_CHECK)
+ return 1;
- journal_state_inc(&new);
new.cur_entry_offset += res->u64s;
+ journal_state_inc(&new);
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v);
- res->ref = true;
- res->idx = new.idx;
- res->seq = le64_to_cpu(j->buf[res->idx].data->seq);
+ res->ref = true;
+ res->idx = old.idx;
+ res->offset = old.cur_entry_offset;
+ res->seq = le64_to_cpu(j->buf[old.idx].data->seq);
return 1;
}
static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res,
- unsigned u64s_min, unsigned u64s_max)
+ unsigned u64s, unsigned flags)
{
int ret;
EBUG_ON(res->ref);
- EBUG_ON(u64s_max < u64s_min);
EBUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
- if (journal_res_get_fast(j, res, u64s_min, u64s_max))
+ res->u64s = u64s;
+
+ if (journal_res_get_fast(j, res, flags))
goto out;
- ret = bch2_journal_res_get_slowpath(j, res, u64s_min, u64s_max);
+ ret = bch2_journal_res_get_slowpath(j, res, flags);
if (ret)
return ret;
out:
- lock_acquire_shared(&j->res_map, 0, 0, NULL, _THIS_IP_);
- EBUG_ON(!res->ref);
+ if (!(flags & JOURNAL_RES_GET_CHECK)) {
+ lock_acquire_shared(&j->res_map, 0, 0, NULL, _THIS_IP_);
+ EBUG_ON(!res->ref);
+ }
return 0;
}
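One behavioural change in the fastpath is easy to miss: reservations are now
fixed-size. bch2_journal_res_get() stores the caller's size in res->u64s up
front, and journal_res_get_fast() either grants exactly that much or fails,
where it previously granted anything between u64s_min and u64s_max. A
hypothetical sketch of the probe-then-take sequence this enables (j and u64s
assumed in scope; the patch itself only drives CHECK through the slowpath):

	struct journal_res res = { .u64s = u64s };

	/* Probe: no cmpxchg, no state change; res.offset/seq stay unset: */
	if (!journal_res_get_fast(j, &res, JOURNAL_RES_GET_CHECK))
		return -EAGAIN;

	/* Take it for real: bumps cur_entry_offset and the buf refcount: */
	if (journal_res_get_fast(j, &res, 0)) {
		/* res.idx, res.offset and res.seq now describe our slot */
	}

Note the take can still fail if another thread consumed the remaining space
between the probe and the cmpxchg, which is why do_btree_insert_at() loops
rather than assuming a successful CHECK guarantees the next NONBLOCK get.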