author		Kent Overstreet <kent.overstreet@linux.dev>	2024-01-31 22:26:15 +0300
committer	Kent Overstreet <kent.overstreet@linux.dev>	2024-03-10 22:34:08 +0300
commit		916abefd437be89383e720f19a43f727f045a468 (patch)
tree		1855fe5f28afd255f1c99f7ed80991e16d05281b /fs/bcachefs/journal.c
parent		38789c25087471121cfa42333bfb249eae539682 (diff)
download	linux-916abefd437be89383e720f19a43f727f045a468.tar.xz
bcachefs: better journal pipelining
Recently a severe performance regression was discovered, which bisected to

    a6548c8b5eb5 bcachefs: Avoid flushing the journal in the discard path

It turns out the old behaviour, which issued excessive journal flushes, worked around a performance issue where queueing delays would cause the journal to not be able to write quickly enough and stall.

The journal flushes masked the issue because they periodically flushed the device write cache, reducing write latency for non-flush writes.

This patch reworks the journalling code to allow more than one (non-flush) write to be in flight at a time. With this patch, doing 4k random writes at an iodepth of 128, we are now able to hit 560k iops to a Samsung 970 EVO Plus - previously, we were stuck in the ~200k range.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
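
The pipelining rests on three new per-buffer flags, visible in the hunks below, which turn each journal_buf into a small write state machine. Here is a minimal sketch of the progression, assuming a simplified struct (the real struct journal_buf lives in fs/bcachefs/journal_types.h and carries many more fields; this stand-in exists only to name the states):

	/*
	 * Illustrative model, not the bcachefs definition.  A buffer's
	 * write moves through these flags in order:
	 *
	 *	(idle) -> write_started -> write_allocated -> write_done
	 *
	 * A later buffer's write may start once every earlier buffer has
	 * at least reached write_allocated, i.e. fixed its location on
	 * disk; it need not have reached write_done.  That ordering rule
	 * is what lets several non-flush writes be in flight at once.
	 */
	struct journal_buf_write_state {
		bool	write_started;		/* bch2_journal_write() kicked off */
		bool	write_allocated;	/* on-disk location reserved */
		bool	write_done;		/* IO complete, buffer reclaimable */
	};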
Diffstat (limited to 'fs/bcachefs/journal.c')
-rw-r--r--	fs/bcachefs/journal.c	47
1 file changed, 38 insertions(+), 9 deletions(-)
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 963933930366..fe5f7a944ad3 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -74,6 +74,13 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u64 seq)
 	prt_printf(out, "%li jiffies", buf->expires - jiffies);
 	prt_newline(out);
 
+	if (buf->write_done)
+		prt_printf(out, "write done\n");
+	else if (buf->write_allocated)
+		prt_printf(out, "write allocated\n");
+	else if (buf->write_started)
+		prt_printf(out, "write started\n");
+
 	printbuf_indent_sub(out, 2);
 }
@@ -175,21 +182,40 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
 	return stuck;
 }
 
+void bch2_journal_do_writes(struct journal *j)
+{
+	for (u64 seq = journal_last_unwritten_seq(j);
+	     seq <= journal_cur_seq(j);
+	     seq++) {
+		unsigned idx = seq & JOURNAL_BUF_MASK;
+		struct journal_buf *w = j->buf + idx;
+
+		if (w->write_started && !w->write_allocated)
+			break;
+		if (w->write_started)
+			continue;
+
+		if (!journal_state_count(j->reservations, idx)) {
+			w->write_started = true;
+			closure_call(&w->io, bch2_journal_write, j->wq, NULL);
+		}
+
+		break;
+	}
+}
+
 /*
  * Final processing when the last reference of a journal buffer has been
  * dropped. Drop the pin list reference acquired at journal entry open and
  * write the buffer, if requested.
  */
-void bch2_journal_buf_put_final(struct journal *j, u64 seq, bool write)
+void bch2_journal_buf_put_final(struct journal *j, u64 seq)
 {
 	lockdep_assert_held(&j->lock);
 
 	if (__bch2_journal_pin_put(j, seq))
 		bch2_journal_reclaim_fast(j);
 
-	if (write) {
-		struct journal_buf *w = j->buf + (seq & JOURNAL_BUF_MASK);
-		closure_call(&w->io, bch2_journal_write, j->wq, NULL);
-	}
+	bch2_journal_do_writes(j);
 }
/*
@@ -381,11 +407,14 @@ static int journal_entry_open(struct journal *j)
 	BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf);
 
 	bkey_extent_init(&buf->key);
-	buf->noflush = false;
-	buf->must_flush = false;
-	buf->separate_flush = false;
-	buf->flush_time = 0;
+	buf->noflush			= false;
+	buf->must_flush			= false;
+	buf->separate_flush		= false;
+	buf->flush_time			= 0;
 	buf->need_flush_to_write_buffer = true;
+	buf->write_started		= false;
+	buf->write_allocated		= false;
+	buf->write_done			= false;
 
 	memset(buf->data, 0, sizeof(*buf->data));
 	buf->data->seq = cpu_to_le64(journal_cur_seq(j));
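
To make the new control flow concrete: bch2_journal_do_writes() scans forward from the oldest unwritten sequence number, skips writes already in flight, stops if an in-flight write has not yet allocated its on-disk position, and otherwise starts the first unstarted buffer that has no open reservations; it is re-invoked as earlier writes progress, which is how multiple writes end up in flight. Below is a standalone sketch of that scan under those assumptions, with the journal reduced to plain arrays (struct buf, pin_count and do_writes() are hypothetical stand-ins, not bcachefs API):

	#include <stdbool.h>
	#include <stdio.h>

	#define JOURNAL_BUF_NR		4
	#define JOURNAL_BUF_MASK	(JOURNAL_BUF_NR - 1)

	struct buf {
		bool		started, allocated, done;
		unsigned	pin_count;	/* stand-in for journal_state_count() */
	};

	/* Mirrors the loop in bch2_journal_do_writes(): walk from the last
	 * unwritten seq to the current seq and start at most one new write,
	 * never past a predecessor that has not yet allocated disk space. */
	static void do_writes(struct buf *ring, unsigned long last_unwritten,
			      unsigned long cur)
	{
		for (unsigned long seq = last_unwritten; seq <= cur; seq++) {
			struct buf *w = &ring[seq & JOURNAL_BUF_MASK];

			if (w->started && !w->allocated)
				break;		/* predecessor still picking its location */
			if (w->started)
				continue;	/* already in flight, look further ahead */

			if (!w->pin_count) {	/* no open references: safe to write */
				w->started = true;
				printf("starting write for seq %lu\n", seq);
			}
			break;			/* never start past the first unstarted buf */
		}
	}

	int main(void)
	{
		struct buf ring[JOURNAL_BUF_NR] = {
			[0] = { .started = true, .allocated = true },	/* seq 4: in flight */
			[1] = { .pin_count = 0 },			/* seq 5: ready */
		};

		/* seq 5's write starts even though seq 4 has not completed:
		 * that is the pipelining this patch adds. */
		do_writes(ring, 4, 5);
		return 0;
	}

Under the old code this second write could not have been issued from here: bch2_journal_buf_put_final() only ever kicked off the single buffer whose last reference was dropped, so non-flush journal writes could not overlap.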