summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@gmail.com> 2021-01-15 00:21:22 +0300
committer: Kent Overstreet <kent.overstreet@linux.dev> 2023-10-23 00:08:51 +0300
commit: ed9d58a2b1ddbc38816571638ee114b7efb9f279 (patch)
tree: 45ac754bb0a0abad856d100e891f3c9f8bf18f78
parent: ac958006294ab462848bc69b9b5ddb1a8b99e748 (diff)
download: linux-ed9d58a2b1ddbc38816571638ee114b7efb9f279.tar.xz
bcachefs: Run jset_validate in write path as well
This is because we had a bug where we were writing out journal entries with a garbage last_seq and not catching it.

Also, completely ignore jset->last_seq when JSET_NO_FLUSH is true, because of the aforementioned bug, but change the write path to set last_seq to 0 when JSET_NO_FLUSH is true.

Minor other cleanups and comments.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- fs/bcachefs/btree_io.c 2
-rw-r--r-- fs/bcachefs/journal.c 5
-rw-r--r-- fs/bcachefs/journal_io.c 54
3 files changed, 43 insertions, 18 deletions
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 831f387557aa..c4d53ea2e920 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1624,7 +1624,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
validate_before_checksum = true;
/* validate_bset will be modifying: */
- if (le16_to_cpu(i->version) < bcachefs_metadata_version_max)
+ if (le16_to_cpu(i->version) <= bcachefs_metadata_version_inode_btree_change)
validate_before_checksum = true;
/* if we're going to be encrypting, check metadata validity first: */
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 3ca8137923a6..e90fe042302f 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -117,6 +117,9 @@ void __bch2_journal_buf_put(struct journal *j)
/*
* Returns true if journal entry is now closed:
+ *
+ * We don't close a journal_buf until the next journal_buf is finished writing,
+ * and can be opened again - this also initializes the next journal_buf:
*/
static bool __journal_entry_close(struct journal *j)
{
@@ -154,6 +157,7 @@ static bool __journal_entry_close(struct journal *j)
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v);
+ /* Close out old buffer: */
buf->data->u64s = cpu_to_le32(old.cur_entry_offset);
sectors = vstruct_blocks_plus(buf->data, c->block_bits,
@@ -184,6 +188,7 @@ static bool __journal_entry_close(struct journal *j)
__bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq));
+ /* Initialize new buffer: */
journal_pin_new_entry(j, 1);
bch2_journal_buf_init(j);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index ef4d48081975..f6c9681badea 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -469,7 +469,8 @@ static int jset_validate(struct bch_fs *c,
version < bcachefs_metadata_version_min) ||
version >= bcachefs_metadata_version_max, c,
"%s sector %llu seq %llu: unknown journal entry version %u",
- ca->name, sector, le64_to_cpu(jset->seq),
+ ca ? ca->name : c->name,
+ sector, le64_to_cpu(jset->seq),
version)) {
/* don't try to continue: */
return EINVAL;
@@ -481,32 +482,42 @@ static int jset_validate(struct bch_fs *c,
if (journal_entry_err_on(bytes > bucket_sectors_left << 9, c,
"%s sector %llu seq %llu: journal entry too big (%zu bytes)",
- ca->name, sector, le64_to_cpu(jset->seq), bytes)) {
+ ca ? ca->name : c->name,
+ sector, le64_to_cpu(jset->seq), bytes)) {
ret = JOURNAL_ENTRY_BAD;
le32_add_cpu(&jset->u64s,
-((bytes - (bucket_sectors_left << 9)) / 8));
}
- if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c,
+ if (journal_entry_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c,
"%s sector %llu seq %llu: journal entry with unknown csum type %llu",
- ca->name, sector, le64_to_cpu(jset->seq),
+ ca ? ca->name : c->name,
+ sector, le64_to_cpu(jset->seq),
JSET_CSUM_TYPE(jset))) {
ret = JOURNAL_ENTRY_BAD;
- goto bad_csum_type;
+ goto csum_done;
}
+ if (write)
+ goto csum_done;
+
csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset);
if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum), c,
"%s sector %llu seq %llu: journal checksum bad",
- ca->name, sector, le64_to_cpu(jset->seq)))
+ ca ? ca->name : c->name,
+ sector, le64_to_cpu(jset->seq)))
ret = JOURNAL_ENTRY_BAD;
bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
jset->encrypted_start,
vstruct_end(jset) - (void *) jset->encrypted_start);
-bad_csum_type:
- if (journal_entry_err_on(le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c,
- "invalid journal entry: last_seq > seq")) {
+csum_done:
+ /* last_seq is ignored when JSET_NO_FLUSH is true */
+ if (journal_entry_err_on(!JSET_NO_FLUSH(jset) &&
+ le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c,
+ "invalid journal entry: last_seq > seq (%llu > %llu)",
+ le64_to_cpu(jset->last_seq),
+ le64_to_cpu(jset->seq))) {
jset->last_seq = jset->seq;
return JOURNAL_ENTRY_BAD;
}
@@ -514,6 +525,14 @@ fsck_err:
return ret;
}
+static int jset_validate_for_write(struct bch_fs *c, struct jset *jset)
+{
+ unsigned sectors = vstruct_sectors(jset, c->block_bits);
+
+ return jset_validate(c, NULL, jset, 0, sectors, sectors, WRITE) ?:
+ jset_validate_entries(c, jset, WRITE);
+}
+
struct journal_read_buf {
void *data;
size_t size;
@@ -1081,9 +1100,7 @@ static void journal_write_done(struct closure *cl)
bch2_bkey_devs(bkey_i_to_s_c(&w->key));
struct bch_replicas_padded replicas;
union journal_res_state old, new;
- u64 seq = le64_to_cpu(w->data->seq);
- u64 last_seq = le64_to_cpu(w->data->last_seq);
- u64 v;
+ u64 v, seq, last_seq;
int err = 0;
bch2_time_stats_update(j->write_time, j->write_start_time);
@@ -1101,6 +1118,9 @@ static void journal_write_done(struct closure *cl)
bch2_fatal_error(c);
spin_lock(&j->lock);
+ seq = le64_to_cpu(w->data->seq);
+ last_seq = le64_to_cpu(w->data->last_seq);
+
if (seq >= j->pin.front)
journal_seq_pin(j, seq)->devs = devs;
@@ -1108,7 +1128,7 @@ static void journal_write_done(struct closure *cl)
if (err && (!j->err_seq || seq < j->err_seq))
j->err_seq = seq;
- if (!w->noflush) {
+ if (!JSET_NO_FLUSH(w->data)) {
j->flushed_seq_ondisk = seq;
j->last_seq_ondisk = last_seq;
}
@@ -1196,7 +1216,7 @@ void bch2_journal_write(struct closure *cl)
test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)) {
w->noflush = true;
SET_JSET_NO_FLUSH(jset, true);
- jset->last_seq = cpu_to_le64(j->last_seq_ondisk);
+ jset->last_seq = 0;
j->nr_noflush_writes++;
} else {
@@ -1248,11 +1268,11 @@ void bch2_journal_write(struct closure *cl)
if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
validate_before_checksum = true;
- if (le32_to_cpu(jset->version) < bcachefs_metadata_version_max)
+ if (le32_to_cpu(jset->version) <= bcachefs_metadata_version_inode_btree_change)
validate_before_checksum = true;
if (validate_before_checksum &&
- jset_validate_entries(c, jset, WRITE))
+ jset_validate_for_write(c, jset))
goto err;
bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
@@ -1263,7 +1283,7 @@ void bch2_journal_write(struct closure *cl)
journal_nonce(jset), jset);
if (!validate_before_checksum &&
- jset_validate_entries(c, jset, WRITE))
+ jset_validate_for_write(c, jset))
goto err;
sectors = vstruct_sectors(jset, c->block_bits);