summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/bcachefs/bcachefs.h3
-rw-r--r--fs/bcachefs/journal.c34
-rw-r--r--fs/bcachefs/journal.h2
-rw-r--r--fs/bcachefs/journal_io.c138
-rw-r--r--fs/bcachefs/journal_io.h3
-rw-r--r--fs/bcachefs/recovery.c68
-rw-r--r--fs/bcachefs/recovery.h2
-rw-r--r--fs/bcachefs/super.c3
8 files changed, 151 insertions, 102 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index f2bb23162b4a..43e921b91d85 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -894,7 +894,8 @@ mempool_t bio_bounce_pages;
mempool_t btree_bounce_pool;
struct journal journal;
- struct list_head journal_entries;
+ GENRADIX(struct journal_replay *) journal_entries;
+ u64 journal_entries_base_seq;
struct journal_keys journal_keys;
struct list_head journal_iters;
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index f87f76553bf4..f8b57de31d93 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1039,17 +1039,25 @@ void bch2_fs_journal_stop(struct journal *j)
cancel_delayed_work_sync(&j->write_work);
}
-int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
- struct list_head *journal_entries)
+int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_entry_pin_list *p;
- struct journal_replay *i;
+ struct journal_replay *i, **_i;
+ struct genradix_iter iter;
+ bool had_entries = false;
+ unsigned ptr;
u64 last_seq = cur_seq, nr, seq;
- if (!list_empty(journal_entries))
- last_seq = le64_to_cpu(list_last_entry(journal_entries,
- struct journal_replay, list)->j.last_seq);
+ genradix_for_each_reverse(&c->journal_entries, iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
+ continue;
+
+ last_seq = le64_to_cpu(i->j.last_seq);
+ break;
+ }
nr = cur_seq - last_seq;
@@ -1071,14 +1079,14 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
j->pin.back = cur_seq;
atomic64_set(&j->seq, cur_seq - 1);
- if (list_empty(journal_entries))
- j->last_empty_seq = cur_seq - 1;
-
fifo_for_each_entry_ptr(p, &j->pin, seq)
journal_pin_list_init(p, 1);
- list_for_each_entry(i, journal_entries, list) {
- unsigned ptr;
+ genradix_for_each(&c->journal_entries, iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
+ continue;
seq = le64_to_cpu(i->j.seq);
BUG_ON(seq >= cur_seq);
@@ -1094,9 +1102,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
p->devs.nr = 0;
for (ptr = 0; ptr < i->nr_ptrs; ptr++)
bch2_dev_list_add_dev(&p->devs, i->ptrs[ptr].dev);
+
+ had_entries = true;
}
- if (list_empty(journal_entries))
+ if (!had_entries)
j->last_empty_seq = cur_seq;
spin_lock(&j->lock);
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index c287ecf643aa..59453dcfa4e9 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -510,7 +510,7 @@ int bch2_dev_journal_alloc(struct bch_dev *);
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
void bch2_fs_journal_stop(struct journal *);
-int bch2_fs_journal_start(struct journal *, u64, struct list_head *);
+int bch2_fs_journal_start(struct journal *, u64);
void bch2_dev_journal_exit(struct bch_dev *);
int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index fad142196daa..c84c0b840906 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -16,12 +16,22 @@
#include "replicas.h"
#include "trace.h"
-static void __journal_replay_free(struct journal_replay *i)
+static inline u32 journal_entry_radix_idx(struct bch_fs *c,
+ struct jset *j)
{
- list_del(&i->list);
+ return (le64_to_cpu(j->seq) - c->journal_entries_base_seq) & (~0U >> 1);
+}
+
+static void __journal_replay_free(struct bch_fs *c,
+ struct journal_replay *i)
+{
+ struct journal_replay **p =
+ genradix_ptr(&c->journal_entries, journal_entry_radix_idx(c, &i->j));
+
+ BUG_ON(*p != i);
+ *p = NULL;
kvpfree(i, offsetof(struct journal_replay, j) +
vstruct_bytes(&i->j));
-
}
static void journal_replay_free(struct bch_fs *c, struct journal_replay *i)
@@ -29,13 +39,12 @@ static void journal_replay_free(struct bch_fs *c, struct journal_replay *i)
i->ignore = true;
if (!c->opts.read_entire_journal)
- __journal_replay_free(i);
+ __journal_replay_free(c, i);
}
struct journal_list {
struct closure cl;
struct mutex lock;
- struct list_head *head;
int ret;
};
@@ -51,19 +60,30 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
struct journal_list *jlist, struct jset *j,
bool bad)
{
- struct journal_replay *i, *pos, *dup = NULL;
+ struct genradix_iter iter;
+ struct journal_replay **_i, *i, *dup;
struct journal_ptr *ptr;
- struct list_head *where;
size_t bytes = vstruct_bytes(j);
u64 last_seq = 0;
int ret = JOURNAL_ENTRY_ADD_OK;
+ /*
+ * Xarrays are indexed by a ulong, not a u64, so we can't index them by
+ * sequence number directly:
+ * Assume instead that they will all fall within the range of +-2billion
+ * of the filrst one we find.
+ */
+ if (!c->journal_entries_base_seq)
+ c->journal_entries_base_seq = max_t(s64, 1, le64_to_cpu(j->seq) - S32_MAX);
+
+#if 0
list_for_each_entry_reverse(i, jlist->head, list) {
if (!JSET_NO_FLUSH(&i->j)) {
last_seq = le64_to_cpu(i->j.last_seq);
break;
}
}
+#endif
/* Is this entry older than the range we need? */
if (!c->opts.read_entire_journal &&
@@ -73,29 +93,21 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
}
/* Drop entries we don't need anymore */
- if (!JSET_NO_FLUSH(j)) {
- list_for_each_entry_safe(i, pos, jlist->head, list) {
+ if (!JSET_NO_FLUSH(j) && !c->opts.read_entire_journal) {
+ genradix_for_each(&c->journal_entries, iter, _i) {
+ i = *_i;
+
+ if (!i)
+ continue;
+
if (le64_to_cpu(i->j.seq) >= le64_to_cpu(j->last_seq))
break;
journal_replay_free(c, i);
}
}
- list_for_each_entry_reverse(i, jlist->head, list) {
- if (le64_to_cpu(j->seq) > le64_to_cpu(i->j.seq)) {
- where = &i->list;
- goto add;
- }
- }
-
- where = jlist->head;
-add:
- dup = where->next != jlist->head
- ? container_of(where->next, struct journal_replay, list)
- : NULL;
-
- if (dup && le64_to_cpu(j->seq) != le64_to_cpu(dup->j.seq))
- dup = NULL;
+ _i = genradix_ptr(&c->journal_entries, journal_entry_radix_idx(c, j));
+ dup = _i ? *_i : NULL;
/*
* Duplicate journal entries? If so we want the one that didn't have a
@@ -131,10 +143,19 @@ add:
if (dup) {
i->nr_ptrs = dup->nr_ptrs;
memcpy(i->ptrs, dup->ptrs, sizeof(dup->ptrs));
- __journal_replay_free(dup);
+ __journal_replay_free(c, dup);
}
- list_add(&i->list, where);
+ _i = genradix_ptr_alloc(&c->journal_entries,
+ journal_entry_radix_idx(c, &i->j),
+ GFP_KERNEL);
+ if (!_i) {
+ bch_err(c, "failed to allocate c->journal_entries entry");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ *_i = i;
found:
for (ptr = i->ptrs; ptr < i->ptrs + i->nr_ptrs; ptr++) {
if (ptr->dev == ca->dev_idx) {
@@ -913,7 +934,8 @@ static void bch2_journal_read_device(struct closure *cl)
struct bch_fs *c = ca->fs;
struct journal_list *jlist =
container_of(cl->parent, struct journal_list, cl);
- struct journal_replay *r;
+ struct journal_replay *r, **_r;
+ struct genradix_iter iter;
struct journal_read_buf buf = { NULL, 0 };
u64 min_seq = U64_MAX;
unsigned i;
@@ -956,7 +978,12 @@ static void bch2_journal_read_device(struct closure *cl)
ja->sectors_free = ca->mi.bucket_size;
mutex_lock(&jlist->lock);
- list_for_each_entry(r, jlist->head, list) {
+ genradix_for_each(&c->journal_entries, iter, _r) {
+ r = *_r;
+
+ if (!r)
+ continue;
+
for (i = 0; i < r->nr_ptrs; i++) {
if (r->ptrs[i].dev == ca->dev_idx &&
sector_to_bucket(ca, r->ptrs[i].sector) == ja->buckets[ja->cur_idx]) {
@@ -1022,11 +1049,11 @@ void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
}
}
-int bch2_journal_read(struct bch_fs *c, struct list_head *list,
- u64 *blacklist_seq, u64 *start_seq)
+int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
{
struct journal_list jlist;
- struct journal_replay *i, *t;
+ struct journal_replay *i, **_i, *prev = NULL;
+ struct genradix_iter radix_iter;
struct bch_dev *ca;
unsigned iter;
struct printbuf buf = PRINTBUF;
@@ -1037,7 +1064,6 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
closure_init_stack(&jlist.cl);
mutex_init(&jlist.lock);
- jlist.head = list;
jlist.ret = 0;
for_each_member_device(ca, c, iter) {
@@ -1061,22 +1087,21 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
if (jlist.ret)
return jlist.ret;
- if (list_empty(list)) {
- bch_info(c, "journal read done, but no entries found");
- return 0;
- }
-
- i = list_last_entry(list, struct journal_replay, list);
- *start_seq = le64_to_cpu(i->j.seq) + 1;
+ *start_seq = 0;
/*
* Find most recent flush entry, and ignore newer non flush entries -
* those entries will be blacklisted:
*/
- list_for_each_entry_safe_reverse(i, t, list, list) {
- if (i->ignore)
+ genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
continue;
+ if (!*start_seq)
+ *start_seq = le64_to_cpu(i->j.seq) + 1;
+
if (!JSET_NO_FLUSH(&i->j)) {
last_seq = le64_to_cpu(i->j.last_seq);
*blacklist_seq = le64_to_cpu(i->j.seq) + 1;
@@ -1086,6 +1111,11 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
journal_replay_free(c, i);
}
+ if (!*start_seq) {
+ bch_info(c, "journal read done, but no entries found");
+ return 0;
+ }
+
if (!last_seq) {
fsck_err(c, "journal read done, but no entries found after dropping non-flushes");
ret = -1;
@@ -1093,8 +1123,10 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
}
/* Drop blacklisted entries and entries older than last_seq: */
- list_for_each_entry_safe(i, t, list, list) {
- if (i->ignore)
+ genradix_for_each(&c->journal_entries, radix_iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
continue;
seq = le64_to_cpu(i->j.seq);
@@ -1113,8 +1145,10 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
/* Check for missing entries: */
seq = last_seq;
- list_for_each_entry(i, list, list) {
- if (i->ignore)
+ genradix_for_each(&c->journal_entries, radix_iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
continue;
BUG_ON(seq > le64_to_cpu(i->j.seq));
@@ -1136,11 +1170,9 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
!bch2_journal_seq_is_blacklisted(c, seq, false))
seq++;
- if (i->list.prev != list) {
- struct journal_replay *p = list_prev_entry(i, list);
-
- bch2_journal_ptrs_to_text(&buf1, c, p);
- pr_buf(&buf1, " size %zu", vstruct_sectors(&p->j, c->block_bits));
+ if (prev) {
+ bch2_journal_ptrs_to_text(&buf1, c, prev);
+ pr_buf(&buf1, " size %zu", vstruct_sectors(&prev->j, c->block_bits));
} else
pr_buf(&buf1, "(none)");
bch2_journal_ptrs_to_text(&buf2, c, i);
@@ -1157,10 +1189,11 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
printbuf_exit(&buf2);
}
+ prev = i;
seq++;
}
- list_for_each_entry(i, list, list) {
+ genradix_for_each(&c->journal_entries, radix_iter, _i) {
struct jset_entry *entry;
struct bkey_i *k, *_n;
struct bch_replicas_padded replicas = {
@@ -1169,7 +1202,8 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
};
unsigned ptr;
- if (i->ignore)
+ i = *_i;
+ if (!i || i->ignore)
continue;
ret = jset_validate_entries(c, &i->j, READ);
diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h
index f2001835e43e..30e995c81fc4 100644
--- a/fs/bcachefs/journal_io.h
+++ b/fs/bcachefs/journal_io.h
@@ -7,7 +7,6 @@
* during cache_registration
*/
struct journal_replay {
- struct list_head list;
struct journal_ptr {
u8 dev;
u32 bucket;
@@ -53,7 +52,7 @@ void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *,
void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct journal_replay *);
-int bch2_journal_read(struct bch_fs *, struct list_head *, u64 *, u64 *);
+int bch2_journal_read(struct bch_fs *, u64 *, u64 *);
void bch2_journal_write(struct closure *);
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 9120ed26250e..07ce6a540856 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -433,16 +433,16 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *i
/* sort and dedup all keys in the journal: */
-void bch2_journal_entries_free(struct list_head *list)
+void bch2_journal_entries_free(struct bch_fs *c)
{
-
- while (!list_empty(list)) {
- struct journal_replay *i =
- list_first_entry(list, struct journal_replay, list);
- list_del(&i->list);
- kvpfree(i, offsetof(struct journal_replay, j) +
- vstruct_bytes(&i->j));
- }
+ struct journal_replay **i;
+ struct genradix_iter iter;
+
+ genradix_for_each(&c->journal_entries, iter, i)
+ if (*i)
+ kvpfree(*i, offsetof(struct journal_replay, j) +
+ vstruct_bytes(&(*i)->j));
+ genradix_free(&c->journal_entries);
}
/*
@@ -476,15 +476,18 @@ void bch2_journal_keys_free(struct journal_keys *keys)
static int journal_keys_sort(struct bch_fs *c)
{
- struct journal_replay *i;
+ struct genradix_iter iter;
+ struct journal_replay *i, **_i;
struct jset_entry *entry;
struct bkey_i *k, *_n;
struct journal_keys *keys = &c->journal_keys;
struct journal_key *src, *dst;
size_t nr_keys = 0;
- list_for_each_entry(i, &c->journal_entries, list) {
- if (i->ignore)
+ genradix_for_each(&c->journal_entries, iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
continue;
if (!keys->journal_seq_base)
@@ -503,8 +506,10 @@ static int journal_keys_sort(struct bch_fs *c)
if (!keys->d)
return -ENOMEM;
- list_for_each_entry(i, &c->journal_entries, list) {
- if (i->ignore)
+ genradix_for_each(&c->journal_entries, iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
continue;
BUG_ON(le64_to_cpu(i->j.seq) - keys->journal_seq_base > U32_MAX);
@@ -751,10 +756,8 @@ static int journal_replay_entry_early(struct bch_fs *c,
}
static int journal_replay_early(struct bch_fs *c,
- struct bch_sb_field_clean *clean,
- struct list_head *journal)
+ struct bch_sb_field_clean *clean)
{
- struct journal_replay *i;
struct jset_entry *entry;
int ret;
@@ -767,8 +770,13 @@ static int journal_replay_early(struct bch_fs *c,
return ret;
}
} else {
- list_for_each_entry(i, journal, list) {
- if (i->ignore)
+ struct genradix_iter iter;
+ struct journal_replay *i, **_i;
+
+ genradix_for_each(&c->journal_entries, iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
continue;
vstruct_for_each(&i->j, entry) {
@@ -1093,17 +1101,17 @@ int bch2_fs_recovery(struct bch_fs *c)
}
if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) {
- struct journal_replay *i;
+ struct genradix_iter iter;
+ struct journal_replay **i;
bch_verbose(c, "starting journal read");
- ret = bch2_journal_read(c, &c->journal_entries,
- &blacklist_seq, &journal_seq);
+ ret = bch2_journal_read(c, &blacklist_seq, &journal_seq);
if (ret)
goto err;
- list_for_each_entry_reverse(i, &c->journal_entries, list)
- if (!i->ignore) {
- last_journal_entry = &i->j;
+ genradix_for_each_reverse(&c->journal_entries, iter, i)
+ if (*i && !(*i)->ignore) {
+ last_journal_entry = &(*i)->j;
break;
}
@@ -1152,7 +1160,7 @@ use_clean:
zero_out_btree_mem_ptr(&c->journal_keys);
- ret = journal_replay_early(c, clean, &c->journal_entries);
+ ret = journal_replay_early(c, clean);
if (ret)
goto err;
@@ -1175,8 +1183,7 @@ use_clean:
}
}
- ret = bch2_fs_journal_start(&c->journal, journal_seq,
- &c->journal_entries);
+ ret = bch2_fs_journal_start(&c->journal, journal_seq);
if (ret)
goto err;
@@ -1380,7 +1387,7 @@ out:
if (!c->opts.keep_journal) {
bch2_journal_keys_free(&c->journal_keys);
- bch2_journal_entries_free(&c->journal_entries);
+ bch2_journal_entries_free(c);
}
kfree(clean);
if (ret)
@@ -1401,7 +1408,6 @@ int bch2_fs_initialize(struct bch_fs *c)
struct qstr lostfound = QSTR("lost+found");
const char *err = "cannot allocate memory";
struct bch_dev *ca;
- LIST_HEAD(journal);
unsigned i;
int ret;
@@ -1441,7 +1447,7 @@ int bch2_fs_initialize(struct bch_fs *c)
* journal_res_get() will crash if called before this has
* set up the journal.pin FIFO and journal.cur pointer:
*/
- bch2_fs_journal_start(&c->journal, 1, &journal);
+ bch2_fs_journal_start(&c->journal, 1);
bch2_journal_set_replay_done(&c->journal);
err = "error going read-write";
diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h
index 30580a8984a1..ab8b116ac7db 100644
--- a/fs/bcachefs/recovery.h
+++ b/fs/bcachefs/recovery.h
@@ -55,7 +55,7 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
struct btree *);
void bch2_journal_keys_free(struct journal_keys *);
-void bch2_journal_entries_free(struct list_head *);
+void bch2_journal_entries_free(struct bch_fs *);
int bch2_fs_recovery(struct bch_fs *);
int bch2_fs_initialize(struct bch_fs *);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 2c3d0546f2b6..689c82f6cb5d 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -457,7 +457,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
bch2_journal_keys_free(&c->journal_keys);
- bch2_journal_entries_free(&c->journal_entries);
+ bch2_journal_entries_free(c);
percpu_free_rwsem(&c->mark_lock);
free_percpu(c->online_reserved);
@@ -676,7 +676,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
INIT_WORK(&c->journal_seq_blacklist_gc_work,
bch2_blacklist_entries_gc);
- INIT_LIST_HEAD(&c->journal_entries);
INIT_LIST_HEAD(&c->journal_iters);
INIT_LIST_HEAD(&c->fsck_errors);