-rw-r--r--  fs/bcachefs/btree_cache.c           | 41
-rw-r--r--  fs/bcachefs/btree_cache.h           |  2
-rw-r--r--  fs/bcachefs/btree_io.c              |  2
-rw-r--r--  fs/bcachefs/btree_key_cache.c       | 10
-rw-r--r--  fs/bcachefs/btree_types.h           |  3
-rw-r--r--  fs/bcachefs/btree_update_interior.c | 99
-rw-r--r--  fs/bcachefs/btree_update_interior.h |  6
-rw-r--r--  fs/bcachefs/debug.c                 |  5
8 files changed, 99 insertions, 69 deletions
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 42253ca17f04..92a8cc704cab 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -40,6 +40,14 @@ static inline unsigned btree_cache_can_free(struct btree_cache *bc)
 	return max_t(int, 0, bc->used - bc->reserve);
 }
 
+static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
+{
+	if (b->c.lock.readers)
+		list_move(&b->list, &bc->freed_pcpu);
+	else
+		list_move(&b->list, &bc->freed_nonpcpu);
+}
+
 static void btree_node_data_free(struct bch_fs *c, struct btree *b)
 {
 	struct btree_cache *bc = &c->btree_cache;
@@ -56,7 +64,8 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
 	b->aux_data = NULL;
 
 	bc->used--;
-	list_move(&b->list, &bc->freed);
+
+	btree_node_to_freedlist(bc, b);
 }
 
 static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
@@ -162,11 +171,6 @@ int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
 	b->c.level	= level;
 	b->c.btree_id	= id;
 
-	if (level)
-		six_lock_pcpu_alloc(&b->c.lock);
-	else
-		six_lock_pcpu_free_rcu(&b->c.lock);
-
 	mutex_lock(&bc->lock);
 	ret = __bch2_btree_node_hash_insert(bc, b);
 	if (!ret)
@@ -432,8 +436,10 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 
 	BUG_ON(atomic_read(&c->btree_cache.dirty));
 
-	while (!list_empty(&bc->freed)) {
-		b = list_first_entry(&bc->freed, struct btree, list);
+	list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
+
+	while (!list_empty(&bc->freed_nonpcpu)) {
+		b = list_first_entry(&bc->freed_nonpcpu, struct btree, list);
 		list_del(&b->list);
 		six_lock_pcpu_free(&b->c.lock);
 		kfree(b);
@@ -487,7 +493,8 @@ void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
 	mutex_init(&bc->lock);
 	INIT_LIST_HEAD(&bc->live);
 	INIT_LIST_HEAD(&bc->freeable);
-	INIT_LIST_HEAD(&bc->freed);
+	INIT_LIST_HEAD(&bc->freed_pcpu);
+	INIT_LIST_HEAD(&bc->freed_nonpcpu);
 }
 
 /*
@@ -562,9 +569,12 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
 	}
 }
 
-struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
+struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c, bool pcpu_read_locks)
 {
 	struct btree_cache *bc = &c->btree_cache;
+	struct list_head *freed = pcpu_read_locks
+		? &bc->freed_pcpu
+		: &bc->freed_nonpcpu;
 	struct btree *b, *b2;
 	u64 start_time = local_clock();
 	unsigned flags;
@@ -576,7 +586,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
 	 * We never free struct btree itself, just the memory that holds the on
 	 * disk node. Check the freed list before allocating a new one:
 	 */
-	list_for_each_entry(b, &bc->freed, list)
+	list_for_each_entry(b, freed, list)
 		if (!btree_node_reclaim(c, b)) {
 			list_del_init(&b->list);
 			goto got_node;
@@ -586,6 +596,9 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
 	if (!b)
 		goto err_locked;
 
+	if (pcpu_read_locks)
+		six_lock_pcpu_alloc(&b->c.lock);
+
 	BUG_ON(!six_trylock_intent(&b->c.lock));
 	BUG_ON(!six_trylock_write(&b->c.lock));
 got_node:
@@ -598,7 +611,7 @@ got_node:
 		if (!btree_node_reclaim(c, b2)) {
 			swap(b->data, b2->data);
 			swap(b->aux_data, b2->aux_data);
-			list_move(&b2->list, &bc->freed);
+			btree_node_to_freedlist(bc, b2);
 			six_unlock_write(&b2->c.lock);
 			six_unlock_intent(&b2->c.lock);
 			goto got_mem;
@@ -643,7 +656,7 @@ err_locked:
 	if (b) {
 		swap(b->data, b2->data);
 		swap(b->aux_data, b2->aux_data);
-		list_move(&b2->list, &bc->freed);
+		btree_node_to_freedlist(bc, b2);
 		six_unlock_write(&b2->c.lock);
 		six_unlock_intent(&b2->c.lock);
 	} else {
@@ -688,7 +701,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
 		return ERR_PTR(-EINTR);
 	}
 
-	b = bch2_btree_node_mem_alloc(c);
+	b = bch2_btree_node_mem_alloc(c, level != 0);
 
 	if (trans && b == ERR_PTR(-ENOMEM)) {
 		trans->memory_allocation_failure = true;
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index 96f8f90e85a1..83723805f12a 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -20,7 +20,7 @@ void bch2_btree_cache_cannibalize_unlock(struct bch_fs *);
 int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);
 
 struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
-struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
+struct btree *bch2_btree_node_mem_alloc(struct bch_fs *, bool);
 
 struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_path *,
 				  const struct bkey_i *, unsigned,
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 53f83340f69a..3031b566a112 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1542,7 +1542,7 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
 		closure_sync(&cl);
 	} while (ret);
 
-	b = bch2_btree_node_mem_alloc(c);
+	b = bch2_btree_node_mem_alloc(c, level != 0);
 	bch2_btree_cache_cannibalize_unlock(c);
 
 	BUG_ON(IS_ERR(b));
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 70f31b5379e7..7e41552a57df 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -166,13 +166,13 @@ btree_key_cache_create(struct bch_fs *c,
 		}
 
 		was_new = false;
+	} else {
+		if (btree_id == BTREE_ID_subvolumes)
+			six_lock_pcpu_alloc(&ck->c.lock);
+		else
+			six_lock_pcpu_free(&ck->c.lock);
 	}
 
-	if (btree_id == BTREE_ID_subvolumes)
-		six_lock_pcpu_alloc(&ck->c.lock);
-	else
-		six_lock_pcpu_free(&ck->c.lock);
-
 	ck->c.level		= 0;
 	ck->c.btree_id		= btree_id;
 	ck->key.btree_id	= btree_id;
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 561406b4b7c2..51eb686331bf 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -152,7 +152,8 @@ struct btree_cache {
 	struct mutex		lock;
 	struct list_head	live;
 	struct list_head	freeable;
-	struct list_head	freed;
+	struct list_head	freed_pcpu;
+	struct list_head	freed_nonpcpu;
 
 	/* Number of elements in live + freeable lists */
 	unsigned		used;
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 523d1146b2e2..43022b340f4e 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -181,6 +181,7 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
 static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
 					     struct disk_reservation *res,
 					     struct closure *cl,
+					     bool interior_node,
 					     unsigned flags)
 {
 	struct write_point *wp;
@@ -242,7 +243,7 @@ retry:
 	bch2_open_bucket_get(c, wp, &ob);
 	bch2_alloc_sectors_done(c, wp);
mem_alloc:
-	b = bch2_btree_node_mem_alloc(c);
+	b = bch2_btree_node_mem_alloc(c, interior_node);
 	six_unlock_write(&b->c.lock);
 	six_unlock_intent(&b->c.lock);
 
@@ -260,12 +261,13 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 {
 	struct bch_fs *c = as->c;
 	struct btree *b;
+	struct prealloc_nodes *p = &as->prealloc_nodes[!!level];
 	int ret;
 
 	BUG_ON(level >= BTREE_MAX_DEPTH);
-	BUG_ON(!as->nr_prealloc_nodes);
+	BUG_ON(!p->nr);
 
-	b = as->prealloc_nodes[--as->nr_prealloc_nodes];
+	b = p->b[--p->nr];
 
 	six_lock_intent(&b->c.lock, NULL, NULL);
 	six_lock_write(&b->c.lock, NULL, NULL);
@@ -377,43 +379,49 @@ static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
 static void bch2_btree_reserve_put(struct btree_update *as)
 {
 	struct bch_fs *c = as->c;
+	struct prealloc_nodes *p;
 
 	mutex_lock(&c->btree_reserve_cache_lock);
 
-	while (as->nr_prealloc_nodes) {
-		struct btree *b = as->prealloc_nodes[--as->nr_prealloc_nodes];
+	for (p = as->prealloc_nodes;
+	     p < as->prealloc_nodes + ARRAY_SIZE(as->prealloc_nodes);
+	     p++) {
+		while (p->nr) {
+			struct btree *b = p->b[--p->nr];
 
-		six_lock_intent(&b->c.lock, NULL, NULL);
-		six_lock_write(&b->c.lock, NULL, NULL);
+			six_lock_intent(&b->c.lock, NULL, NULL);
+			six_lock_write(&b->c.lock, NULL, NULL);
 
-		if (c->btree_reserve_cache_nr <
-		    ARRAY_SIZE(c->btree_reserve_cache)) {
-			struct btree_alloc *a =
-				&c->btree_reserve_cache[c->btree_reserve_cache_nr++];
+			if (c->btree_reserve_cache_nr <
+			    ARRAY_SIZE(c->btree_reserve_cache)) {
+				struct btree_alloc *a =
+					&c->btree_reserve_cache[c->btree_reserve_cache_nr++];
 
-			a->ob = b->ob;
-			b->ob.nr = 0;
-			bkey_copy(&a->k, &b->key);
-		} else {
-			bch2_open_buckets_put(c, &b->ob);
-		}
+				a->ob = b->ob;
+				b->ob.nr = 0;
+				bkey_copy(&a->k, &b->key);
+			} else {
+				bch2_open_buckets_put(c, &b->ob);
+			}
 
-		__btree_node_free(c, b);
-		six_unlock_write(&b->c.lock);
-		six_unlock_intent(&b->c.lock);
+			__btree_node_free(c, b);
+			six_unlock_write(&b->c.lock);
+			six_unlock_intent(&b->c.lock);
+		}
 	}
 
 	mutex_unlock(&c->btree_reserve_cache_lock);
 }
 
-static int bch2_btree_reserve_get(struct btree_update *as, unsigned nr_nodes,
+static int bch2_btree_reserve_get(struct btree_update *as, unsigned nr_nodes[2],
 				  unsigned flags, struct closure *cl)
 {
 	struct bch_fs *c = as->c;
 	struct btree *b;
+	unsigned interior;
 	int ret;
 
-	BUG_ON(nr_nodes > BTREE_RESERVE_MAX);
+	BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX);
 
 	/*
 	 * Protects reaping from the btree node cache and using the btree node
@@ -423,23 +431,28 @@ static int bch2_btree_reserve_get(struct btree_update *as, unsigned nr_nodes,
 	if (ret)
 		return ret;
 
-	while (as->nr_prealloc_nodes < nr_nodes) {
-		b = __bch2_btree_node_alloc(c, &as->disk_res,
-					    flags & BTREE_INSERT_NOWAIT
-					    ? NULL : cl, flags);
-		if (IS_ERR(b)) {
-			ret = PTR_ERR(b);
-			goto err_free;
-		}
+	for (interior = 0; interior < 2; interior++) {
+		struct prealloc_nodes *p = as->prealloc_nodes + interior;
+
+		while (p->nr < nr_nodes[interior]) {
+			b = __bch2_btree_node_alloc(c, &as->disk_res,
+						    flags & BTREE_INSERT_NOWAIT
+						    ? NULL : cl,
+						    interior, flags);
+			if (IS_ERR(b)) {
+				ret = PTR_ERR(b);
+				goto err;
+			}
 
-		as->prealloc_nodes[as->nr_prealloc_nodes++] = b;
+			p->b[p->nr++] = b;
+		}
 	}
 
 	bch2_btree_cache_cannibalize_unlock(c);
 	return 0;
-err_free:
+err:
 	bch2_btree_cache_cannibalize_unlock(c);
-	trace_btree_reserve_get_fail(c, nr_nodes, cl);
+	trace_btree_reserve_get_fail(c, nr_nodes[0] + nr_nodes[1], cl);
 	return ret;
 }
 
@@ -942,7 +955,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 	u64 start_time = local_clock();
 	int disk_res_flags = (flags & BTREE_INSERT_NOFAIL)
 		? BCH_DISK_RESERVATION_NOFAIL : 0;
-	unsigned nr_nodes;
+	unsigned nr_nodes[2];
 	unsigned update_level = level;
 	int journal_flags = 0;
 	int ret = 0;
@@ -954,10 +967,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 
 	closure_init_stack(&cl);
retry:
-	nr_nodes = 0;
+	nr_nodes[0] = nr_nodes[1] = 0;
+	update_level = level;
 
 	while (1) {
-		nr_nodes += 1 + split;
+		nr_nodes[!!update_level] += 1 + split;
 		update_level++;
 
 		if (!btree_path_node(path, update_level))
@@ -972,7 +986,7 @@ retry:
 
 	/* Might have to allocate a new root: */
 	if (update_level < BTREE_MAX_DEPTH)
-		nr_nodes += 1;
+		nr_nodes[1] += 1;
 
 	if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) {
 		trace_trans_restart_iter_upgrade(trans->fn, _RET_IP_,
@@ -1050,7 +1064,7 @@ retry:
 	}
 
 	ret = bch2_disk_reservation_get(c, &as->disk_res,
-			nr_nodes * btree_sectors(c),
+			(nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
 			c->opts.metadata_replicas,
 			disk_res_flags);
 	if (ret)
@@ -1085,11 +1099,6 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
 	list_del_init(&b->list);
 	mutex_unlock(&c->btree_cache.lock);
 
-	if (b->c.level)
-		six_lock_pcpu_alloc(&b->c.lock);
-	else
-		six_lock_pcpu_free(&b->c.lock);
-
 	mutex_lock(&c->btree_root_lock);
 	BUG_ON(btree_node_root(c, b) &&
 	       (b->c.level < btree_node_root(c, b)->c.level ||
@@ -2015,7 +2024,7 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
 			return -EINTR;
 		}
 
-		new_hash = bch2_btree_node_mem_alloc(c);
+		new_hash = bch2_btree_node_mem_alloc(c, false);
 	}
 
 	path->intent_ref++;
@@ -2091,7 +2100,7 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
 		closure_sync(&cl);
 	} while (ret);
 
-	b = bch2_btree_node_mem_alloc(c);
+	b = bch2_btree_node_mem_alloc(c, false);
 	bch2_btree_cache_cannibalize_unlock(c);
 
 	set_btree_node_fake(b);
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 8dc86fa636d6..e72eb8795616 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -76,8 +76,10 @@ struct btree_update {
 	struct journal_entry_pin	journal;
 
 	/* Preallocated nodes we reserve when we start the update: */
-	struct btree			*prealloc_nodes[BTREE_UPDATE_NODES_MAX];
-	unsigned			nr_prealloc_nodes;
+	struct prealloc_nodes {
+		struct btree		*b[BTREE_UPDATE_NODES_MAX];
+		unsigned		nr;
+	}				prealloc_nodes[2];
 
 	/* Nodes being freed: */
 	struct keylist			old_keys;
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 1fff03d301a9..457fcee7d8e1 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -443,6 +443,11 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *
 	bch2_flags_to_text(out, bch2_btree_node_flags, b->flags);
 	pr_newline(out);
 
+	pr_buf(out, "pcpu read locks: ");
+	pr_tab(out);
+	pr_buf(out, "%u", b->c.lock.readers != NULL);
+	pr_newline(out);
+
 	pr_buf(out, "written:");
 	pr_tab(out);
 	pr_buf(out, "%u", b->written);
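
The heart of the patch is the freed-list split in btree_cache.c: a btree node whose six lock carries per-cpu reader state (b->c.lock.readers != NULL) can only be recycled by a caller that also wants per-cpu read locks, so the free path and the allocation path must agree on which list a node sits on. Below is a minimal userspace sketch of that pattern, not kernel code; every name in it (node, cache, node_to_freedlist, node_mem_alloc) is a hypothetical stand-in for struct btree, struct btree_cache, btree_node_to_freedlist() and bch2_btree_node_mem_alloc().

/*
 * Illustrative sketch only -- hypothetical userspace stand-ins for the
 * kernel types; a singly linked list replaces struct list_head.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	bool		pcpu_readers;	/* stands in for b->c.lock.readers */
	struct node	*next;		/* freed-list link */
};

struct cache {
	struct node	*freed_pcpu;
	struct node	*freed_nonpcpu;
};

/* Like btree_node_to_freedlist(): the lock type picks the list. */
static void node_to_freedlist(struct cache *bc, struct node *n)
{
	struct node **list = n->pcpu_readers
		? &bc->freed_pcpu
		: &bc->freed_nonpcpu;

	n->next = *list;
	*list = n;
}

/* Like bch2_btree_node_mem_alloc(): only reuse a node of the matching type. */
static struct node *node_mem_alloc(struct cache *bc, bool pcpu_read_locks)
{
	struct node **list = pcpu_read_locks
		? &bc->freed_pcpu
		: &bc->freed_nonpcpu;
	struct node *n = *list;

	if (n) {			/* check the freed list first */
		*list = n->next;
		return n;
	}

	n = calloc(1, sizeof(*n));	/* otherwise allocate a new one */
	if (n)
		n->pcpu_readers = pcpu_read_locks;
	return n;
}

int main(void)
{
	struct cache bc = { NULL, NULL };
	struct node *interior = node_mem_alloc(&bc, true);	/* level > 0 */
	struct node *leaf     = node_mem_alloc(&bc, false);	/* level 0 */

	node_to_freedlist(&bc, interior);
	node_to_freedlist(&bc, leaf);

	/* A later per-cpu allocation reuses only the pcpu node: */
	printf("reused pcpu node: %d\n", node_mem_alloc(&bc, true) == interior);
	return 0;
}

This is also why the patch moves six_lock_pcpu_alloc() out of bch2_btree_node_hash_insert() and bch2_btree_set_root_inmem(): the lock type is now fixed at allocation time by the pcpu_read_locks argument rather than converted after the fact.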
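btree_update_interior.c applies the same two-way split to preallocation: struct btree_update now carries two prealloc_nodes stacks, index 0 for leaf nodes and index 1 for interior nodes, selected with !!level, and bch2_btree_reserve_get() fills each pool separately so the preallocated nodes already have the right lock type. A hedged sketch of just the indexing scheme, again with hypothetical userspace names in place of the kernel types:

/*
 * Illustrative sketch only -- hypothetical stand-ins for struct
 * btree_update and bch2_btree_node_alloc().
 */
#include <stdio.h>

#define UPDATE_NODES_MAX 16	/* stands in for BTREE_UPDATE_NODES_MAX */

struct node { unsigned level; };

struct update {
	/* [0]: leaf nodes (level 0), [1]: interior nodes (level > 0) */
	struct prealloc_nodes {
		struct node	*b[UPDATE_NODES_MAX];
		unsigned	nr;
	} prealloc_nodes[2];
};

/* Like bch2_btree_node_alloc(): !!level selects which pool to pop. */
static struct node *node_alloc(struct update *as, unsigned level)
{
	struct prealloc_nodes *p = &as->prealloc_nodes[!!level];

	return p->nr ? p->b[--p->nr] : NULL;
}

int main(void)
{
	static struct node leaf = { 0 }, interior = { 2 };
	struct update as = { 0 };

	as.prealloc_nodes[0].b[as.prealloc_nodes[0].nr++] = &leaf;
	as.prealloc_nodes[1].b[as.prealloc_nodes[1].nr++] = &interior;

	printf("level 2 pops the interior pool: %d\n",
	       node_alloc(&as, 2) == &interior);
	printf("level 0 pops the leaf pool: %d\n",
	       node_alloc(&as, 0) == &leaf);
	return 0;
}

The matching bookkeeping change in bch2_btree_update_start() is that nr_nodes becomes a two-element array: splits at update_level 0 count toward nr_nodes[0], everything higher (including a possible new root) toward nr_nodes[1], and the disk reservation is taken for the sum.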