diff options
author | Jakub Kicinski <kuba@kernel.org> | 2024-04-12 00:20:04 +0300 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2024-04-12 00:23:47 +0300 |
commit | 94426ed2137a948d212302df571445d4245772b9 (patch) | |
tree | 2bf20010e453ba302a818ed70271e073078bc4ba /fs | |
parent | a55b39e858901986408391c574f414ef889f1c53 (diff) | |
parent | 2ae9a8972ce04046957f8af214509cebfd3bfb9c (diff) | |
download | linux-94426ed2137a948d212302df571445d4245772b9.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR.
Conflicts:
net/unix/garbage.c
47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()")
4090fa373f0e ("af_unix: Replace garbage collection algorithm.")
Adjacent changes:
drivers/net/ethernet/broadcom/bnxt/bnxt.c
faa12ca24558 ("bnxt_en: Reset PTP tx_avail after possible firmware reset")
b3d0083caf9a ("bnxt_en: Support RSS contexts in ethtool .{get|set}_rxfh()")
drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
7ac10c7d728d ("bnxt_en: Fix possible memory leak in bnxt_rdma_aux_device_init()")
194fad5b2781 ("bnxt_en: Refactor bnxt_rdma_aux_device_init/uninit functions")
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
958f56e48385 ("net/mlx5e: Un-expose functions in en.h")
49e6c9387051 ("net/mlx5e: RSS, Block XOR hash with over 128 channels")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'fs')
72 files changed, 875 insertions, 507 deletions
@@ -1202,8 +1202,8 @@ static void aio_complete(struct aio_kiocb *iocb) spin_lock_irqsave(&ctx->wait.lock, flags); list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry) if (avail >= curr->min_nr) { - list_del_init_careful(&curr->w.entry); wake_up_process(curr->w.private); + list_del_init_careful(&curr->w.entry); } spin_unlock_irqrestore(&ctx->wait.lock, flags); } diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 3640f417cce1..5c180fdc3efb 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -281,7 +281,6 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap, struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0); struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; - struct bkey_s_c_xattr xattr; struct posix_acl *acl = NULL; struct bkey_s_c k; int ret; @@ -290,28 +289,27 @@ retry: ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash, inode_inum(inode), &search, 0); - if (ret) { - if (!bch2_err_matches(ret, ENOENT)) - acl = ERR_PTR(ret); - goto out; - } + if (ret) + goto err; k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); - if (ret) { - acl = ERR_PTR(ret); - goto out; - } + if (ret) + goto err; - xattr = bkey_s_c_to_xattr(k); + struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); acl = bch2_acl_from_disk(trans, xattr_val(xattr.v), - le16_to_cpu(xattr.v->x_val_len)); + le16_to_cpu(xattr.v->x_val_len)); + ret = PTR_ERR_OR_ZERO(acl); +err: + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto retry; - if (!IS_ERR(acl)) + if (ret) + acl = !bch2_err_matches(ret, ENOENT) ? 
ERR_PTR(ret) : NULL; + + if (!IS_ERR_OR_NULL(acl)) set_cached_acl(&inode->v, type, acl); -out: - if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart)) - goto retry; bch2_trans_iter_exit(trans, &iter); bch2_trans_put(trans); diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 63102992d955..364ae42022af 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1535,6 +1535,20 @@ enum btree_id { BTREE_ID_NR }; +static inline bool btree_id_is_alloc(enum btree_id id) +{ + switch (id) { + case BTREE_ID_alloc: + case BTREE_ID_backpointers: + case BTREE_ID_need_discard: + case BTREE_ID_freespace: + case BTREE_ID_bucket_gens: + return true; + default: + return false; + } +} + #define BTREE_MAX_DEPTH 4U /* Btree nodes */ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 6280da1244b5..d2555da55c6d 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -368,11 +368,16 @@ again: buf.buf)) { bch2_btree_node_evict(trans, cur_k.k); cur = NULL; - ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: - bch2_journal_key_delete(c, b->c.btree_id, - b->c.level, cur_k.k->k.p); + ret = bch2_journal_key_delete(c, b->c.btree_id, + b->c.level, cur_k.k->k.p); if (ret) break; + + if (!btree_id_is_alloc(b->c.btree_id)) { + ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); + if (ret) + break; + } continue; } @@ -544,12 +549,12 @@ reconstruct_root: bch2_btree_root_alloc_fake(c, i, 0); } else { bch2_btree_root_alloc_fake(c, i, 1); + bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX); if (ret) break; } - bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); reconstructed_root = true; } diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 24772538e4cc..1d58d447b386 100644 --- a/fs/bcachefs/btree_iter.h +++ 
b/fs/bcachefs/btree_iter.h @@ -642,7 +642,7 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *); static inline int btree_trans_too_many_iters(struct btree_trans *trans) { - if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_INITIAL - 8) + if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_NORMAL_LIMIT - 8) return __bch2_btree_trans_too_many_iters(trans); return 0; diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 5cbcbfe85235..1e8cf49a6935 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -130,12 +130,30 @@ struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx); } +static void journal_iter_verify(struct journal_iter *iter) +{ + struct journal_keys *keys = iter->keys; + size_t gap_size = keys->size - keys->nr; + + BUG_ON(iter->idx >= keys->gap && + iter->idx < keys->gap + gap_size); + + if (iter->idx < keys->size) { + struct journal_key *k = keys->data + iter->idx; + + int cmp = cmp_int(k->btree_id, iter->btree_id) ?: + cmp_int(k->level, iter->level); + BUG_ON(cmp < 0); + } +} + static void journal_iters_fix(struct bch_fs *c) { struct journal_keys *keys = &c->journal_keys; /* The key we just inserted is immediately before the gap: */ size_t gap_end = keys->gap + (keys->size - keys->nr); - struct btree_and_journal_iter *iter; + struct journal_key *new_key = &keys->data[keys->gap - 1]; + struct journal_iter *iter; /* * If an iterator points one after the key we just inserted, decrement @@ -143,9 +161,14 @@ static void journal_iters_fix(struct bch_fs *c) * decrement was unnecessary, bch2_btree_and_journal_iter_peek() will * handle that: */ - list_for_each_entry(iter, &c->journal_iters, journal.list) - if (iter->journal.idx == gap_end) - iter->journal.idx = keys->gap - 1; + list_for_each_entry(iter, &c->journal_iters, list) { + 
journal_iter_verify(iter); + if (iter->idx == gap_end && + new_key->btree_id == iter->btree_id && + new_key->level == iter->level) + iter->idx = keys->gap - 1; + journal_iter_verify(iter); + } } static void journal_iters_move_gap(struct bch_fs *c, size_t old_gap, size_t new_gap) @@ -192,7 +215,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, if (idx > keys->gap) idx -= keys->size - keys->nr; + size_t old_gap = keys->gap; + if (keys->nr == keys->size) { + journal_iters_move_gap(c, old_gap, keys->size); + old_gap = keys->size; + struct journal_keys new_keys = { .nr = keys->nr, .size = max_t(size_t, keys->size, 8) * 2, @@ -216,7 +244,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, keys->gap = keys->nr; } - journal_iters_move_gap(c, keys->gap, idx); + journal_iters_move_gap(c, old_gap, idx); move_gap(keys, idx); @@ -301,16 +329,21 @@ static void bch2_journal_iter_advance(struct journal_iter *iter) static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) { - struct journal_key *k = iter->keys->data + iter->idx; + journal_iter_verify(iter); + + while (iter->idx < iter->keys->size) { + struct journal_key *k = iter->keys->data + iter->idx; + + int cmp = cmp_int(k->btree_id, iter->btree_id) ?: + cmp_int(k->level, iter->level); + if (cmp > 0) + break; + BUG_ON(cmp); - while (k < iter->keys->data + iter->keys->size && - k->btree_id == iter->btree_id && - k->level == iter->level) { if (!k->overwritten) return bkey_i_to_s_c(k->k); bch2_journal_iter_advance(iter); - k = iter->keys->data + iter->idx; } return bkey_s_c_null; @@ -330,6 +363,8 @@ static void bch2_journal_iter_init(struct bch_fs *c, iter->level = level; iter->keys = &c->journal_keys; iter->idx = bch2_journal_key_search(&c->journal_keys, id, level, pos); + + journal_iter_verify(iter); } static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter) @@ -434,10 +469,15 @@ void __bch2_btree_and_journal_iter_init_node_iter(struct 
btree_trans *trans, iter->trans = trans; iter->b = b; iter->node_iter = node_iter; - bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos); - INIT_LIST_HEAD(&iter->journal.list); iter->pos = b->data->min_key; iter->at_end = false; + INIT_LIST_HEAD(&iter->journal.list); + + if (trans->journal_replay_not_finished) { + bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos); + if (!test_bit(BCH_FS_may_go_rw, &trans->c->flags)) + list_add(&iter->journal.list, &trans->c->journal_iters); + } } /* @@ -452,9 +492,6 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans, bch2_btree_node_iter_init_from_start(&node_iter, b); __bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key); - if (trans->journal_replay_not_finished && - !test_bit(BCH_FS_may_go_rw, &trans->c->flags)) - list_add(&iter->journal.list, &trans->c->journal_iters); } /* sort and dedup all keys in the journal: */ diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 581edcb0911b..88a3582a3275 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -169,6 +169,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc, } else { mutex_lock(&bc->lock); list_move_tail(&ck->list, &bc->freed_pcpu); + bc->nr_freed_pcpu++; mutex_unlock(&bc->lock); } } @@ -245,6 +246,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, if (!list_empty(&bc->freed_pcpu)) { ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list); list_del_init(&ck->list); + bc->nr_freed_pcpu--; } mutex_unlock(&bc->lock); } @@ -659,7 +661,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, commit_flags |= BCH_WATERMARK_reclaim; if (ck->journal.seq != journal_last_seq(j) || - j->watermark == BCH_WATERMARK_stripe) + !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) commit_flags |= BCH_TRANS_COMMIT_no_journal_res; ret = 
bch2_btree_iter_traverse(&b_iter) ?: diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index b9b151e693ed..f2caf491957e 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -440,33 +440,7 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans, struct btree_path *path, struct btree_bkey_cached_common *b) { - struct btree_path *linked; - unsigned i, iter; - int ret; - - /* - * XXX BIG FAT NOTICE - * - * Drop all read locks before taking a write lock: - * - * This is a hack, because bch2_btree_node_lock_write_nofail() is a - * hack - but by dropping read locks first, this should never fail, and - * we only use this in code paths where whatever read locks we've - * already taken are no longer needed: - */ - - trans_for_each_path(trans, linked, iter) { - if (!linked->nodes_locked) - continue; - - for (i = 0; i < BTREE_MAX_DEPTH; i++) - if (btree_node_read_locked(linked, i)) { - btree_node_unlock(trans, linked, i); - btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK); - } - } - - ret = __btree_node_lock_write(trans, path, b, true); + int ret = __btree_node_lock_write(trans, path, b, true); BUG_ON(ret); } diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 3f33be7e5e5c..556f76f5c84e 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -133,6 +133,9 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, if (le64_to_cpu(bn->magic) != bset_magic(c)) return; + if (btree_id_is_alloc(BTREE_NODE_ID(bn))) + return; + rcu_read_lock(); struct found_btree_node n = { .btree_id = BTREE_NODE_ID(bn), @@ -213,6 +216,9 @@ static int read_btree_nodes(struct find_btree_nodes *f) closure_init_stack(&cl); for_each_online_member(c, ca) { + if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree))) + continue; + struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL); struct task_struct *t; @@ -290,7 +296,7 @@ again: found_btree_node_to_text(&buf, 
c, n); bch_err(c, "%s", buf.buf); printbuf_exit(&buf); - return -1; + return -BCH_ERR_fsck_repair_unimplemented; } } @@ -436,6 +442,9 @@ bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree) int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, unsigned level, struct bpos node_min, struct bpos node_max) { + if (btree_id_is_alloc(btree)) + return 0; + struct find_btree_nodes *f = &c->found_btree_nodes; int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 9404d96c38f3..e0c982a4195c 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -364,7 +364,21 @@ struct btree_insert_entry { unsigned long ip_allocated; }; +/* Number of btree paths we preallocate, usually enough */ #define BTREE_ITER_INITIAL 64 +/* + * Limit for btree_trans_too_many_iters(); this is enough that almost all code + * paths should run inside this limit, and if they don't it usually indicates a + * bug (leaking/duplicated btree paths). + * + * exception: some fsck paths + * + * bugs with excessive path usage seem to have possibly been eliminated now, so + * we might consider eliminating this (and btree_trans_too_many_iters()) at some + * point.
+ */ +#define BTREE_ITER_NORMAL_LIMIT 256 +/* never exceed limit */ #define BTREE_ITER_MAX (1U << 10) struct btree_trans_commit_hook; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 32397b99752f..c4a5e83a56a4 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -26,9 +26,9 @@ #include <linux/random.h> -const char * const bch2_btree_update_modes[] = { +static const char * const bch2_btree_update_modes[] = { #define x(t) #t, - BCH_WATERMARKS() + BTREE_UPDATE_MODES() #undef x NULL }; @@ -704,9 +704,13 @@ static void btree_update_nodes_written(struct btree_update *as) bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c, "%s", bch2_err_str(ret)); err: - if (as->b) { - - b = as->b; + /* + * We have to be careful because another thread might be getting ready + * to free as->b and calling btree_update_reparent() on us - we'll + * recheck under btree_update_lock below: + */ + b = READ_ONCE(as->b); + if (b) { btree_path_idx_t path_idx = get_unlocked_mut_path(trans, as->btree_id, b->c.level, b->key.k.p); struct btree_path *path = trans->paths + path_idx; @@ -850,15 +854,17 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b) { struct bch_fs *c = as->c; - mutex_lock(&c->btree_interior_update_lock); - list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); - BUG_ON(as->mode != BTREE_UPDATE_none); + BUG_ON(as->update_level_end < b->c.level); BUG_ON(!btree_node_dirty(b)); BUG_ON(!b->c.level); + mutex_lock(&c->btree_interior_update_lock); + list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); + as->mode = BTREE_UPDATE_node; as->b = b; + as->update_level_end = b->c.level; set_btree_node_write_blocked(b); list_add(&as->write_blocked_list, &b->write_blocked); @@ -1100,7 +1106,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans * static struct btree_update * bch2_btree_update_start(struct 
btree_trans *trans, struct btree_path *path, - unsigned level, bool split, unsigned flags) + unsigned level_start, bool split, unsigned flags) { struct bch_fs *c = trans->c; struct btree_update *as; @@ -1108,7 +1114,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, int disk_res_flags = (flags & BCH_TRANS_COMMIT_no_enospc) ? BCH_DISK_RESERVATION_NOFAIL : 0; unsigned nr_nodes[2] = { 0, 0 }; - unsigned update_level = level; + unsigned level_end = level_start; enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; int ret = 0; u32 restart_count = trans->restart_count; @@ -1123,34 +1129,30 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, flags &= ~BCH_WATERMARK_MASK; flags |= watermark; - if (watermark < c->journal.watermark) { - struct journal_res res = { 0 }; - unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK; + if (watermark < BCH_WATERMARK_reclaim && + test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) { + if (flags & BCH_TRANS_COMMIT_journal_reclaim) + return ERR_PTR(-BCH_ERR_journal_reclaim_would_deadlock); - if ((flags & BCH_TRANS_COMMIT_journal_reclaim) && - watermark < BCH_WATERMARK_reclaim) - journal_flags |= JOURNAL_RES_GET_NONBLOCK; - - ret = drop_locks_do(trans, - bch2_journal_res_get(&c->journal, &res, 1, journal_flags)); - if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) - ret = -BCH_ERR_journal_reclaim_would_deadlock; + bch2_trans_unlock(trans); + wait_event(c->journal.wait, !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)); + ret = bch2_trans_relock(trans); if (ret) return ERR_PTR(ret); } while (1) { - nr_nodes[!!update_level] += 1 + split; - update_level++; + nr_nodes[!!level_end] += 1 + split; + level_end++; - ret = bch2_btree_path_upgrade(trans, path, update_level + 1); + ret = bch2_btree_path_upgrade(trans, path, level_end + 1); if (ret) return ERR_PTR(ret); - if (!btree_path_node(path, update_level)) { + if (!btree_path_node(path, level_end)) { /* Allocating new root? 
*/ nr_nodes[1] += split; - update_level = BTREE_MAX_DEPTH; + level_end = BTREE_MAX_DEPTH; break; } @@ -1158,11 +1160,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, * Always check for space for two keys, even if we won't have to * split at prior level - it might have been a merge instead: */ - if (bch2_btree_node_insert_fits(path->l[update_level].b, + if (bch2_btree_node_insert_fits(path->l[level_end].b, BKEY_BTREE_PTR_U64s_MAX * 2)) break; - split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c); + split = path->l[level_end].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c); } if (!down_read_trylock(&c->gc_lock)) { @@ -1176,14 +1178,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS); memset(as, 0, sizeof(*as)); closure_init(&as->cl, NULL); - as->c = c; - as->start_time = start_time; - as->ip_started = _RET_IP_; - as->mode = BTREE_UPDATE_none; - as->watermark = watermark; - as->took_gc_lock = true; - as->btree_id = path->btree_id; - as->update_level = update_level; + as->c = c; + as->start_time = start_time; + as->ip_started = _RET_IP_; + as->mode = BTREE_UPDATE_none; + as->watermark = watermark; + as->took_gc_lock = true; + as->btree_id = path->btree_id; + as->update_level_start = level_start; + as->update_level_end = level_end; INIT_LIST_HEAD(&as->list); INIT_LIST_HEAD(&as->unwritten_list); INIT_LIST_HEAD(&as->write_blocked_list); @@ -1373,12 +1376,12 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, } static void -__bch2_btree_insert_keys_interior(struct btree_update *as, - struct btree_trans *trans, - struct btree_path *path, - struct btree *b, - struct btree_node_iter node_iter, - struct keylist *keys) +bch2_btree_insert_keys_interior(struct btree_update *as, + struct btree_trans *trans, + struct btree_path *path, + struct btree *b, + struct btree_node_iter node_iter, + struct keylist *keys) { struct bkey_i 
*insert = bch2_keylist_front(keys); struct bkey_packed *k; @@ -1534,7 +1537,7 @@ static void btree_split_insert_keys(struct btree_update *as, bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p); - __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); + bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); BUG_ON(bch2_btree_node_check_topology(trans, b)); } @@ -1714,27 +1717,6 @@ err: goto out; } -static void -bch2_btree_insert_keys_interior(struct btree_update *as, - struct btree_trans *trans, - struct btree_path *path, - struct btree *b, - struct keylist *keys) -{ - struct btree_path *linked; - unsigned i; - - __bch2_btree_insert_keys_interior(as, trans, path, b, - path->l[b->c.level].iter, keys); - - btree_update_updated_node(as, b); - - trans_for_each_path_with_node(trans, b, linked, i) - bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); - - bch2_trans_verify_paths(trans); -} - /** * bch2_btree_insert_node - insert bkeys into a given btree node * @@ -1755,7 +1737,8 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t struct keylist *keys) { struct bch_fs *c = as->c; - struct btree_path *path = trans->paths + path_idx; + struct btree_path *path = trans->paths + path_idx, *linked; + unsigned i; int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s); int old_live_u64s = b->nr.live_u64s; int live_u64s_added, u64s_added; @@ -1784,7 +1767,13 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t return ret; } - bch2_btree_insert_keys_interior(as, trans, path, b, keys); + bch2_btree_insert_keys_interior(as, trans, path, b, + path->l[b->c.level].iter, keys); + + trans_for_each_path_with_node(trans, b, linked, i) + bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); + + bch2_trans_verify_paths(trans); live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s; @@ -1798,6 +1787,7 
@@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t bch2_maybe_compact_whiteouts(c, b)) bch2_trans_node_reinit_iter(trans, b); + btree_update_updated_node(as, b); bch2_btree_node_unlock_write(trans, path, b); BUG_ON(bch2_btree_node_check_topology(trans, b)); @@ -1807,7 +1797,7 @@ split: * We could attempt to avoid the transaction restart, by calling * bch2_btree_path_upgrade() and allocating more nodes: */ - if (b->c.level >= as->update_level) { + if (b->c.level >= as->update_level_end) { trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b); return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race); } @@ -2519,9 +2509,11 @@ void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned lev static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as) { - prt_printf(out, "%ps: btree=%s watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", + prt_printf(out, "%ps: btree=%s l=%u-%u watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", (void *) as->ip_started, bch2_btree_id_str(as->btree_id), + as->update_level_start, + as->update_level_end, bch2_watermarks[as->watermark], bch2_btree_update_modes[as->mode], as->nodes_written, diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 88dcf5a22a3b..c1a479ebaad1 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -57,7 +57,8 @@ struct btree_update { unsigned took_gc_lock:1; enum btree_id btree_id; - unsigned update_level; + unsigned update_level_start; + unsigned update_level_end; struct disk_reservation disk_res; diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index cbfa6459bdbc..72781aad6ba7 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -134,42 +134,38 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg struct fsck_thread { struct 
thread_with_stdio thr; struct bch_fs *c; - char **devs; - size_t nr_devs; struct bch_opts opts; }; static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr) { struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr); - if (thr->devs) - for (size_t i = 0; i < thr->nr_devs; i++) - kfree(thr->devs[i]); - kfree(thr->devs); kfree(thr); } static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio) { struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr); - struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts); - - if (IS_ERR(c)) - return PTR_ERR(c); + struct bch_fs *c = thr->c; - int ret = 0; - if (test_bit(BCH_FS_errors_fixed, &c->flags)) - ret |= 1; - if (test_bit(BCH_FS_error, &c->flags)) - ret |= 4; + int ret = PTR_ERR_OR_ZERO(c); + if (ret) + return ret; - bch2_fs_stop(c); + ret = bch2_fs_start(thr->c); + if (ret) + goto err; - if (ret & 1) + if (test_bit(BCH_FS_errors_fixed, &c->flags)) { bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name); - if (ret & 4) + ret |= 1; + } + if (test_bit(BCH_FS_error, &c->flags)) { bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name); - + ret |= 4; + } +err: + bch2_fs_stop(c); return ret; } @@ -182,7 +178,7 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a { struct bch_ioctl_fsck_offline arg; struct fsck_thread *thr = NULL; - u64 *devs = NULL; + darray_str(devs) = {}; long ret = 0; if (copy_from_user(&arg, user_arg, sizeof(arg))) @@ -194,29 +190,32 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) || - !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) || - !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) { - ret = -ENOMEM; - goto err; - } + for (size_t i = 0; i < arg.nr_devs; i++) { + u64 dev_u64; + ret = 
copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64)); + if (ret) + goto err; - thr->opts = bch2_opts_empty(); - thr->nr_devs = arg.nr_devs; + char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX); + ret = PTR_ERR_OR_ZERO(dev_str); + if (ret) + goto err; - if (copy_from_user(devs, &user_arg->devs[0], - array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) { - ret = -EINVAL; - goto err; + ret = darray_push(&devs, dev_str); + if (ret) { + kfree(dev_str); + goto err; + } } - for (size_t i = 0; i < arg.nr_devs; i++) { - thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX); - ret = PTR_ERR_OR_ZERO(thr->devs[i]); - if (ret) - goto err; + thr = kzalloc(sizeof(*thr), GFP_KERNEL); + if (!thr) { + ret = -ENOMEM; + goto err; } + thr->opts = bch2_opts_empty(); + if (arg.opts) { char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); @@ -230,15 +229,26 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio); + /* We need request_key() to be called before we punt to kthread: */ + opt_set(thr->opts, nostart, true); + + thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts); + + if (!IS_ERR(thr->c) && + thr->c->opts.errors == BCH_ON_ERROR_panic) + thr->c->opts.errors = BCH_ON_ERROR_ro; + ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops); -err: - if (ret < 0) { - if (thr) - bch2_fsck_thread_exit(&thr->thr); - pr_err("ret %s", bch2_err_str(ret)); - } - kfree(devs); +out: + darray_for_each(devs, i) + kfree(*i); + darray_exit(&devs); return ret; +err: + if (thr) + bch2_fsck_thread_exit(&thr->thr); + pr_err("ret %s", bch2_err_str(ret)); + goto out; } static long bch2_global_ioctl(unsigned cmd, void __user *arg) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 34731ee0217f..0022b51ce3c0 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -598,6 
+598,8 @@ int bch2_data_update_init(struct btree_trans *trans, i++; } + unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have)); + /* * If current extent durability is less than io_opts.data_replicas, * we're not trying to rereplicate the extent up to data_replicas here - @@ -607,7 +609,7 @@ int bch2_data_update_init(struct btree_trans *trans, * rereplicate, currently, so that users don't get an unexpected -ENOSPC */ if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) && - durability_have >= io_opts.data_replicas) { + !durability_required) { m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs; m->data_opts.rewrite_ptrs = 0; /* if iter == NULL, it's just a promote */ @@ -616,11 +618,18 @@ int bch2_data_update_init(struct btree_trans *trans, goto done; } - m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) + + m->op.nr_replicas = min(durability_removing, durability_required) + m->data_opts.extra_replicas; - m->op.nr_replicas_required = m->op.nr_replicas; - BUG_ON(!m->op.nr_replicas); + /* + * If device(s) were set to durability=0 after data was written to them + * we can end up with a durability=0 extent, and the normal algorithm + * that tries not to increase durability doesn't work: + */ + if (!(durability_have + durability_removing)) + m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1); + + m->op.nr_replicas_required = m->op.nr_replicas; if (reserve_sectors) { ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors, diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 208ce6f0fc43..cd99b7399414 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -13,6 +13,7 @@ #include "btree_iter.h" #include "btree_locking.h" #include "btree_update.h" +#include "btree_update_interior.h" #include "buckets.h" #include "debug.h" #include "error.h" @@ -668,7 +669,7 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf, i->size = size; i->ret = 0; - do { + while (1) {
err = flush_buf(i); if (err) return err; @@ -676,9 +677,12 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf, if (!i->size) break; + if (done) + break; + done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter); i->iter++; - } while (!done); + } if (i->buf.allocation_failure) return -ENOMEM; @@ -693,13 +697,45 @@ static const struct file_operations journal_pins_ops = { .read = bch2_journal_pins_read, }; +static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + int err; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + + if (!i->iter) { + bch2_btree_updates_to_text(&i->buf, c); + i->iter++; + } + + err = flush_buf(i); + if (err) + return err; + + if (i->buf.allocation_failure) + return -ENOMEM; + + return i->ret; +} + +static const struct file_operations btree_updates_ops = { + .owner = THIS_MODULE, + .open = bch2_dump_open, + .release = bch2_dump_release, + .read = bch2_btree_updates_read, +}; + static int btree_transaction_stats_open(struct inode *inode, struct file *file) { struct bch_fs *c = inode->i_private; struct dump_iter *i; i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL); - if (!i) return -ENOMEM; @@ -866,6 +902,20 @@ void bch2_fs_debug_exit(struct bch_fs *c) debugfs_remove_recursive(c->fs_debug_dir); } +static void bch2_fs_debug_btree_init(struct bch_fs *c, struct btree_debug *bd) +{ + struct dentry *d; + + d = debugfs_create_dir(bch2_btree_id_str(bd->id), c->btree_debug_dir); + + debugfs_create_file("keys", 0400, d, bd, &btree_debug_ops); + + debugfs_create_file("formats", 0400, d, bd, &btree_format_debug_ops); + + debugfs_create_file("bfloat-failed", 0400, d, bd, + &bfloat_failed_debug_ops); +} + void bch2_fs_debug_init(struct bch_fs *c) { struct btree_debug *bd; @@ -888,6 +938,9 @@ void bch2_fs_debug_init(struct bch_fs *c) debugfs_create_file("journal_pins", 0400, c->fs_debug_dir, 
c->btree_debug, &journal_pins_ops); + debugfs_create_file("btree_updates", 0400, c->fs_debug_dir, + c->btree_debug, &btree_updates_ops); + debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir, c, &btree_transaction_stats_op); @@ -902,21 +955,7 @@ void bch2_fs_debug_init(struct bch_fs *c) bd < c->btree_debug + ARRAY_SIZE(c->btree_debug); bd++) { bd->id = bd - c->btree_debug; - debugfs_create_file(bch2_btree_id_str(bd->id), - 0400, c->btree_debug_dir, bd, - &btree_debug_ops); - - snprintf(name, sizeof(name), "%s-formats", - bch2_btree_id_str(bd->id)); - - debugfs_create_file(name, 0400, c->btree_debug_dir, bd, - &btree_format_debug_ops); - - snprintf(name, sizeof(name), "%s-bfloat-failed", - bch2_btree_id_str(bd->id)); - - debugfs_create_file(name, 0400, c->btree_debug_dir, bd, - &bfloat_failed_debug_ops); + bch2_fs_debug_btree_init(c, bd); } } diff --git a/fs/bcachefs/eytzinger.c b/fs/bcachefs/eytzinger.c index 4ce5e957a6e9..0f955c3c76a7 100644 --- a/fs/bcachefs/eytzinger.c +++ b/fs/bcachefs/eytzinger.c @@ -115,7 +115,7 @@ static void swap_bytes(void *a, void *b, size_t n) struct wrapper { cmp_func_t cmp; - swap_func_t swap; + swap_func_t swap_func; }; /* @@ -125,7 +125,7 @@ struct wrapper { static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv) { if (swap_func == SWAP_WRAPPER) { - ((const struct wrapper *)priv)->swap(a, b, (int)size); + ((const struct wrapper *)priv)->swap_func(a, b, (int)size); return; } @@ -174,7 +174,7 @@ void eytzinger0_sort_r(void *base, size_t n, size_t size, int i, c, r; /* called from 'sort' without swap function, let's pick the default */ - if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap) + if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func) swap_func = NULL; if (!swap_func) { @@ -227,7 +227,7 @@ void eytzinger0_sort(void *base, size_t n, size_t size, { struct wrapper w = { .cmp = cmp_func, - .swap = swap_func, + .swap_func = swap_func, }; return 
eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w); diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h index ee0e2df33322..24840aee335c 100644 --- a/fs/bcachefs/eytzinger.h +++ b/fs/bcachefs/eytzinger.h @@ -242,8 +242,8 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) (_i) = eytzinger0_next((_i), (_size))) /* return greatest node <= @search, or -1 if not found */ -static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, - cmp_func_t cmp, const void *search) +static inline int eytzinger0_find_le(void *base, size_t nr, size_t size, + cmp_func_t cmp, const void *search) { unsigned i, n = 0; @@ -256,18 +256,32 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, } while (n < nr); if (n & 1) { - /* @i was greater than @search, return previous node: */ + /* + * @i was greater than @search, return previous node: + * + * if @i was leftmost/smallest element, + * eytzinger0_prev(eytzinger0_first())) returns -1, as expected + */ return eytzinger0_prev(i, nr); } else { return i; } } -static inline ssize_t eytzinger0_find_gt(void *base, size_t nr, size_t size, - cmp_func_t cmp, const void *search) +static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size, + cmp_func_t cmp, const void *search) { ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search); - return eytzinger0_next(idx, size); + + /* + * if eytitzinger0_find_le() returned -1 - no element was <= search - we + * want to return the first element; next/prev identities mean this work + * as expected + * + * similarly if find_le() returns last element, we should return -1; + * identities mean this all works out: + */ + return eytzinger0_next(idx, nr); } #define eytzinger0_find(base, nr, size, _cmp, search) \ diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index ab811c0dad26..04a577848b01 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -67,6 +67,8 
@@ void bch2_journal_set_watermark(struct journal *j) track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb)) trace_and_count(c, journal_full, c); + mod_bit(JOURNAL_SPACE_LOW, &j->flags, low_on_space || low_on_pin); + swap(watermark, j->watermark); if (watermark > j->watermark) journal_wake(j); diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 8c053cb64ca5..b5161b5d76a0 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -134,6 +134,7 @@ enum journal_flags { JOURNAL_STARTED, JOURNAL_MAY_SKIP_FLUSH, JOURNAL_NEED_FLUSH_WRITE, + JOURNAL_SPACE_LOW, }; /* Reasons we may fail to get a journal reservation: */ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index b76c16152579..0f328aba9760 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -47,20 +47,6 @@ void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) } } -static bool btree_id_is_alloc(enum btree_id id) -{ - switch (id) { - case BTREE_ID_alloc: - case BTREE_ID_backpointers: - case BTREE_ID_need_discard: - case BTREE_ID_freespace: - case BTREE_ID_bucket_gens: - return true; - default: - return false; - } -} - /* for -o reconstruct_alloc: */ static void bch2_reconstruct_alloc(struct bch_fs *c) { diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 0e806f04f3d7..544322d5c251 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -125,6 +125,15 @@ static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ances return s->parent; } +static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor) +{ + const struct snapshot_t *s = __snapshot_t(t, id); + if (!s) + return false; + + return test_bit(ancestor - id - 1, s->is_ancestor); +} + bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) { bool ret; @@ -140,13 +149,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) while (id && id < ancestor - 
IS_ANCESTOR_BITMAP) id = get_ancestor_below(t, id, ancestor); - if (id && id < ancestor) { - ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor); + ret = id && id < ancestor + ? test_ancestor_bitmap(t, id, ancestor) + : id == ancestor; - EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor)); - } else { - ret = id == ancestor; - } + EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor)); out: rcu_read_unlock(); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index e0aa3655b63b..5eee055ee272 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -143,7 +143,7 @@ void bch2_free_super(struct bch_sb_handle *sb) { kfree(sb->bio); if (!IS_ERR_OR_NULL(sb->s_bdev_file)) - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); kfree(sb->holder); kfree(sb->sb_name); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index ed63018f21be..8daf80a38d60 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -288,8 +288,13 @@ static void __bch2_fs_read_only(struct bch_fs *c) if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) && !test_bit(BCH_FS_emergency_ro, &c->flags)) set_bit(BCH_FS_clean_shutdown, &c->flags); + bch2_fs_journal_stop(&c->journal); + bch_info(c, "%sshutdown complete, journal seq %llu", + test_bit(BCH_FS_clean_shutdown, &c->flags) ? 
"" : "un", + c->journal.seq_ondisk); + /* * After stopping journal: */ diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index c86a93a8d8fc..b18b0cc81b59 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -17,7 +17,6 @@ #include "btree_iter.h" #include "btree_key_cache.h" #include "btree_update.h" -#include "btree_update_interior.h" #include "btree_gc.h" #include "buckets.h" #include "clock.h" @@ -166,7 +165,6 @@ read_attribute(btree_write_stats); read_attribute(btree_cache_size); read_attribute(compression_stats); read_attribute(journal_debug); -read_attribute(btree_updates); read_attribute(btree_cache); read_attribute(btree_key_cache); read_attribute(stripes_heap); @@ -415,9 +413,6 @@ SHOW(bch2_fs) if (attr == &sysfs_journal_debug) bch2_journal_debug_to_text(out, &c->journal); - if (attr == &sysfs_btree_updates) - bch2_btree_updates_to_text(out, c); - if (attr == &sysfs_btree_cache) bch2_btree_cache_to_text(out, c); @@ -639,7 +634,6 @@ SYSFS_OPS(bch2_fs_internal); struct attribute *bch2_fs_internal_files[] = { &sysfs_flags, &sysfs_journal_debug, - &sysfs_btree_updates, &sysfs_btree_cache, &sysfs_btree_key_cache, &sysfs_new_stripes, diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index b3fe9fc57747..bfec656f94c0 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -672,7 +672,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos) bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos, BTREE_ITER_INTENT); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)); ret = bkey_err(k); if (ret) goto err; diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index b7e7c29278fc..5cf885b09986 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -788,6 +788,14 @@ static inline int copy_from_user_errcode(void *to, const void __user *from, unsi #endif +static inline void mod_bit(long nr, volatile unsigned long *addr, bool v) +{ + if (v) + set_bit(nr, addr); + else + clear_bit(nr, addr); 
+} + static inline void __set_bit_le64(size_t bit, __le64 *addr) { addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64)); @@ -795,7 +803,7 @@ static inline void __set_bit_le64(size_t bit, __le64 *addr) static inline void __clear_bit_le64(size_t bit, __le64 *addr) { - addr[bit / 64] &= !cpu_to_le64(BIT_ULL(bit % 64)); + addr[bit / 64] &= ~cpu_to_le64(BIT_ULL(bit % 64)); } static inline bool test_bit_le64(size_t bit, __le64 *addr) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index dd6f566a383f..121ab890bd05 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1133,6 +1133,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, if (ret) return ret; + ret = btrfs_record_root_in_trans(trans, node->root); + if (ret) + return ret; ret = btrfs_update_delayed_inode(trans, node->root, path, node); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 37701531eeb1..c65fe5de4022 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2533,7 +2533,7 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode, */ if (bits & EXTENT_CLEAR_META_RESV && root != fs_info->tree_root) - btrfs_delalloc_release_metadata(inode, len, false); + btrfs_delalloc_release_metadata(inode, len, true); /* For sanity tests. 
*/ if (btrfs_is_testing(fs_info)) @@ -4503,6 +4503,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; struct btrfs_block_rsv block_rsv; u64 root_flags; + u64 qgroup_reserved = 0; int ret; down_write(&fs_info->subvol_sem); @@ -4547,12 +4548,20 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true); if (ret) goto out_undead; + qgroup_reserved = block_rsv.qgroup_rsv_reserved; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_release; } + ret = btrfs_record_root_in_trans(trans, root); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out_end_trans; + } + btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); + qgroup_reserved = 0; trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; @@ -4611,7 +4620,9 @@ out_end_trans: ret = btrfs_end_transaction(trans); inode->i_flags |= S_DEAD; out_release: - btrfs_subvolume_release_metadata(root, &block_rsv); + btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL); + if (qgroup_reserved) + btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved); out_undead: if (ret) { spin_lock(&dest->root_item_lock); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 294e31edec9d..55f3ba6a831c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -613,6 +613,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap, int ret; dev_t anon_dev; u64 objectid; + u64 qgroup_reserved = 0; root_item = kzalloc(sizeof(*root_item), GFP_KERNEL); if (!root_item) @@ -650,13 +651,18 @@ static noinline int create_subvol(struct mnt_idmap *idmap, trans_num_items, false); if (ret) goto out_new_inode_args; + qgroup_reserved = block_rsv.qgroup_rsv_reserved; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); - btrfs_subvolume_release_metadata(root, &block_rsv); - goto out_new_inode_args; + goto 
out_release_rsv; } + ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root); + if (ret) + goto out; + btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); + qgroup_reserved = 0; trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; /* Tree log can't currently deal with an inode which is a new root. */ @@ -767,9 +773,11 @@ static noinline int create_subvol(struct mnt_idmap *idmap, out: trans->block_rsv = NULL; trans->bytes_reserved = 0; - btrfs_subvolume_release_metadata(root, &block_rsv); - btrfs_end_transaction(trans); +out_release_rsv: + btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL); + if (qgroup_reserved) + btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved); out_new_inode_args: btrfs_new_inode_args_destroy(&new_inode_args); out_inode: @@ -791,6 +799,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, struct btrfs_pending_snapshot *pending_snapshot; unsigned int trans_num_items; struct btrfs_trans_handle *trans; + struct btrfs_block_rsv *block_rsv; + u64 qgroup_reserved = 0; int ret; /* We do not support snapshotting right now. 
*/ @@ -827,19 +837,19 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, goto free_pending; } - btrfs_init_block_rsv(&pending_snapshot->block_rsv, - BTRFS_BLOCK_RSV_TEMP); + block_rsv = &pending_snapshot->block_rsv; + btrfs_init_block_rsv(block_rsv, BTRFS_BLOCK_RSV_TEMP); /* * 1 to add dir item * 1 to add dir index * 1 to update parent inode item */ trans_num_items = create_subvol_num_items(inherit) + 3; - ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, - &pending_snapshot->block_rsv, + ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, block_rsv, trans_num_items, false); if (ret) goto free_pending; + qgroup_reserved = block_rsv->qgroup_rsv_reserved; pending_snapshot->dentry = dentry; pending_snapshot->root = root; @@ -852,6 +862,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, ret = PTR_ERR(trans); goto fail; } + ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root); + if (ret) { + btrfs_end_transaction(trans); + goto fail; + } + btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); + qgroup_reserved = 0; trans->pending_snapshot = pending_snapshot; @@ -881,7 +898,9 @@ fail: if (ret && pending_snapshot->snap) pending_snapshot->snap->anon_dev = 0; btrfs_put_root(pending_snapshot->snap); - btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv); + btrfs_block_rsv_release(fs_info, block_rsv, (u64)-1, NULL); + if (qgroup_reserved) + btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved); free_pending: if (pending_snapshot->anon_dev) free_anon_bdev(pending_snapshot->anon_dev); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 5f90f0605b12..cf8820ce7aa2 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4495,6 +4495,8 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes) BTRFS_QGROUP_RSV_META_PREALLOC); trace_qgroup_meta_convert(root, num_bytes); qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes); + if 
(!sb_rdonly(fs_info->sb)) + add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS); } /* diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 4bb538a372ce..7007f9e0c972 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -548,13 +548,3 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, } return ret; } - -void btrfs_subvolume_release_metadata(struct btrfs_root *root, - struct btrfs_block_rsv *rsv) -{ - struct btrfs_fs_info *fs_info = root->fs_info; - u64 qgroup_to_release; - - btrfs_block_rsv_release(fs_info, rsv, (u64)-1, &qgroup_to_release); - btrfs_qgroup_convert_reserved_meta(root, qgroup_to_release); -} diff --git a/fs/btrfs/root-tree.h b/fs/btrfs/root-tree.h index 6f929cf3bd49..8f5739e732b9 100644 --- a/fs/btrfs/root-tree.h +++ b/fs/btrfs/root-tree.h @@ -18,8 +18,6 @@ struct btrfs_trans_handle; int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int nitems, bool use_global_rsv); -void btrfs_subvolume_release_metadata(struct btrfs_root *root, - struct btrfs_block_rsv *rsv); int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, u64 ref_id, u64 dirid, u64 sequence, const struct fscrypt_str *name); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 46e8426adf4f..85f359e0e0a7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -745,14 +745,6 @@ again: h->reloc_reserved = reloc_reserved; } - /* - * Now that we have found a transaction to be a part of, convert the - * qgroup reservation from prealloc to pertrans. A different transaction - * can't race in and free our pertrans out from under us. - */ - if (qgroup_reserved) - btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); - got_it: if (!current->journal_info) current->journal_info = h; @@ -786,8 +778,15 @@ got_it: * not just freed. 
*/ btrfs_end_transaction(h); - return ERR_PTR(ret); + goto reserve_fail; } + /* + * Now that we have found a transaction to be a part of, convert the + * qgroup reservation from prealloc to pertrans. A different transaction + * can't race in and free our pertrans out from under us. + */ + if (qgroup_reserved) + btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); return h; @@ -1495,6 +1494,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) radix_tree_tag_clear(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); + btrfs_qgroup_free_meta_all_pertrans(root); spin_unlock(&fs_info->fs_roots_radix_lock); btrfs_free_log(trans, root); @@ -1519,7 +1519,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) if (ret2) return ret2; spin_lock(&fs_info->fs_roots_radix_lock); - btrfs_qgroup_free_meta_all_pertrans(root); } } spin_unlock(&fs_info->fs_roots_radix_lock); diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 39e75131fd5a..9901057a15ba 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb) sb->s_mtd = NULL; } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) { sync_blockdev(sb->s_bdev); - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); } kfree(sbi); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cfb8449c731f..044135796f2b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5668,7 +5668,7 @@ failed_mount: brelse(sbi->s_sbh); if (sbi->s_journal_bdev_file) { invalidate_bdev(file_bdev(sbi->s_journal_bdev_file)); - fput(sbi->s_journal_bdev_file); + bdev_fput(sbi->s_journal_bdev_file); } out_fail: invalidate_bdev(sb->s_bdev); @@ -5913,7 +5913,7 @@ static struct file *ext4_get_journal_blkdev(struct super_block *sb, out_bh: brelse(bh); out_bdev: - fput(bdev_file); + bdev_fput(bdev_file); return ERR_PTR(errno); } @@ -5952,7 +5952,7 @@ static journal_t *ext4_open_dev_journal(struct super_block 
*sb, out_journal: jbd2_journal_destroy(journal); out_bdev: - fput(bdev_file); + bdev_fput(bdev_file); return ERR_PTR(errno); } @@ -7327,7 +7327,7 @@ static void ext4_kill_sb(struct super_block *sb) kill_block_super(sb); if (bdev_file) - fput(bdev_file); + bdev_fput(bdev_file); } static struct file_system_type ext4_fs_type = { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a6867f26f141..a4bc26dfdb1a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1558,7 +1558,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) for (i = 0; i < sbi->s_ndevs; i++) { if (i > 0) - fput(FDEV(i).bdev_file); + bdev_fput(FDEV(i).bdev_file); #ifdef CONFIG_BLK_DEV_ZONED kvfree(FDEV(i).blkz_seq); #endif diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 73389c68e251..9609349e92e5 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1141,7 +1141,7 @@ journal_found: lbmLogShutdown(log); close: /* close external log device */ - fput(bdev_file); + bdev_fput(bdev_file); free: /* free log descriptor */ mutex_unlock(&jfs_log_mutex); @@ -1485,7 +1485,7 @@ int lmLogClose(struct super_block *sb) bdev_file = log->bdev_file; rc = lmLogShutdown(log); - fput(bdev_file); + bdev_fput(bdev_file); kfree(log); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2391ab3c3231..84d4093ca713 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3042,12 +3042,9 @@ static void nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - spin_lock(&nn->client_lock); clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); - put_client_renew_locked(clp); - spin_unlock(&nn->client_lock); + drop_client(clp); } static int @@ -6616,7 +6613,7 @@ deleg_reaper(struct nfsd_net *nn) list_add(&clp->cl_ra_cblist, &cblist); /* release in nfsd4_cb_recall_any_release */ - atomic_inc(&clp->cl_rpc_users); + kref_get(&clp->cl_nfsdfs.cl_ref); set_bit(NFSD4_CLIENT_CB_RECALL_ANY, 
&clp->cl_flags); clp->cl_ra_time = ktime_get_boottime_seconds(); } diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c index 902b326e1e56..87dcaae32ff8 100644 --- a/fs/proc/bootconfig.c +++ b/fs/proc/bootconfig.c @@ -62,12 +62,12 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size) break; dst += ret; } - if (ret >= 0 && boot_command_line[0]) { - ret = snprintf(dst, rest(dst, end), "# Parameters from bootloader:\n# %s\n", - boot_command_line); - if (ret > 0) - dst += ret; - } + } + if (cmdline_has_extra_options() && ret >= 0 && boot_command_line[0]) { + ret = snprintf(dst, rest(dst, end), "# Parameters from bootloader:\n# %s\n", + boot_command_line); + if (ret > 0) + dst += ret; } out: kfree(key); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 6474529c4253..e539ccd39e1e 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2589,7 +2589,7 @@ static void journal_list_init(struct super_block *sb) static void release_journal_dev(struct reiserfs_journal *journal) { if (journal->j_bdev_file) { - fput(journal->j_bdev_file); + bdev_fput(journal->j_bdev_file); journal->j_bdev_file = NULL; } } diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 2be227532f39..2cbb92462074 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb) #ifdef CONFIG_ROMFS_ON_BLOCK if (sb->s_bdev) { sync_blockdev(sb->s_bdev); - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); } #endif } diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index a0017724d523..13a9d7acf8f8 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -417,6 +417,7 @@ smb2_close_cached_fid(struct kref *ref) { struct cached_fid *cfid = container_of(ref, struct cached_fid, refcount); + int rc; spin_lock(&cfid->cfids->cfid_list_lock); if (cfid->on_list) { @@ -430,9 +431,10 @@ smb2_close_cached_fid(struct kref *ref) cfid->dentry = NULL; if (cfid->is_open) { - SMB2_close(0, 
cfid->tcon, cfid->fid.persistent_fid, + rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, cfid->fid.volatile_fid); - atomic_dec(&cfid->tcon->num_remote_opens); + if (rc != -EBUSY && rc != -EAGAIN) + atomic_dec(&cfid->tcon->num_remote_opens); } free_cached_dir(cfid); diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 226d4835c92d..c71ae5c04306 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -250,6 +250,8 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); list_for_each_entry(cfile, &tcon->openFileList, tlist) { @@ -676,6 +678,8 @@ static ssize_t cifs_stats_proc_write(struct file *file, } #endif /* CONFIG_CIFS_STATS2 */ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { atomic_set(&tcon->num_smbs_sent, 0); spin_lock(&tcon->stat_lock); @@ -755,6 +759,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) } #endif /* STATS2 */ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { i++; seq_printf(m, "\n%d) %s", i, tcon->tree_name); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index aa6f1ecb7c0e..d41eedbff674 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -156,6 +156,7 @@ struct workqueue_struct *decrypt_wq; struct workqueue_struct *fileinfo_put_wq; struct workqueue_struct *cifsoplockd_wq; struct workqueue_struct *deferredclose_wq; +struct workqueue_struct *serverclose_wq; __u32 cifs_lock_secret; /* @@ -1888,6 +1889,13 @@ init_cifs(void) goto 
out_destroy_cifsoplockd_wq; } + serverclose_wq = alloc_workqueue("serverclose", + WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + if (!serverclose_wq) { + rc = -ENOMEM; + goto out_destroy_serverclose_wq; + } + rc = cifs_init_inodecache(); if (rc) goto out_destroy_deferredclose_wq; @@ -1962,6 +1970,8 @@ out_destroy_decrypt_wq: destroy_workqueue(decrypt_wq); out_destroy_cifsiod_wq: destroy_workqueue(cifsiod_wq); +out_destroy_serverclose_wq: + destroy_workqueue(serverclose_wq); out_clean_proc: cifs_proc_clean(); return rc; @@ -1991,6 +2001,7 @@ exit_cifs(void) destroy_workqueue(cifsoplockd_wq); destroy_workqueue(decrypt_wq); destroy_workqueue(fileinfo_put_wq); + destroy_workqueue(serverclose_wq); destroy_workqueue(cifsiod_wq); cifs_proc_clean(); } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 7ed9d05f6890..f6a302205f89 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -442,10 +442,10 @@ struct smb_version_operations { /* set fid protocol-specific info */ void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); /* close a file */ - void (*close)(const unsigned int, struct cifs_tcon *, + int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* close a file, returning file attributes and timestamps */ - void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, + int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *pfile_info); /* send a flush request to the server */ int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); @@ -1281,7 +1281,6 @@ struct cifs_tcon { struct cached_fids *cfids; /* BB add field for back pointer to sb struct(s)? 
*/ #ifdef CONFIG_CIFS_DFS_UPCALL - struct list_head dfs_ses_list; struct delayed_work dfs_cache_work; #endif struct delayed_work query_interfaces; /* query interfaces workqueue job */ @@ -1440,6 +1439,7 @@ struct cifsFileInfo { bool swapfile:1; bool oplock_break_cancelled:1; bool status_file_deleted:1; /* file has been deleted */ + bool offload:1; /* offload final part of _put to a wq */ unsigned int oplock_epoch; /* epoch from the lease break */ __u32 oplock_level; /* oplock/lease level from the lease break */ int count; @@ -1448,6 +1448,7 @@ struct cifsFileInfo { struct cifs_search_info srch_inf; struct work_struct oplock_break; /* work for oplock breaks */ struct work_struct put; /* work for the final part of _put */ + struct work_struct serverclose; /* work for serverclose */ struct delayed_work deferred; bool deferred_close_scheduled; /* Flag to indicate close is scheduled */ char *symlink_target; @@ -1804,7 +1805,6 @@ struct cifs_mount_ctx { struct TCP_Server_Info *server; struct cifs_ses *ses; struct cifs_tcon *tcon; - struct list_head dfs_ses_list; }; static inline void __free_dfs_info_param(struct dfs_info3_param *param) @@ -2105,6 +2105,7 @@ extern struct workqueue_struct *decrypt_wq; extern struct workqueue_struct *fileinfo_put_wq; extern struct workqueue_struct *cifsoplockd_wq; extern struct workqueue_struct *deferredclose_wq; +extern struct workqueue_struct *serverclose_wq; extern __u32 cifs_lock_secret; extern mempool_t *cifs_sm_req_poolp; @@ -2324,4 +2325,14 @@ struct smb2_compound_vars { struct kvec ea_iov; }; +static inline bool cifs_ses_exiting(struct cifs_ses *ses) +{ + bool ret; + + spin_lock(&ses->ses_lock); + ret = ses->ses_status == SES_EXITING; + spin_unlock(&ses->ses_lock); + return ret; +} + #endif /* _CIFS_GLOB_H */ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 0723e1b57256..8e0a348f1f66 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -725,31 +725,31 @@ struct super_block 
*cifs_get_tcon_super(struct cifs_tcon *tcon); void cifs_put_tcon_super(struct super_block *sb); int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry); -/* Put references of @ses and @ses->dfs_root_ses */ +/* Put references of @ses and its children */ static inline void cifs_put_smb_ses(struct cifs_ses *ses) { - struct cifs_ses *rses = ses->dfs_root_ses; + struct cifs_ses *next; - __cifs_put_smb_ses(ses); - if (rses) - __cifs_put_smb_ses(rses); + do { + next = ses->dfs_root_ses; + __cifs_put_smb_ses(ses); + } while ((ses = next)); } -/* Get an active reference of @ses and @ses->dfs_root_ses. +/* Get an active reference of @ses and its children. * * NOTE: make sure to call this function when incrementing reference count of * @ses to ensure that any DFS root session attached to it (@ses->dfs_root_ses) * will also get its reference count incremented. * - * cifs_put_smb_ses() will put both references, so call it when you're done. + * cifs_put_smb_ses() will put all references, so call it when you're done. 
*/ static inline void cifs_smb_ses_inc_refcount(struct cifs_ses *ses) { lockdep_assert_held(&cifs_tcp_ses_lock); - ses->ses_count++; - if (ses->dfs_root_ses) - ses->dfs_root_ses->ses_count++; + for (; ses; ses = ses->dfs_root_ses) + ses->ses_count++; } static inline bool dfs_src_pathname_equal(const char *s1, const char *s2) diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 5aee55551573..23b5709ddc31 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -5854,10 +5854,8 @@ SetEARetry: parm_data->list.EA_flags = 0; /* we checked above that name len is less than 255 */ parm_data->list.name_len = (__u8)name_len; - /* EA names are always ASCII */ - if (ea_name) - strncpy(parm_data->list.name, ea_name, name_len); - parm_data->list.name[name_len] = '\0'; + /* EA names are always ASCII and NUL-terminated */ + strscpy(parm_data->list.name, ea_name ?: "", name_len + 1); parm_data->list.value_len = cpu_to_le16(ea_value_len); /* caller ensures that ea_value_len is less than 64K but we need to ensure that it fits within the smb */ diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 9b85b5341822..85679ae106fd 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -175,6 +175,8 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { if (!ses->chans[i].server) @@ -232,7 +234,13 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) { - /* check if iface is still active */ + spin_lock(&ses->ses_lock); + if (ses->ses_status == SES_EXITING) { + spin_unlock(&ses->ses_lock); + continue; + } + spin_unlock(&ses->ses_lock); + spin_lock(&ses->chan_lock); if (cifs_ses_get_chan_index(ses, 
server) == CIFS_INVAL_CHAN_INDEX) { @@ -1860,6 +1868,9 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) ctx->sectype != ses->sectype) return 0; + if (ctx->dfs_root_ses != ses->dfs_root_ses) + return 0; + /* * If an existing session is limited to less channels than * requested, it should not be reused @@ -1963,31 +1974,6 @@ out: return rc; } -/** - * cifs_free_ipc - helper to release the session IPC tcon - * @ses: smb session to unmount the IPC from - * - * Needs to be called everytime a session is destroyed. - * - * On session close, the IPC is closed and the server must release all tcons of the session. - * No need to send a tree disconnect here. - * - * Besides, it will make the server to not close durable and resilient files on session close, as - * specified in MS-SMB2 3.3.5.6 Receiving an SMB2 LOGOFF Request. - */ -static int -cifs_free_ipc(struct cifs_ses *ses) -{ - struct cifs_tcon *tcon = ses->tcon_ipc; - - if (tcon == NULL) - return 0; - - tconInfoFree(tcon); - ses->tcon_ipc = NULL; - return 0; -} - static struct cifs_ses * cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) { @@ -2019,48 +2005,52 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) void __cifs_put_smb_ses(struct cifs_ses *ses) { struct TCP_Server_Info *server = ses->server; + struct cifs_tcon *tcon; unsigned int xid; size_t i; + bool do_logoff; int rc; + spin_lock(&cifs_tcp_ses_lock); spin_lock(&ses->ses_lock); - if (ses->ses_status == SES_EXITING) { + cifs_dbg(FYI, "%s: id=0x%llx ses_count=%d ses_status=%u ipc=%s\n", + __func__, ses->Suid, ses->ses_count, ses->ses_status, + ses->tcon_ipc ? 
ses->tcon_ipc->tree_name : "none"); + if (ses->ses_status == SES_EXITING || --ses->ses_count > 0) { spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); return; } - spin_unlock(&ses->ses_lock); + /* ses_count can never go negative */ + WARN_ON(ses->ses_count < 0); - cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count); - cifs_dbg(FYI, - "%s: ses ipc: %s\n", __func__, ses->tcon_ipc ? ses->tcon_ipc->tree_name : "NONE"); + spin_lock(&ses->chan_lock); + cifs_chan_clear_need_reconnect(ses, server); + spin_unlock(&ses->chan_lock); - spin_lock(&cifs_tcp_ses_lock); - if (--ses->ses_count > 0) { - spin_unlock(&cifs_tcp_ses_lock); - return; - } - spin_lock(&ses->ses_lock); - if (ses->ses_status == SES_GOOD) - ses->ses_status = SES_EXITING; + do_logoff = ses->ses_status == SES_GOOD && server->ops->logoff; + ses->ses_status = SES_EXITING; + tcon = ses->tcon_ipc; + ses->tcon_ipc = NULL; spin_unlock(&ses->ses_lock); spin_unlock(&cifs_tcp_ses_lock); - /* ses_count can never go negative */ - WARN_ON(ses->ses_count < 0); - - spin_lock(&ses->ses_lock); - if (ses->ses_status == SES_EXITING && server->ops->logoff) { - spin_unlock(&ses->ses_lock); - cifs_free_ipc(ses); + /* + * On session close, the IPC is closed and the server must release all + * tcons of the session. No need to send a tree disconnect here. + * + * Besides, it will make the server to not close durable and resilient + * files on session close, as specified in MS-SMB2 3.3.5.6 Receiving an + * SMB2 LOGOFF Request. + */ + tconInfoFree(tcon); + if (do_logoff) { xid = get_xid(); rc = server->ops->logoff(xid, ses); if (rc) cifs_server_dbg(VFS, "%s: Session Logoff failure rc=%d\n", __func__, rc); _free_xid(xid); - } else { - spin_unlock(&ses->ses_lock); - cifs_free_ipc(ses); } spin_lock(&cifs_tcp_ses_lock); @@ -2373,9 +2363,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) * need to lock before changing something in the session. 
*/ spin_lock(&cifs_tcp_ses_lock); + if (ctx->dfs_root_ses) + cifs_smb_ses_inc_refcount(ctx->dfs_root_ses); ses->dfs_root_ses = ctx->dfs_root_ses; - if (ses->dfs_root_ses) - ses->dfs_root_ses->ses_count++; list_add(&ses->smb_ses_list, &server->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); @@ -3326,6 +3316,9 @@ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx) cifs_put_smb_ses(mnt_ctx->ses); else if (mnt_ctx->server) cifs_put_tcp_session(mnt_ctx->server, 0); + mnt_ctx->ses = NULL; + mnt_ctx->tcon = NULL; + mnt_ctx->server = NULL; mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS; free_xid(mnt_ctx->xid); } @@ -3604,8 +3597,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) bool isdfs; int rc; - INIT_LIST_HEAD(&mnt_ctx.dfs_ses_list); - rc = dfs_mount_share(&mnt_ctx, &isdfs); if (rc) goto error; @@ -3636,7 +3627,6 @@ out: return rc; error: - dfs_put_root_smb_sessions(&mnt_ctx.dfs_ses_list); cifs_mount_put_conns(&mnt_ctx); return rc; } @@ -3651,6 +3641,18 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) goto error; rc = cifs_mount_get_tcon(&mnt_ctx); + if (!rc) { + /* + * Prevent superblock from being created with any missing + * connections. 
+ */ + if (WARN_ON(!mnt_ctx.server)) + rc = -EHOSTDOWN; + else if (WARN_ON(!mnt_ctx.ses)) + rc = -EACCES; + else if (WARN_ON(!mnt_ctx.tcon)) + rc = -ENOENT; + } if (rc) goto error; @@ -3988,13 +3990,14 @@ cifs_set_vol_auth(struct smb3_fs_context *ctx, struct cifs_ses *ses) } static struct cifs_tcon * -cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) +__cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) { int rc; struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb); struct cifs_ses *ses; struct cifs_tcon *tcon = NULL; struct smb3_fs_context *ctx; + char *origin_fullpath = NULL; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (ctx == NULL) @@ -4018,6 +4021,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) ctx->sign = master_tcon->ses->sign; ctx->seal = master_tcon->seal; ctx->witness = master_tcon->use_witness; + ctx->dfs_root_ses = master_tcon->ses->dfs_root_ses; rc = cifs_set_vol_auth(ctx, master_tcon->ses); if (rc) { @@ -4037,12 +4041,39 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) goto out; } +#ifdef CONFIG_CIFS_DFS_UPCALL + spin_lock(&master_tcon->tc_lock); + if (master_tcon->origin_fullpath) { + spin_unlock(&master_tcon->tc_lock); + origin_fullpath = dfs_get_path(cifs_sb, cifs_sb->ctx->source); + if (IS_ERR(origin_fullpath)) { + tcon = ERR_CAST(origin_fullpath); + origin_fullpath = NULL; + cifs_put_smb_ses(ses); + goto out; + } + } else { + spin_unlock(&master_tcon->tc_lock); + } +#endif + tcon = cifs_get_tcon(ses, ctx); if (IS_ERR(tcon)) { cifs_put_smb_ses(ses); goto out; } +#ifdef CONFIG_CIFS_DFS_UPCALL + if (origin_fullpath) { + spin_lock(&tcon->tc_lock); + tcon->origin_fullpath = origin_fullpath; + spin_unlock(&tcon->tc_lock); + origin_fullpath = NULL; + queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, + dfs_cache_get_ttl() * HZ); + } +#endif + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (cap_unix(ses)) reset_cifs_unix_caps(0, tcon, NULL, ctx); @@ -4051,11 +4082,23 @@ 
cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) out: kfree(ctx->username); kfree_sensitive(ctx->password); + kfree(origin_fullpath); kfree(ctx); return tcon; } +static struct cifs_tcon * +cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) +{ + struct cifs_tcon *ret; + + cifs_mount_lock(); + ret = __cifs_construct_tcon(cifs_sb, fsuid); + cifs_mount_unlock(); + return ret; +} + struct cifs_tcon * cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb) { diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index 449c59830039..3ec965547e3d 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -66,33 +66,20 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path) } /* - * Track individual DFS referral servers used by new DFS mount. - * - * On success, their lifetime will be shared by final tcon (dfs_ses_list). - * Otherwise, they will be put by dfs_put_root_smb_sessions() in cifs_mount(). + * Get an active reference of @ses so that next call to cifs_put_tcon() won't + * release it as any new DFS referrals must go through its IPC tcon. 
*/ -static int add_root_smb_session(struct cifs_mount_ctx *mnt_ctx) +static void add_root_smb_session(struct cifs_mount_ctx *mnt_ctx) { struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - struct dfs_root_ses *root_ses; struct cifs_ses *ses = mnt_ctx->ses; if (ses) { - root_ses = kmalloc(sizeof(*root_ses), GFP_KERNEL); - if (!root_ses) - return -ENOMEM; - - INIT_LIST_HEAD(&root_ses->list); - spin_lock(&cifs_tcp_ses_lock); cifs_smb_ses_inc_refcount(ses); spin_unlock(&cifs_tcp_ses_lock); - root_ses->ses = ses; - list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list); } - /* Select new DFS referral server so that new referrals go through it */ ctx->dfs_root_ses = ses; - return 0; } static inline int parse_dfs_target(struct smb3_fs_context *ctx, @@ -185,11 +172,8 @@ again: continue; } - if (is_refsrv) { - rc = add_root_smb_session(mnt_ctx); - if (rc) - goto out; - } + if (is_refsrv) + add_root_smb_session(mnt_ctx); rc = ref_walk_advance(rw); if (!rc) { @@ -232,6 +216,7 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; struct cifs_tcon *tcon; char *origin_fullpath; + bool new_tcon = true; int rc; origin_fullpath = dfs_get_path(cifs_sb, ctx->source); @@ -239,6 +224,18 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) return PTR_ERR(origin_fullpath); rc = dfs_referral_walk(mnt_ctx); + if (!rc) { + /* + * Prevent superblock from being created with any missing + * connections. 
+ */ + if (WARN_ON(!mnt_ctx->server)) + rc = -EHOSTDOWN; + else if (WARN_ON(!mnt_ctx->ses)) + rc = -EACCES; + else if (WARN_ON(!mnt_ctx->tcon)) + rc = -ENOENT; + } if (rc) goto out; @@ -247,15 +244,14 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) if (!tcon->origin_fullpath) { tcon->origin_fullpath = origin_fullpath; origin_fullpath = NULL; + } else { + new_tcon = false; } spin_unlock(&tcon->tc_lock); - if (list_empty(&tcon->dfs_ses_list)) { - list_replace_init(&mnt_ctx->dfs_ses_list, &tcon->dfs_ses_list); + if (new_tcon) { queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, dfs_cache_get_ttl() * HZ); - } else { - dfs_put_root_smb_sessions(&mnt_ctx->dfs_ses_list); } out: @@ -298,7 +294,6 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) if (rc) return rc; - ctx->dfs_root_ses = mnt_ctx->ses; /* * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally * try to get an DFS referral (even cached) to determine whether it is an DFS mount. 
@@ -324,7 +319,9 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) *isdfs = true; add_root_smb_session(mnt_ctx); - return __dfs_mount_share(mnt_ctx); + rc = __dfs_mount_share(mnt_ctx); + dfs_put_root_smb_sessions(mnt_ctx); + return rc; } /* Update dfs referral path of superblock */ diff --git a/fs/smb/client/dfs.h b/fs/smb/client/dfs.h index 875ab7ae57fc..e5c4dcf83750 100644 --- a/fs/smb/client/dfs.h +++ b/fs/smb/client/dfs.h @@ -7,7 +7,9 @@ #define _CIFS_DFS_H #include "cifsglob.h" +#include "cifsproto.h" #include "fs_context.h" +#include "dfs_cache.h" #include "cifs_unicode.h" #include <linux/namei.h> @@ -114,11 +116,6 @@ static inline void ref_walk_set_tgt_hint(struct dfs_ref_walk *rw) ref_walk_tit(rw)); } -struct dfs_root_ses { - struct list_head list; - struct cifs_ses *ses; -}; - int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref, struct smb3_fs_context *ctx); int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs); @@ -133,20 +130,32 @@ static inline int dfs_get_referral(struct cifs_mount_ctx *mnt_ctx, const char *p { struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; + struct cifs_ses *rses = ctx->dfs_root_ses ?: mnt_ctx->ses; - return dfs_cache_find(mnt_ctx->xid, ctx->dfs_root_ses, cifs_sb->local_nls, + return dfs_cache_find(mnt_ctx->xid, rses, cifs_sb->local_nls, cifs_remap(cifs_sb), path, ref, tl); } -static inline void dfs_put_root_smb_sessions(struct list_head *head) +/* + * cifs_get_smb_ses() already guarantees an active reference of + * @ses->dfs_root_ses when a new session is created, so we need to put extra + * references of all DFS root sessions that were used across the mount process + * in dfs_mount_share(). 
+ */ +static inline void dfs_put_root_smb_sessions(struct cifs_mount_ctx *mnt_ctx) { - struct dfs_root_ses *root, *tmp; + const struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + struct cifs_ses *ses = ctx->dfs_root_ses; + struct cifs_ses *cur; + + if (!ses) + return; - list_for_each_entry_safe(root, tmp, head, list) { - list_del_init(&root->list); - cifs_put_smb_ses(root->ses); - kfree(root); + for (cur = ses; cur; cur = cur->dfs_root_ses) { + if (cur->dfs_root_ses) + cifs_put_smb_ses(cur->dfs_root_ses); } + cifs_put_smb_ses(ses); } #endif /* _CIFS_DFS_H */ diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c index 508d831fabe3..11c8efecf7aa 100644 --- a/fs/smb/client/dfs_cache.c +++ b/fs/smb/client/dfs_cache.c @@ -1172,8 +1172,8 @@ static bool is_ses_good(struct cifs_ses *ses) return ret; } -/* Refresh dfs referral of tcon and mark it for reconnect if needed */ -static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_refresh) +/* Refresh dfs referral of @ses and mark it for reconnect if needed */ +static void __refresh_ses_referral(struct cifs_ses *ses, bool force_refresh) { struct TCP_Server_Info *server = ses->server; DFS_CACHE_TGT_LIST(old_tl); @@ -1181,10 +1181,21 @@ static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_ref bool needs_refresh = false; struct cache_entry *ce; unsigned int xid; + char *path = NULL; int rc = 0; xid = get_xid(); + mutex_lock(&server->refpath_lock); + if (server->leaf_fullpath) { + path = kstrdup(server->leaf_fullpath + 1, GFP_ATOMIC); + if (!path) + rc = -ENOMEM; + } + mutex_unlock(&server->refpath_lock); + if (!path) + goto out; + down_read(&htable_rw_lock); ce = lookup_cache_entry(path); needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce); @@ -1218,19 +1229,17 @@ out: free_xid(xid); dfs_cache_free_tgts(&old_tl); dfs_cache_free_tgts(&new_tl); - return rc; + kfree(path); } -static int refresh_tcon(struct cifs_tcon *tcon, bool force_refresh) +static inline 
void refresh_ses_referral(struct cifs_ses *ses) { - struct TCP_Server_Info *server = tcon->ses->server; - struct cifs_ses *ses = tcon->ses; + __refresh_ses_referral(ses, false); +} - mutex_lock(&server->refpath_lock); - if (server->leaf_fullpath) - __refresh_tcon(server->leaf_fullpath + 1, ses, force_refresh); - mutex_unlock(&server->refpath_lock); - return 0; +static inline void force_refresh_ses_referral(struct cifs_ses *ses) +{ + __refresh_ses_referral(ses, true); } /** @@ -1271,34 +1280,20 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) */ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; - return refresh_tcon(tcon, true); + force_refresh_ses_referral(tcon->ses); + return 0; } /* Refresh all DFS referrals related to DFS tcon */ void dfs_cache_refresh(struct work_struct *work) { - struct TCP_Server_Info *server; - struct dfs_root_ses *rses; struct cifs_tcon *tcon; struct cifs_ses *ses; tcon = container_of(work, struct cifs_tcon, dfs_cache_work.work); - ses = tcon->ses; - server = ses->server; - mutex_lock(&server->refpath_lock); - if (server->leaf_fullpath) - __refresh_tcon(server->leaf_fullpath + 1, ses, false); - mutex_unlock(&server->refpath_lock); - - list_for_each_entry(rses, &tcon->dfs_ses_list, list) { - ses = rses->ses; - server = ses->server; - mutex_lock(&server->refpath_lock); - if (server->leaf_fullpath) - __refresh_tcon(server->leaf_fullpath + 1, ses, false); - mutex_unlock(&server->refpath_lock); - } + for (ses = tcon->ses; ses; ses = ses->dfs_root_ses) + refresh_ses_referral(ses); queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, atomic_read(&dfs_cache_ttl) * HZ); diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index d11dc3aa458b..864b194dbaa0 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c @@ -189,6 +189,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int disposition; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; + int rdwr_for_fscache 
= 0; *oplock = 0; if (tcon->ses->server->oplocks) @@ -200,6 +201,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned return PTR_ERR(full_path); } + /* If we're caching, we need to be able to fill in around partial writes. */ + if (cifs_fscache_enabled(inode) && (oflags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open && (CIFS_UNIX_POSIX_PATH_OPS_CAP & @@ -276,6 +281,8 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned desired_access |= GENERIC_READ; /* is this too little? */ if (OPEN_FMODE(oflags) & FMODE_WRITE) desired_access |= GENERIC_WRITE; + if (rdwr_for_fscache == 1) + desired_access |= GENERIC_READ; disposition = FILE_OVERWRITE_IF; if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) @@ -304,6 +311,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned if (!tcon->unix_ext && (mode & S_IWUGO) == 0) create_options |= CREATE_OPTION_READONLY; +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -317,8 +325,15 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned rc = server->ops->open(xid, &oparms, oplock, buf); if (rc) { cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc); + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access &= ~GENERIC_READ; + rdwr_for_fscache = 2; + goto retry_open; + } goto out; } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY /* diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 16aadce492b2..9be37d0fe724 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -206,12 +206,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) */ } -static inline int cifs_convert_flags(unsigned int flags) +static inline int cifs_convert_flags(unsigned int flags, int 
rdwr_for_fscache) { if ((flags & O_ACCMODE) == O_RDONLY) return GENERIC_READ; else if ((flags & O_ACCMODE) == O_WRONLY) - return GENERIC_WRITE; + return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE; else if ((flags & O_ACCMODE) == O_RDWR) { /* GENERIC_ALL is too much permission to request can cause unnecessary access denied on create */ @@ -348,11 +348,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ int create_options = CREATE_NOT_DIR; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; + int rdwr_for_fscache = 0; if (!server->ops->open) return -ENOSYS; - desired_access = cifs_convert_flags(f_flags); + /* If we're caching, we need to be able to fill in around partial writes. */ + if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + + desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache); /********************************************************************* * open flag mapping table: @@ -389,6 +394,7 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ if (f_flags & O_DIRECT) create_options |= CREATE_NO_BUFFER; +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -400,8 +406,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ }; rc = server->ops->open(xid, &oparms, oplock, buf); - if (rc) + if (rc) { + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access = cifs_convert_flags(f_flags, 0); + rdwr_for_fscache = 2; + goto retry_open; + } return rc; + } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); /* TODO: Add support for calling posix query info but with passing in fid */ if (tcon->unix_ext) @@ -445,6 +459,7 @@ cifs_down_write(struct rw_semaphore *sem) } static void cifsFileInfo_put_work(struct work_struct *work); +void serverclose_work(struct work_struct *work); struct 
cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, __u32 oplock, @@ -491,6 +506,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, cfile->tlink = cifs_get_tlink(tlink); INIT_WORK(&cfile->oplock_break, cifs_oplock_break); INIT_WORK(&cfile->put, cifsFileInfo_put_work); + INIT_WORK(&cfile->serverclose, serverclose_work); INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); mutex_init(&cfile->fh_mutex); spin_lock_init(&cfile->file_info_lock); @@ -582,6 +598,40 @@ static void cifsFileInfo_put_work(struct work_struct *work) cifsFileInfo_put_final(cifs_file); } +void serverclose_work(struct work_struct *work) +{ + struct cifsFileInfo *cifs_file = container_of(work, + struct cifsFileInfo, serverclose); + + struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); + + struct TCP_Server_Info *server = tcon->ses->server; + int rc = 0; + int retries = 0; + int MAX_RETRIES = 4; + + do { + if (server->ops->close_getattr) + rc = server->ops->close_getattr(0, tcon, cifs_file); + else if (server->ops->close) + rc = server->ops->close(0, tcon, &cifs_file->fid); + + if (rc == -EBUSY || rc == -EAGAIN) { + retries++; + msleep(250); + } + } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES) + ); + + if (retries == MAX_RETRIES) + pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES); + + if (cifs_file->offload) + queue_work(fileinfo_put_wq, &cifs_file->put); + else + cifsFileInfo_put_final(cifs_file); +} + /** * cifsFileInfo_put - release a reference of file priv data * @@ -622,10 +672,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, struct cifs_fid fid = {}; struct cifs_pending_open open; bool oplock_break_cancelled; + bool serverclose_offloaded = false; spin_lock(&tcon->open_file_lock); spin_lock(&cifsi->open_file_lock); spin_lock(&cifs_file->file_info_lock); + + cifs_file->offload = offload; if (--cifs_file->count > 0) { spin_unlock(&cifs_file->file_info_lock); 
spin_unlock(&cifsi->open_file_lock); @@ -667,13 +720,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, if (!tcon->need_reconnect && !cifs_file->invalidHandle) { struct TCP_Server_Info *server = tcon->ses->server; unsigned int xid; + int rc = 0; xid = get_xid(); if (server->ops->close_getattr) - server->ops->close_getattr(xid, tcon, cifs_file); + rc = server->ops->close_getattr(xid, tcon, cifs_file); else if (server->ops->close) - server->ops->close(xid, tcon, &cifs_file->fid); + rc = server->ops->close(xid, tcon, &cifs_file->fid); _free_xid(xid); + + if (rc == -EBUSY || rc == -EAGAIN) { + // Server close failed, hence offloading it as an async op + queue_work(serverclose_wq, &cifs_file->serverclose); + serverclose_offloaded = true; + } } if (oplock_break_cancelled) @@ -681,10 +741,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, cifs_del_pending_open(&open); - if (offload) - queue_work(fileinfo_put_wq, &cifs_file->put); - else - cifsFileInfo_put_final(cifs_file); + // if serverclose has been offloaded to wq (on failure), it will + // handle offloading put as well. If serverclose not offloaded, + // we need to handle offloading put here. 
+ if (!serverclose_offloaded) { + if (offload) + queue_work(fileinfo_put_wq, &cifs_file->put); + else + cifsFileInfo_put_final(cifs_file); + } } int cifs_open(struct inode *inode, struct file *file) @@ -834,11 +899,11 @@ int cifs_open(struct inode *inode, struct file *file) use_cache: fscache_use_cookie(cifs_inode_cookie(file_inode(file)), file->f_mode & FMODE_WRITE); - if (file->f_flags & O_DIRECT && - (!((file->f_flags & O_ACCMODE) != O_RDONLY) || - file->f_flags & O_APPEND)) - cifs_invalidate_cache(file_inode(file), - FSCACHE_INVAL_DIO_WRITE); + if (!(file->f_flags & O_DIRECT)) + goto out; + if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY) + goto out; + cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE); out: free_dentry_path(page); @@ -903,6 +968,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) int disposition = FILE_OPEN; int create_options = CREATE_NOT_DIR; struct cifs_open_parms oparms; + int rdwr_for_fscache = 0; xid = get_xid(); mutex_lock(&cfile->fh_mutex); @@ -966,7 +1032,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ - desired_access = cifs_convert_flags(cfile->f_flags); + /* If we're caching, we need to be able to fill in around partial writes. 
*/ + if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + + desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache); /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (cfile->f_flags & O_SYNC) @@ -978,6 +1048,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) if (server->ops->get_lease_key) server->ops->get_lease_key(inode, &cfile->fid); +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -1003,6 +1074,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) /* indicate that we need to relock the file */ oparms.reconnect = true; } + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access = cifs_convert_flags(cfile->f_flags, 0); + rdwr_for_fscache = 2; + goto retry_open; + } if (rc) { mutex_unlock(&cfile->fh_mutex); @@ -1011,6 +1087,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) goto reopen_error_exit; } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY reopen_success: #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index bdcbe6ff2739..b7bfe705b2c4 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -37,7 +37,7 @@ #include "rfc1002pdu.h" #include "fs_context.h" -static DEFINE_MUTEX(cifs_mount_mutex); +DEFINE_MUTEX(cifs_mount_mutex); static const match_table_t cifs_smb_version_tokens = { { Smb_1, SMB1_VERSION_STRING }, @@ -783,9 +783,9 @@ static int smb3_get_tree(struct fs_context *fc) if (err) return err; - mutex_lock(&cifs_mount_mutex); + cifs_mount_lock(); ret = smb3_get_tree_common(fc); - mutex_unlock(&cifs_mount_mutex); + cifs_mount_unlock(); return ret; } diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 7863f2248c4d..8a35645e0b65 100644 --- a/fs/smb/client/fs_context.h +++ 
b/fs/smb/client/fs_context.h @@ -304,4 +304,16 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); #define MAX_CACHED_FIDS 16 extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp); +extern struct mutex cifs_mount_mutex; + +static inline void cifs_mount_lock(void) +{ + mutex_lock(&cifs_mount_mutex); +} + +static inline void cifs_mount_unlock(void) +{ + mutex_unlock(&cifs_mount_mutex); +} + #endif diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h index a3d73720914f..1f2ea9f5cc9a 100644 --- a/fs/smb/client/fscache.h +++ b/fs/smb/client/fscache.h @@ -109,6 +109,11 @@ static inline void cifs_readahead_to_fscache(struct inode *inode, __cifs_readahead_to_fscache(inode, pos, len); } +static inline bool cifs_fscache_enabled(struct inode *inode) +{ + return fscache_cookie_enabled(cifs_inode_cookie(inode)); +} + #else /* CONFIG_CIFS_FSCACHE */ static inline void cifs_fscache_fill_coherency(struct inode *inode, @@ -124,6 +129,7 @@ static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {} static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {} static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; } static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {} +static inline bool cifs_fscache_enabled(struct inode *inode) { return false; } static inline int cifs_fscache_query_occupancy(struct inode *inode, pgoff_t first, unsigned int nr_pages, diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index c012dfdba80d..855ac5a62edf 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -247,7 +247,9 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) { list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) { - if (ses_it->Suid == out.session_id) { + spin_lock(&ses_it->ses_lock); + if 
(ses_it->ses_status != SES_EXITING && + ses_it->Suid == out.session_id) { ses = ses_it; /* * since we are using the session outside the crit @@ -255,9 +257,11 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug * so increment its refcount */ cifs_smb_ses_inc_refcount(ses); + spin_unlock(&ses_it->ses_lock); found = true; goto search_end; } + spin_unlock(&ses_it->ses_lock); } } search_end: diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index c3771fc81328..33ac4f8f5050 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -138,9 +138,6 @@ tcon_info_alloc(bool dir_leases_enabled) atomic_set(&ret_buf->num_local_opens, 0); atomic_set(&ret_buf->num_remote_opens, 0); ret_buf->stats_from_time = ktime_get_real_seconds(); -#ifdef CONFIG_CIFS_DFS_UPCALL - INIT_LIST_HEAD(&ret_buf->dfs_ses_list); -#endif return ret_buf; } @@ -156,9 +153,6 @@ tconInfoFree(struct cifs_tcon *tcon) atomic_dec(&tconInfoAllocCount); kfree(tcon->nativeFileSystem); kfree_sensitive(tcon->password); -#ifdef CONFIG_CIFS_DFS_UPCALL - dfs_put_root_smb_sessions(&tcon->dfs_ses_list); -#endif kfree(tcon->origin_fullpath); kfree(tcon); } @@ -487,6 +481,8 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid != buf->Tid) continue; diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index a9eaba8083b0..212ec6f66ec6 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode); } -static void +static int cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { - CIFSSMBClose(xid, tcon, fid->netfid); + return 
CIFSSMBClose(xid, tcon, fid->netfid); } static int diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 82b84a4941dd..cc72be5a93a9 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -622,6 +622,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); cifs_stats_inc( @@ -697,6 +699,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 2ed456948f34..b156eefa75d7 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1412,14 +1412,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) memcpy(cfile->fid.create_guid, fid->create_guid, 16); } -static void +static int smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { - SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); + return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); } -static void +static int smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *cfile) { @@ -1430,7 +1430,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, &file_inf); if (rc) - return; + return rc; inode = d_inode(cfile->dentry); @@ -1459,6 +1459,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, /* End of file and 
Attributes should not have to be updated on close */ spin_unlock(&inode->i_lock); + return rc; } static int @@ -2480,6 +2481,8 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) { spin_lock(&tcon->tc_lock); @@ -3913,7 +3916,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, strcat(message, "W"); } if (!new_oplock) - strncpy(message, "None", sizeof(message)); + strscpy(message, "None"); cinode->oplock = new_oplock; cifs_dbg(FYI, "%s Lease granted on inode %p\n", message, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 3ea688558e6c..c0c4933af5fc 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3628,9 +3628,9 @@ replay_again: memcpy(&pbuf->network_open_info, &rsp->network_open_info, sizeof(pbuf->network_open_info)); + atomic_dec(&tcon->num_remote_opens); } - atomic_dec(&tcon->num_remote_opens); close_exit: SMB2_close_free(&rqst); free_rsp_buf(resp_buftype, rsp); diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 5a3ca62d2f07..1d6e54f7879e 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -659,7 +659,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) } spin_unlock(&server->srv_lock); if (!is_binding && !server->session_estab) { - strncpy(shdr->Signature, "BSRSPYL", 8); + strscpy(shdr->Signature, "BSRSPYL"); return 0; } diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index 8ca8a45c4c62..686b321c5a8b 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -167,7 +167,8 @@ struct ksmbd_share_config_response { __u16 force_uid; __u16 force_gid; __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; - __u32 reserved[112]; 
/* Reserved room */ + __u32 reserved[111]; /* Reserved room */ + __u32 payload_sz; __u32 veto_list_sz; __s8 ____payload[]; }; diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c index 328a412259dc..a2f0a2edceb8 100644 --- a/fs/smb/server/mgmt/share_config.c +++ b/fs/smb/server/mgmt/share_config.c @@ -158,7 +158,12 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um, share->name = kstrdup(name, GFP_KERNEL); if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) { - share->path = kstrdup(ksmbd_share_config_path(resp), + int path_len = PATH_MAX; + + if (resp->payload_sz) + path_len = resp->payload_sz - resp->veto_list_sz; + + share->path = kstrndup(ksmbd_share_config_path(resp), path_len, GFP_KERNEL); if (share->path) share->path_sz = strlen(share->path); diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c index a45f7dca482e..606aa3c5189a 100644 --- a/fs/smb/server/smb2ops.c +++ b/fs/smb/server/smb2ops.c @@ -228,6 +228,11 @@ void init_smb3_0_server(struct ksmbd_conn *conn) conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION) conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || + (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && + conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; } @@ -278,11 +283,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn) conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_DIRECTORY_LEASING; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || - (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && - conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) 
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index d478fa0c57ab..5723bbf372d7 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -5857,8 +5857,9 @@ static int smb2_rename(struct ksmbd_work *work, if (!file_info->ReplaceIfExists) flags = RENAME_NOREPLACE; - smb_break_all_levII_oplock(work, fp, 0); rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags); + if (!rc) + smb_break_all_levII_oplock(work, fp, 0); out: kfree(new_name); return rc; diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index f29bb03f0dc4..8752ac82c557 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -65,6 +65,7 @@ struct ipc_msg_table_entry { struct hlist_node ipc_table_hlist; void *response; + unsigned int msg_sz; }; static struct delayed_work ipc_timer_work; @@ -275,6 +276,7 @@ static int handle_response(int type, void *payload, size_t sz) } memcpy(entry->response, payload, sz); + entry->msg_sz = sz; wake_up_interruptible(&entry->wait); ret = 0; break; @@ -453,6 +455,34 @@ out: return ret; } +static int ipc_validate_msg(struct ipc_msg_table_entry *entry) +{ + unsigned int msg_sz = entry->msg_sz; + + if (entry->type == KSMBD_EVENT_RPC_REQUEST) { + struct ksmbd_rpc_command *resp = entry->response; + + msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz; + } else if (entry->type == KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST) { + struct ksmbd_spnego_authen_response *resp = entry->response; + + msg_sz = sizeof(struct ksmbd_spnego_authen_response) + + resp->session_key_len + resp->spnego_blob_len; + } else if (entry->type == KSMBD_EVENT_SHARE_CONFIG_REQUEST) { + struct ksmbd_share_config_response *resp = entry->response; + + if (resp->payload_sz) { + if (resp->payload_sz < resp->veto_list_sz) + return -EINVAL; + + msg_sz = sizeof(struct ksmbd_share_config_response) + + resp->payload_sz; + } + } + + return entry->msg_sz != msg_sz ? 
-EINVAL : 0; +} + static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle) { struct ipc_msg_table_entry entry; @@ -477,6 +507,13 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle ret = wait_event_interruptible_timeout(entry.wait, entry.response != NULL, IPC_WAIT_TIMEOUT); + if (entry.response) { + ret = ipc_validate_msg(&entry); + if (ret) { + kvfree(entry.response); + entry.response = NULL; + } + } out: down_write(&ipc_msg_table_lock); hash_del(&entry.ipc_table_hlist); diff --git a/fs/super.c b/fs/super.c index 71d9779c42b1..69ce6c600968 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1515,29 +1515,11 @@ static int fs_bdev_thaw(struct block_device *bdev) return error; } -static void fs_bdev_super_get(void *data) -{ - struct super_block *sb = data; - - spin_lock(&sb_lock); - sb->s_count++; - spin_unlock(&sb_lock); -} - -static void fs_bdev_super_put(void *data) -{ - struct super_block *sb = data; - - put_super(sb); -} - const struct blk_holder_ops fs_holder_ops = { .mark_dead = fs_bdev_mark_dead, .sync = fs_bdev_sync, .freeze = fs_bdev_freeze, .thaw = fs_bdev_thaw, - .get_holder = fs_bdev_super_get, - .put_holder = fs_bdev_super_put, }; EXPORT_SYMBOL_GPL(fs_holder_ops); @@ -1562,7 +1544,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, * writable from userspace even for a read-only block device. 
*/ if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) { - fput(bdev_file); + bdev_fput(bdev_file); return -EACCES; } @@ -1573,7 +1555,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, if (atomic_read(&bdev->bd_fsfreeze_count) > 0) { if (fc) warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); - fput(bdev_file); + bdev_fput(bdev_file); return -EBUSY; } spin_lock(&sb_lock); @@ -1693,7 +1675,7 @@ void kill_block_super(struct super_block *sb) generic_shutdown_super(sb); if (bdev) { sync_blockdev(bdev); - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); } } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 1a18c381127e..f0fa02264eda 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2030,7 +2030,7 @@ xfs_free_buftarg( fs_put_dax(btp->bt_daxdev, btp->bt_mount); /* the main block device is closed by kill_block_super */ if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev) - fput(btp->bt_bdev_file); + bdev_fput(btp->bt_bdev_file); kfree(btp); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ea48774f6b76..d55b42b2480d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1301,8 +1301,19 @@ xfs_link( */ if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) && tdp->i_projid != sip->i_projid)) { - error = -EXDEV; - goto error_return; + /* + * Project quota setup skips special files which can + * leave inodes in a PROJINHERIT directory without a + * project ID set. We need to allow links to be made + * to these "project-less" inodes because userspace + * expects them to succeed after project ID setup, + * but everything else should be rejected. 
+ */ + if (!special_file(VFS_I(sip)->i_mode) || + sip->i_projid != 0) { + error = -EXDEV; + goto error_return; + } } if (!resblks) { diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c21f10ab0f5d..bce020374c5e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -485,7 +485,7 @@ xfs_open_devices( mp->m_logdev_targp = mp->m_ddev_targp; /* Handle won't be used, drop it */ if (logdev_file) - fput(logdev_file); + bdev_fput(logdev_file); } return 0; @@ -497,10 +497,10 @@ xfs_open_devices( xfs_free_buftarg(mp->m_ddev_targp); out_close_rtdev: if (rtdev_file) - fput(rtdev_file); + bdev_fput(rtdev_file); out_close_logdev: if (logdev_file) - fput(logdev_file); + bdev_fput(logdev_file); return error; } |