diff options
Diffstat (limited to 'fs')
49 files changed, 503 insertions, 166 deletions
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ed1644e7683f..d642d06a453b 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -424,7 +424,7 @@ error_kill_call: if (call->async) { if (cancel_work_sync(&call->async_work)) afs_put_call(call); - afs_put_call(call); + afs_set_call_complete(call, ret, 0); } ac->error = ret; diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 47e7770d0583..79495cd7a794 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -9,6 +9,7 @@ #include "debug.h" #include "errcode.h" #include "error.h" +#include "journal.h" #include "trace.h" #include <linux/prefetch.h> @@ -424,14 +425,11 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c) BUG_ON(btree_node_read_in_flight(b) || btree_node_write_in_flight(b)); - if (btree_node_dirty(b)) - bch2_btree_complete_write(c, b, btree_current_write(b)); - clear_btree_node_dirty_acct(c, b); - btree_node_data_free(c, b); } - BUG_ON(atomic_read(&c->btree_cache.dirty)); + BUG_ON(!bch2_journal_error(&c->journal) && + atomic_read(&c->btree_cache.dirty)); list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 57c20390e10e..5a720f0cd5a6 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1704,8 +1704,8 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level)); } -void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, - struct btree_write *w) +static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, + struct btree_write *w) { unsigned long old, new, v = READ_ONCE(b->will_make_reachable); diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index 7e03dd76fb38..e0d7fa5b1dfb 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -134,9 +134,6 @@ void bch2_btree_node_read(struct bch_fs *, struct btree *, bool); int bch2_btree_root_read(struct bch_fs *, enum btree_id, const struct bkey_i *, unsigned); -void bch2_btree_complete_write(struct bch_fs *, struct btree *, - struct btree_write *); - bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *); enum btree_write_flags { diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 37fbf22de8fc..1b7a5668df7c 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -992,8 +992,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) list_for_each_entry_safe(ck, n, &items, list) { cond_resched(); - bch2_journal_pin_drop(&c->journal, &ck->journal); - list_del(&ck->list); kfree(ck->k); six_lock_exit(&ck->c.lock); diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 324767c0ddcc..25fdca00bf7b 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -554,6 +554,19 @@ int __must_check bch2_trans_update_seq(struct btree_trans *trans, u64 seq, BTREE_UPDATE_PREJOURNAL); } +static noinline int bch2_btree_insert_clone_trans(struct btree_trans *trans, + enum btree_id btree, + struct bkey_i *k) +{ + struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(&k->k)); + int ret = PTR_ERR_OR_ZERO(n); + if (ret) + return ret; + + bkey_copy(n, k); + return bch2_btree_insert_trans(trans, btree, n, 0); +} + int __must_check bch2_trans_update_buffered(struct btree_trans *trans, enum btree_id btree, struct bkey_i *k) @@ -564,6 +577,9 @@ int __must_check bch2_trans_update_buffered(struct btree_trans *trans, EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size); EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); + if (unlikely(trans->journal_replay_not_finished)) + return bch2_btree_insert_clone_trans(trans, btree, k); + trans_for_each_wb_update(trans, i) { if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) { bkey_copy(&i->k, k); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 6697417273aa..26be38ab6ecb 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1056,6 +1056,17 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, flags &= ~BCH_WATERMARK_MASK; flags |= watermark; + if (!(flags & BTREE_INSERT_JOURNAL_RECLAIM) && + watermark < c->journal.watermark) { + struct journal_res res = { 0 }; + + ret = drop_locks_do(trans, + bch2_journal_res_get(&c->journal, &res, 1, + watermark|JOURNAL_RES_GET_CHECK)); + if (ret) + return ERR_PTR(ret); + } + while (1) { nr_nodes[!!update_level] += 1 + split; update_level++; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 71aa5e59787b..2418c528c533 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -471,7 +471,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans, * we aren't using the extent overwrite path to delete, we're * just using the normal key deletion path: */ - if (bkey_deleted(&n->k)) + if (bkey_deleted(&n->k) && !(iter->flags & BTREE_ITER_IS_EXTENTS)) n->k.size = 0; return bch2_trans_relock(trans) ?: @@ -591,7 +591,7 @@ int bch2_data_update_init(struct btree_trans *trans, m->data_opts.rewrite_ptrs = 0; /* if iter == NULL, it's just a promote */ if (iter) - ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts); + ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts); goto done; } diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 1a0f2d571569..2bfff0da7000 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -485,20 +485,15 @@ retry: return ret; } -int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir) +int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 snapshot) { struct btree_iter iter; struct bkey_s_c k; - u32 snapshot; int ret; - ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot); - if (ret) - return ret; - for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents, - SPOS(dir.inum, 0, snapshot), - POS(dir.inum, U64_MAX), 0, k, ret) + SPOS(dir, 0, snapshot), + POS(dir, U64_MAX), 0, k, ret) if (k.k->type == KEY_TYPE_dirent) { ret = -ENOTEMPTY; break; @@ -508,6 +503,14 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir) return ret; } +int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir) +{ + u32 snapshot; + + return bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot) ?: + bch2_empty_dir_snapshot(trans, dir.inum, snapshot); +} + int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) { struct btree_trans *trans = bch2_trans_get(c); diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index cd262bf4d9c5..1e3431990abd 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -64,6 +64,7 @@ u64 bch2_dirent_lookup(struct bch_fs *, subvol_inum, const struct bch_hash_info *, const struct qstr *, subvol_inum *); +int bch2_empty_dir_snapshot(struct btree_trans *, u64, u32); int bch2_empty_dir_trans(struct btree_trans *, subvol_inum); int bch2_readdir(struct bch_fs *, subvol_inum, struct dir_context *); diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index f6c92df55270..9d8afcb5979a 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1294,7 +1294,8 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, unsigned i = 0; bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) { + if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || + p.ptr.unwritten) { rewrite_ptrs = 0; goto incompressible; } diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 5a39bcb597a3..a70b7a03057d 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -413,7 +413,7 @@ retry: if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) && !arg.src_ptr) - snapshot_src.subvol = to_bch_ei(dir)->ei_inode.bi_subvol; + snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol; inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), dst_dentry, arg.mode|S_IFDIR, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 4d51be813509..371565e02ff2 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1733,6 +1733,9 @@ static int bch2_unfreeze(struct super_block *sb) struct bch_fs *c = sb->s_fs_info; int ret; + if (test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) + return 0; + down_write(&c->state_lock); ret = bch2_fs_read_write(c); up_write(&c->state_lock); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index c7849b0753e7..9309cfeecd8d 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -7,6 +7,7 @@ #include "btree_update.h" #include "buckets.h" #include "compress.h" +#include "dirent.h" #include "error.h" #include "extents.h" #include "extent_update.h" @@ -1093,11 +1094,15 @@ static int may_delete_deleted_inode(struct btree_trans *trans, if (ret) goto out; - if (fsck_err_on(S_ISDIR(inode.bi_mode), c, - deleted_inode_is_dir, - "directory %llu:%u in deleted_inodes btree", - pos.offset, pos.snapshot)) - goto delete; + if (S_ISDIR(inode.bi_mode)) { + ret = bch2_empty_dir_snapshot(trans, pos.offset, pos.snapshot); + if (fsck_err_on(ret == -ENOTEMPTY, c, deleted_inode_is_dir, + "non empty directory %llu:%u in deleted_inodes btree", + pos.offset, pos.snapshot)) + goto delete; + if (ret) + goto out; + } if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), c, deleted_inode_not_unlinked, diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 489b34046e78..8cf238be6213 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -249,7 +249,7 @@ static bool journal_entry_want_write(struct journal *j) return ret; } -static bool journal_entry_close(struct journal *j) +bool bch2_journal_entry_close(struct journal *j) { bool ret; @@ -383,7 +383,7 @@ static bool journal_quiesced(struct journal *j) bool ret = atomic64_read(&j->seq) == j->seq_ondisk; if (!ret) - journal_entry_close(j); + bch2_journal_entry_close(j); return ret; } @@ -436,7 +436,7 @@ retry: /* * Recheck after taking the lock, so we don't race with another thread - * that just did journal_entry_open() and call journal_entry_close() + * that just did journal_entry_open() and call bch2_journal_entry_close() * unnecessarily */ if (journal_res_get_fast(j, res, flags)) { @@ -1041,7 +1041,7 @@ void bch2_fs_journal_stop(struct journal *j) bch2_journal_reclaim_stop(j); bch2_journal_flush_all_pins(j); - wait_event(j->wait, journal_entry_close(j)); + wait_event(j->wait, bch2_journal_entry_close(j)); /* * Always write a new journal entry, to make sure the clock hands are up diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 4c513fca5ef2..2f768e11aec9 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -266,6 +266,7 @@ static inline union journal_res_state journal_state_buf_put(struct journal *j, u return s; } +bool bch2_journal_entry_close(struct journal *); void bch2_journal_buf_put_final(struct journal *, u64, bool); static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 0f17fc5f8d68..5de1b68fb8af 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1599,6 +1599,7 @@ static CLOSURE_CALLBACK(journal_write_done) } while ((v = atomic64_cmpxchg(&j->reservations.counter, old.v, new.v)) != old.v); + bch2_journal_reclaim_fast(j); bch2_journal_space_available(j); closure_wake_up(&w->wait); diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index e63c6eda86af..ec712104addb 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -776,6 +776,9 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, (1U << JOURNAL_PIN_btree), 0, 0, 0)) *did_work = true; + if (seq_to_flush > journal_cur_seq(j)) + bch2_journal_entry_close(j); + spin_lock(&j->lock); /* * If journal replay hasn't completed, the unreplayed journal entries diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 770ced1c6285..c7d9074c82d9 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -144,7 +144,7 @@ static int bch2_journal_replay(struct bch_fs *c) u64 start_seq = c->journal_replay_seq_start; u64 end_seq = c->journal_replay_seq_start; size_t i; - int ret; + int ret = 0; move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr); keys->gap = keys->nr; diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 6e1bfe9feb59..37d16e04e671 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -121,6 +121,14 @@ int bch2_trans_mark_reflink_v(struct btree_trans *trans, { check_indirect_extent_deleting(new, &flags); + if (old.k->type == KEY_TYPE_reflink_v && + new->k.type == KEY_TYPE_reflink_v && + old.k->u64s == new->k.u64s && + !memcmp(bkey_s_c_to_reflink_v(old).v->start, + bkey_i_to_reflink_v(new)->v.start, + bkey_val_bytes(&new->k) - 8)) + return 0; + return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags); } diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index ab743115f169..f3cb7115b530 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -276,8 +276,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c if (!btree_type_has_ptrs(id)) continue; - for_each_btree_key(trans, iter, id, POS_MIN, - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + ret = for_each_btree_key2(trans, iter, id, POS_MIN, + BTREE_ITER_ALL_SNAPSHOTS, k, ({ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -309,8 +309,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c nr_uncompressed_extents++; else if (compressed) nr_compressed_extents++; - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); } bch2_trans_put(trans); diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index 51453d4928fa..2833e8ef4c09 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -199,7 +199,7 @@ void btrfs_free_reserved_data_space(struct btrfs_inode *inode, start = round_down(start, fs_info->sectorsize); btrfs_free_reserved_data_space_noquota(fs_info, len); - btrfs_qgroup_free_data(inode, reserved, start, len); + btrfs_qgroup_free_data(inode, reserved, start, len, NULL); } /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bbcc3df77646..62cb97f7c94f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4799,6 +4799,32 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans, } } +static void btrfs_free_all_qgroup_pertrans(struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *gang[8]; + int i; + int ret; + + spin_lock(&fs_info->fs_roots_radix_lock); + while (1) { + ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, + (void **)gang, 0, + ARRAY_SIZE(gang), + BTRFS_ROOT_TRANS_TAG); + if (ret == 0) + break; + for (i = 0; i < ret; i++) { + struct btrfs_root *root = gang[i]; + + btrfs_qgroup_free_meta_all_pertrans(root); + radix_tree_tag_clear(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + } + } + spin_unlock(&fs_info->fs_roots_radix_lock); +} + void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_fs_info *fs_info) { @@ -4827,6 +4853,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, EXTENT_DIRTY); btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents); + btrfs_free_all_qgroup_pertrans(fs_info); + cur_trans->state =TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0455935ff558..01423670bc8a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1547,6 +1547,23 @@ out: return ret; } +static void free_head_ref_squota_rsv(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_head *href) +{ + u64 root = href->owning_root; + + /* + * Don't check must_insert_reserved, as this is called from contexts + * where it has already been unset. + */ + if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE || + !href->is_data || !is_fstree(root)) + return; + + btrfs_qgroup_free_refroot(fs_info, root, href->reserved_bytes, + BTRFS_QGROUP_RSV_DATA); +} + static int run_delayed_data_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *href, struct btrfs_delayed_ref_node *node, @@ -1569,7 +1586,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, struct btrfs_squota_delta delta = { .root = href->owning_root, .num_bytes = node->num_bytes, - .rsv_bytes = href->reserved_bytes, .is_data = true, .is_inc = true, .generation = trans->transid, @@ -1586,11 +1602,9 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, flags, ref->objectid, ref->offset, &key, node->ref_mod, href->owning_root); + free_head_ref_squota_rsv(trans->fs_info, href); if (!ret) ret = btrfs_record_squota_delta(trans->fs_info, &delta); - else - btrfs_qgroup_free_refroot(trans->fs_info, delta.root, - delta.rsv_bytes, BTRFS_QGROUP_RSV_DATA); } else if (node->action == BTRFS_ADD_DELAYED_REF) { ret = __btrfs_inc_extent_ref(trans, node, parent, ref->root, ref->objectid, ref->offset, @@ -1742,7 +1756,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, struct btrfs_squota_delta delta = { .root = href->owning_root, .num_bytes = fs_info->nodesize, - .rsv_bytes = 0, .is_data = false, .is_inc = true, .generation = trans->transid, @@ -1774,8 +1787,10 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, int ret = 0; if (TRANS_ABORTED(trans)) { - if (insert_reserved) + if (insert_reserved) { btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); + free_head_ref_squota_rsv(trans->fs_info, href); + } return 0; } @@ -1871,6 +1886,8 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head) { + u64 ret = 0; + /* * We had csum deletions accounted for in our delayed refs rsv, we need * to drop the csum leaves for this update from our delayed_refs_rsv. @@ -1885,14 +1902,13 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, btrfs_delayed_refs_rsv_release(fs_info, 0, nr_csums); - return btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums); + ret = btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums); } - if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE && - head->must_insert_reserved && head->is_data) - btrfs_qgroup_free_refroot(fs_info, head->owning_root, - head->reserved_bytes, BTRFS_QGROUP_RSV_DATA); + /* must_insert_reserved can be set only if we didn't run the head ref. */ + if (head->must_insert_reserved) + free_head_ref_squota_rsv(fs_info, head); - return 0; + return ret; } static int cleanup_ref_head(struct btrfs_trans_handle *trans, @@ -2033,6 +2049,12 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, * spin lock. */ must_insert_reserved = locked_ref->must_insert_reserved; + /* + * Unsetting this on the head ref relinquishes ownership of + * the rsv_bytes, so it is critical that every possible code + * path from here forward frees all reserves including qgroup + * reserve. + */ locked_ref->must_insert_reserved = false; extent_op = locked_ref->extent_op; @@ -3292,7 +3314,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_squota_delta delta = { .root = delayed_ref_root, .num_bytes = num_bytes, - .rsv_bytes = 0, .is_data = is_data, .is_inc = false, .generation = btrfs_extent_generation(leaf, ei), @@ -4937,7 +4958,6 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, .root = root_objectid, .num_bytes = ins->offset, .generation = trans->transid, - .rsv_bytes = 0, .is_data = true, .is_inc = true, }; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e6230a6ffa98..8f724c54fc8e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2302,7 +2302,8 @@ static int try_release_extent_state(struct extent_io_tree *tree, ret = 0; } else { u32 clear_bits = ~(EXTENT_LOCKED | EXTENT_NODATASUM | - EXTENT_DELALLOC_NEW | EXTENT_CTLBITS); + EXTENT_DELALLOC_NEW | EXTENT_CTLBITS | + EXTENT_QGROUP_RESERVED); /* * At this point we can safely clear everything except the diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f47731c45bb5..32611a4edd6b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3192,7 +3192,7 @@ static long btrfs_fallocate(struct file *file, int mode, qgroup_reserved -= range->len; } else if (qgroup_reserved > 0) { btrfs_qgroup_free_data(BTRFS_I(inode), data_reserved, - range->start, range->len); + range->start, range->len, NULL); qgroup_reserved -= range->len; } list_del(&range->list); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9f5a9894f88f..fb3c3f43c3fa 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -688,7 +688,7 @@ out: * And at reserve time, it's always aligned to page size, so * just free one page here. */ - btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE); + btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE, NULL); btrfs_free_path(path); btrfs_end_transaction(trans); return ret; @@ -5132,7 +5132,7 @@ static void evict_inode_truncate_pages(struct inode *inode) */ if (state_flags & EXTENT_DELALLOC) btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start, - end - start + 1); + end - start + 1, NULL); clear_extent_bit(io_tree, start, end, EXTENT_CLEAR_ALL_BITS | EXTENT_DO_ACCOUNTING, @@ -8059,7 +8059,7 @@ next: * reserved data space. * Since the IO will never happen for this page. */ - btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur); + btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur, NULL); if (!inode_evicting) { clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_UPTODATE | @@ -9491,7 +9491,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( struct btrfs_path *path; u64 start = ins->objectid; u64 len = ins->offset; - int qgroup_released; + u64 qgroup_released = 0; int ret; memset(&stack_fi, 0, sizeof(stack_fi)); @@ -9504,9 +9504,9 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE); /* Encryption and other encoding is reserved and all 0 */ - qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len); - if (qgroup_released < 0) - return ERR_PTR(qgroup_released); + ret = btrfs_qgroup_release_data(inode, file_offset, len, &qgroup_released); + if (ret < 0) + return ERR_PTR(ret); if (trans) { ret = insert_reserved_file_extent(trans, inode, @@ -10401,7 +10401,7 @@ out_delalloc_release: btrfs_delalloc_release_metadata(inode, disk_num_bytes, ret < 0); out_qgroup_free_data: if (ret < 0) - btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes); + btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes, NULL); out_free_data_space: /* * If btrfs_reserve_extent() succeeded, then we already decremented diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 574e8a55e24a..a82e1417c4d2 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -152,11 +152,12 @@ static struct btrfs_ordered_extent *alloc_ordered_extent( { struct btrfs_ordered_extent *entry; int ret; + u64 qgroup_rsv = 0; if (flags & ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) { /* For nocow write, we can release the qgroup rsv right now */ - ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes); + ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes, &qgroup_rsv); if (ret < 0) return ERR_PTR(ret); } else { @@ -164,7 +165,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent( * The ordered extent has reserved qgroup space, release now * and pass the reserved number for qgroup_record to free. */ - ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes); + ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes, &qgroup_rsv); if (ret < 0) return ERR_PTR(ret); } @@ -182,7 +183,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent( entry->inode = igrab(&inode->vfs_inode); entry->compress_type = compress_type; entry->truncated_len = (u64)-1; - entry->qgroup_rsv = ret; + entry->qgroup_rsv = qgroup_rsv; entry->flags = flags; refcount_set(&entry->refs, 1); init_waitqueue_head(&entry->wait); @@ -599,7 +600,9 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, release = entry->disk_num_bytes; else release = entry->num_bytes; - btrfs_delalloc_release_metadata(btrfs_inode, release, false); + btrfs_delalloc_release_metadata(btrfs_inode, release, + test_bit(BTRFS_ORDERED_IOERR, + &entry->flags)); } percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index ce446d9d7f23..e46774e8f49f 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4057,13 +4057,14 @@ int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, /* Free ranges specified by @reserved, normally in error path */ static int qgroup_free_reserved_data(struct btrfs_inode *inode, - struct extent_changeset *reserved, u64 start, u64 len) + struct extent_changeset *reserved, + u64 start, u64 len, u64 *freed_ret) { struct btrfs_root *root = inode->root; struct ulist_node *unode; struct ulist_iterator uiter; struct extent_changeset changeset; - int freed = 0; + u64 freed = 0; int ret; extent_changeset_init(&changeset); @@ -4104,7 +4105,9 @@ static int qgroup_free_reserved_data(struct btrfs_inode *inode, } btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, freed, BTRFS_QGROUP_RSV_DATA); - ret = freed; + if (freed_ret) + *freed_ret = freed; + ret = 0; out: extent_changeset_release(&changeset); return ret; @@ -4112,7 +4115,7 @@ out: static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, struct extent_changeset *reserved, u64 start, u64 len, - int free) + u64 *released, int free) { struct extent_changeset changeset; int trace_op = QGROUP_RELEASE; @@ -4128,7 +4131,7 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, /* In release case, we shouldn't have @reserved */ WARN_ON(!free && reserved); if (free && reserved) - return qgroup_free_reserved_data(inode, reserved, start, len); + return qgroup_free_reserved_data(inode, reserved, start, len, released); extent_changeset_init(&changeset); ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1, EXTENT_QGROUP_RESERVED, &changeset); @@ -4143,7 +4146,8 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, btrfs_qgroup_free_refroot(inode->root->fs_info, inode->root->root_key.objectid, changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA); - ret = changeset.bytes_changed; + if (released) + *released = changeset.bytes_changed; out: extent_changeset_release(&changeset); return ret; @@ -4162,9 +4166,10 @@ out: * NOTE: This function may sleep for memory allocation. */ int btrfs_qgroup_free_data(struct btrfs_inode *inode, - struct extent_changeset *reserved, u64 start, u64 len) + struct extent_changeset *reserved, + u64 start, u64 len, u64 *freed) { - return __btrfs_qgroup_release_data(inode, reserved, start, len, 1); + return __btrfs_qgroup_release_data(inode, reserved, start, len, freed, 1); } /* @@ -4182,9 +4187,9 @@ int btrfs_qgroup_free_data(struct btrfs_inode *inode, * * NOTE: This function may sleep for memory allocation. */ -int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len) +int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released) { - return __btrfs_qgroup_release_data(inode, NULL, start, len, 0); + return __btrfs_qgroup_release_data(inode, NULL, start, len, released, 0); } static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes, @@ -4332,8 +4337,9 @@ static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root, qgroup_rsv_release(fs_info, qgroup, num_bytes, BTRFS_QGROUP_RSV_META_PREALLOC); - qgroup_rsv_add(fs_info, qgroup, num_bytes, - BTRFS_QGROUP_RSV_META_PERTRANS); + if (!sb_rdonly(fs_info->sb)) + qgroup_rsv_add(fs_info, qgroup, num_bytes, + BTRFS_QGROUP_RSV_META_PERTRANS); list_for_each_entry(glist, &qgroup->groups, next_group) qgroup_iterator_add(&qgroup_list, glist->group); @@ -4655,6 +4661,17 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans) *root = RB_ROOT; } +void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes) +{ + if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE) + return; + + if (!is_fstree(root)) + return; + + btrfs_qgroup_free_refroot(fs_info, root, rsv_bytes, BTRFS_QGROUP_RSV_DATA); +} + int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, struct btrfs_squota_delta *delta) { @@ -4699,8 +4716,5 @@ int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, out: spin_unlock(&fs_info->qgroup_lock); - if (!ret && delta->rsv_bytes) - btrfs_qgroup_free_refroot(fs_info, root, delta->rsv_bytes, - BTRFS_QGROUP_RSV_DATA); return ret; } diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 855a4f978761..be18c862e64e 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -274,8 +274,6 @@ struct btrfs_squota_delta { u64 root; /* The number of bytes in the extent being counted. */ u64 num_bytes; - /* The number of bytes reserved for this extent. */ - u64 rsv_bytes; /* The generation the extent was created in. */ u64 generation; /* Whether we are using or freeing the extent. */ @@ -358,10 +356,10 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, /* New io_tree based accurate qgroup reserve API */ int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, struct extent_changeset **reserved, u64 start, u64 len); -int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len); +int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released); int btrfs_qgroup_free_data(struct btrfs_inode *inode, struct extent_changeset *reserved, u64 start, - u64 len); + u64 len, u64 *freed); int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, enum btrfs_qgroup_rsv_type type, bool enforce); int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, @@ -422,6 +420,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb); void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans); bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info); +void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes); int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, struct btrfs_squota_delta *delta); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bfc0eb5e3b7c..5b3333ceef04 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -37,8 +37,6 @@ static struct kmem_cache *btrfs_trans_handle_cachep; -#define BTRFS_ROOT_TRANS_TAG 0 - /* * Transaction states and transitions * diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 18c4f6e83b78..2bf8bbdfd0b3 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -12,6 +12,9 @@ #include "ctree.h" #include "misc.h" +/* Radix-tree tag for roots that are part of the trasaction. */ +#define BTRFS_ROOT_TRANS_TAG 0 + enum btrfs_trans_state { TRANS_STATE_RUNNING, TRANS_STATE_COMMIT_PREP, diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 0166bb9ca160..6aa15dafc677 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -349,9 +349,10 @@ static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count) return; } /* - * If i_disksize got extended due to writeback of delalloc blocks while - * the DIO was running we could fail to cleanup the orphan list in - * ext4_handle_inode_extension(). Do it now. + * If i_disksize got extended either due to writeback of delalloc + * blocks or extending truncate while the DIO was running we could fail + * to cleanup the orphan list in ext4_handle_inode_extension(). Do it + * now. */ if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); @@ -386,10 +387,11 @@ static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size, * blocks. But the code in ext4_iomap_alloc() is careful to use * zeroed/unwritten extents if this is possible; thus we won't leave * uninitialized blocks in a file even if we didn't succeed in writing - * as much as we intended. + * as much as we intended. Also we can race with truncate or write + * expanding the file so we have to be a bit careful here. */ - WARN_ON_ONCE(i_size_read(inode) < READ_ONCE(EXT4_I(inode)->i_disksize)); - if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize)) + if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize) && + pos + size <= i_size_read(inode)) return size; return ext4_handle_inode_extension(inode, pos, size); } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 454d5612641e..d72b5e3c92ec 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4478,6 +4478,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, start = max(start, rounddown(ac->ac_o_ex.fe_logical, (ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb))); + /* avoid unnecessary preallocation that may trigger assertions */ + if (start + size > EXT_MAX_BLOCKS) + size = EXT_MAX_BLOCKS - start; + /* don't cover already allocated blocks in selected range */ if (ar->pleft && start <= ar->lleft) { size -= ar->lleft + 1 - start; diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 23904a6a9a96..12ef91d170bb 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -1222,6 +1222,7 @@ void fuse_dax_conn_free(struct fuse_conn *fc) if (fc->dax) { fuse_free_dax_mem_ranges(&fc->dax->free_ranges); kfree(fc->dax); + fc->dax = NULL; } } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1cdb6327511e..a660f1f21540 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1448,7 +1448,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, if (!ia) return -ENOMEM; - if (fopen_direct_io && fc->direct_io_relax) { + if (fopen_direct_io && fc->direct_io_allow_mmap) { res = filemap_write_and_wait_range(mapping, pos, pos + count - 1); if (res) { fuse_io_free(ia); @@ -1574,6 +1574,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t res; bool exclusive_lock = !(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES) || + get_fuse_conn(inode)->direct_io_allow_mmap || iocb->ki_flags & IOCB_APPEND || fuse_direct_write_extending_i_size(iocb, from); @@ -1581,6 +1582,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) * Take exclusive lock if * - Parallel direct writes are disabled - a user space decision * - Parallel direct writes are enabled and i_size is being extended. + * - Shared mmap on direct_io file is supported (FUSE_DIRECT_IO_ALLOW_MMAP). * This might not be needed at all, but needs further investigation. */ if (exclusive_lock) @@ -2466,9 +2468,9 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) if (ff->open_flags & FOPEN_DIRECT_IO) { /* Can't provide the coherency needed for MAP_SHARED - * if FUSE_DIRECT_IO_RELAX isn't set. + * if FUSE_DIRECT_IO_ALLOW_MMAP isn't set. */ - if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_relax) + if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_allow_mmap) return -ENODEV; invalidate_inode_pages2(file->f_mapping); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 6e6e721f421b..1df83eebda92 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -63,6 +63,19 @@ struct fuse_forget_link { struct fuse_forget_link *next; }; +/* Submount lookup tracking */ +struct fuse_submount_lookup { + /** Refcount */ + refcount_t count; + + /** Unique ID, which identifies the inode between userspace + * and kernel */ + u64 nodeid; + + /** The request used for sending the FORGET message */ + struct fuse_forget_link *forget; +}; + /** FUSE inode */ struct fuse_inode { /** Inode data */ @@ -158,6 +171,8 @@ struct fuse_inode { */ struct fuse_inode_dax *dax; #endif + /** Submount specific lookup tracking */ + struct fuse_submount_lookup *submount_lookup; }; /** FUSE inode state bits */ @@ -797,8 +812,8 @@ struct fuse_conn { /* Is tmpfile not implemented by fs? */ unsigned int no_tmpfile:1; - /* relax restrictions in FOPEN_DIRECT_IO mode */ - unsigned int direct_io_relax:1; + /* Relax restrictions to allow shared mmap in FOPEN_DIRECT_IO mode */ + unsigned int direct_io_allow_mmap:1; /* Is statx not implemented by fs? */ unsigned int no_statx:1; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 74d4f09d5827..2a6d44f91729 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -68,6 +68,24 @@ struct fuse_forget_link *fuse_alloc_forget(void) return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT); } +static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void) +{ + struct fuse_submount_lookup *sl; + + sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT); + if (!sl) + return NULL; + sl->forget = fuse_alloc_forget(); + if (!sl->forget) + goto out_free; + + return sl; + +out_free: + kfree(sl); + return NULL; +} + static struct inode *fuse_alloc_inode(struct super_block *sb) { struct fuse_inode *fi; @@ -83,6 +101,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->attr_version = 0; fi->orig_ino = 0; fi->state = 0; + fi->submount_lookup = NULL; mutex_init(&fi->mutex); spin_lock_init(&fi->lock); fi->forget = fuse_alloc_forget(); @@ -113,6 +132,17 @@ static void fuse_free_inode(struct inode *inode) kmem_cache_free(fuse_inode_cachep, fi); } +static void fuse_cleanup_submount_lookup(struct fuse_conn *fc, + struct fuse_submount_lookup *sl) +{ + if (!refcount_dec_and_test(&sl->count)) + return; + + fuse_queue_forget(fc, sl->forget, sl->nodeid, 1); + sl->forget = NULL; + kfree(sl); +} + static void fuse_evict_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); @@ -132,6 +162,11 @@ static void fuse_evict_inode(struct inode *inode) fi->nlookup); fi->forget = NULL; } + + if (fi->submount_lookup) { + fuse_cleanup_submount_lookup(fc, fi->submount_lookup); + fi->submount_lookup = NULL; + } } if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { WARN_ON(!list_empty(&fi->write_files)); @@ -330,6 +365,13 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, fuse_dax_dontcache(inode, attr->flags); } +static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl, + u64 nodeid) +{ + sl->nodeid = nodeid; + refcount_set(&sl->count, 1); +} + static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr, struct fuse_conn *fc) { @@ -392,12 +434,22 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, */ if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) && S_ISDIR(attr->mode)) { + struct fuse_inode *fi; + inode = new_inode(sb); if (!inode) return NULL; fuse_init_inode(inode, attr, fc); - get_fuse_inode(inode)->nodeid = nodeid; + fi = get_fuse_inode(inode); + fi->nodeid = nodeid; + fi->submount_lookup = fuse_alloc_submount_lookup(); + if (!fi->submount_lookup) { + iput(inode); + return NULL; + } + /* Sets nlookup = 1 on fi->submount_lookup->nlookup */ + fuse_init_submount_lookup(fi->submount_lookup, nodeid); inode->i_flags |= S_AUTOMOUNT; goto done; } @@ -420,11 +472,11 @@ retry: iput(inode); goto retry; } -done: fi = get_fuse_inode(inode); spin_lock(&fi->lock); fi->nlookup++; spin_unlock(&fi->lock); +done: fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version); return inode; @@ -1230,8 +1282,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->init_security = 1; if (flags & FUSE_CREATE_SUPP_GROUP) fc->create_supp_group = 1; - if (flags & FUSE_DIRECT_IO_RELAX) - fc->direct_io_relax = 1; + if (flags & FUSE_DIRECT_IO_ALLOW_MMAP) + fc->direct_io_allow_mmap = 1; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; @@ -1278,7 +1330,7 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | - FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_RELAX; + FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; @@ -1465,6 +1517,8 @@ static int fuse_fill_super_submount(struct super_block *sb, struct super_block *parent_sb = parent_fi->inode.i_sb; struct fuse_attr root_attr; struct inode *root; + struct fuse_submount_lookup *sl; + struct fuse_inode *fi; fuse_sb_defaults(sb); fm->sb = sb; @@ -1487,12 +1541,27 @@ static int fuse_fill_super_submount(struct super_block *sb, * its nlookup should not be incremented. fuse_iget() does * that, though, so undo it here. */ - get_fuse_inode(root)->nlookup--; + fi = get_fuse_inode(root); + fi->nlookup--; + sb->s_d_op = &fuse_dentry_operations; sb->s_root = d_make_root(root); if (!sb->s_root) return -ENOMEM; + /* + * Grab the parent's submount_lookup pointer and take a + * reference on the shared nlookup from the parent. This is to + * prevent the last forget for this nodeid from getting + * triggered until all users have finished with it. + */ + sl = parent_fi->submount_lookup; + WARN_ON(!sl); + if (sl) { + refcount_inc(&sl->count); + fi->submount_lookup = sl; + } + return 0; } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 8d6f934c3d95..5e122586e06e 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -119,7 +119,7 @@ static int journal_submit_commit_record(journal_t *journal, struct commit_header *tmp; struct buffer_head *bh; struct timespec64 now; - blk_opf_t write_flags = REQ_OP_WRITE | REQ_SYNC; + blk_opf_t write_flags = REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS; *cbh = NULL; @@ -270,6 +270,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal, if (!ret) ret = err; } + cond_resched(); spin_lock(&journal->j_list_lock); jinode->i_flags &= ~JI_COMMIT_RUNNING; smp_mb(); @@ -395,8 +396,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ jbd2_journal_update_sb_log_tail(journal, journal->j_tail_sequence, - journal->j_tail, - REQ_SYNC); + journal->j_tail, 0); mutex_unlock(&journal->j_checkpoint_mutex); } else { jbd2_debug(3, "superblock not updated\n"); @@ -715,6 +715,7 @@ start_journal_io: for (i = 0; i < bufs; i++) { struct buffer_head *bh = wbuf[i]; + /* * Compute checksum. */ @@ -727,7 +728,8 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); + submit_bh(REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS, + bh); } cond_resched(); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ed53188472f9..206cb53ef2b0 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1100,8 +1100,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) * space and if we lose sb update during power failure we'd replay * old transaction with possibly newly overwritten data. */ - ret = jbd2_journal_update_sb_log_tail(journal, tid, block, - REQ_SYNC | REQ_FUA); + ret = jbd2_journal_update_sb_log_tail(journal, tid, block, REQ_FUA); if (ret) goto out; @@ -1775,8 +1774,7 @@ static int journal_reset(journal_t *journal) */ jbd2_journal_update_sb_log_tail(journal, journal->j_tail_sequence, - journal->j_tail, - REQ_SYNC | REQ_FUA); + journal->j_tail, REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } return jbd2_journal_start_thread(journal); @@ -1798,9 +1796,16 @@ static int jbd2_write_superblock(journal_t *journal, blk_opf_t write_flags) return -EIO; } - trace_jbd2_write_superblock(journal, write_flags); + /* + * Always set high priority flags to exempt from block layer's + * QOS policies, e.g. writeback throttle. + */ + write_flags |= JBD2_JOURNAL_REQ_FLAGS; if (!(journal->j_flags & JBD2_BARRIER)) write_flags &= ~(REQ_FUA | REQ_PREFLUSH); + + trace_jbd2_write_superblock(journal, write_flags); + if (buffer_write_io_error(bh)) { /* * Oh, dear. A previous attempt to write the journal @@ -2050,7 +2055,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal) jbd2_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode); sb->s_errno = cpu_to_be32(errcode); - jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA); + jbd2_write_superblock(journal, REQ_FUA); } EXPORT_SYMBOL(jbd2_journal_update_sb_errno); @@ -2171,8 +2176,7 @@ int jbd2_journal_destroy(journal_t *journal) ++journal->j_transaction_sequence; write_unlock(&journal->j_state_lock); - jbd2_mark_journal_empty(journal, - REQ_SYNC | REQ_PREFLUSH | REQ_FUA); + jbd2_mark_journal_empty(journal, REQ_PREFLUSH | REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } else err = -EIO; @@ -2473,7 +2477,7 @@ int jbd2_journal_flush(journal_t *journal, unsigned int flags) * the magic code for a fully-recovered superblock. Any future * commits of data to the journal will restore the current * s_start value. */ - jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA); + jbd2_mark_journal_empty(journal, REQ_FUA); if (flags) err = __jbd2_journal_erase(journal, flags); @@ -2519,7 +2523,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) if (write) { /* Lock to make assertions happy... */ mutex_lock_io(&journal->j_checkpoint_mutex); - jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA); + jbd2_mark_journal_empty(journal, REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 9fbaaa387dcc..57f2343164a3 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1145,7 +1145,7 @@ struct smb2_server_client_notification { #define SMB2_CREATE_SD_BUFFER "SecD" /* security descriptor */ #define SMB2_CREATE_DURABLE_HANDLE_REQUEST "DHnQ" #define SMB2_CREATE_DURABLE_HANDLE_RECONNECT "DHnC" -#define SMB2_CREATE_ALLOCATION_SIZE "AISi" +#define SMB2_CREATE_ALLOCATION_SIZE "AlSi" #define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc" #define SMB2_CREATE_TIMEWARP_REQUEST "TWrp" #define SMB2_CREATE_QUERY_ON_DISK_ID "QFid" @@ -1253,6 +1253,7 @@ struct create_mxac_rsp { #define SMB2_LEASE_WRITE_CACHING_LE cpu_to_le32(0x04) #define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS_LE cpu_to_le32(0x02) +#define SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE cpu_to_le32(0x04) #define SMB2_LEASE_KEY_SIZE 16 diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 50c68beb71d6..562b180459a1 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -102,9 +102,10 @@ static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx) lease->new_state = 0; lease->flags = lctx->flags; lease->duration = lctx->duration; + lease->is_dir = lctx->is_dir; memcpy(lease->parent_lease_key, lctx->parent_lease_key, SMB2_LEASE_KEY_SIZE); lease->version = lctx->version; - lease->epoch = 0; + lease->epoch = le16_to_cpu(lctx->epoch); INIT_LIST_HEAD(&opinfo->lease_entry); opinfo->o_lease = lease; @@ -395,8 +396,8 @@ void close_id_del_oplock(struct ksmbd_file *fp) { struct oplock_info *opinfo; - if (S_ISDIR(file_inode(fp->filp)->i_mode)) - return; + if (fp->reserve_lease_break) + smb_lazy_parent_lease_break_close(fp); opinfo = opinfo_get(fp); if (!opinfo) @@ -543,12 +544,13 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, /* upgrading lease */ if ((atomic_read(&ci->op_count) + atomic_read(&ci->sop_count)) == 1) { - if (lease->state == - (lctx->req_state & lease->state)) { + if (lease->state != SMB2_LEASE_NONE_LE && + lease->state == (lctx->req_state & lease->state)) { lease->state |= lctx->req_state; if (lctx->req_state & SMB2_LEASE_WRITE_CACHING_LE) lease_read_to_write(opinfo); + } } else if ((atomic_read(&ci->op_count) + atomic_read(&ci->sop_count)) > 1) { @@ -900,7 +902,8 @@ static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level) lease->new_state = SMB2_LEASE_READ_CACHING_LE; } else { - if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE) + if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE && + !lease->is_dir) lease->new_state = SMB2_LEASE_READ_CACHING_LE; else @@ -1032,6 +1035,7 @@ static void copy_lease(struct oplock_info *op1, struct oplock_info *op2) SMB2_LEASE_KEY_SIZE); lease2->duration = lease1->duration; lease2->flags = lease1->flags; + lease2->epoch = lease1->epoch++; } static int add_lease_global_list(struct oplock_info *opinfo) @@ -1081,6 +1085,89 @@ static void set_oplock_level(struct oplock_info *opinfo, int level, } } +void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, + struct lease_ctx_info *lctx) +{ + struct oplock_info *opinfo; + struct ksmbd_inode *p_ci = NULL; + + if (lctx->version != 2) + return; + + p_ci = ksmbd_inode_lookup_lock(fp->filp->f_path.dentry->d_parent); + if (!p_ci) + return; + + read_lock(&p_ci->m_lock); + list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) { + if (!opinfo->is_lease) + continue; + + if (opinfo->o_lease->state != SMB2_OPLOCK_LEVEL_NONE && + (!(lctx->flags & SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE) || + !compare_guid_key(opinfo, fp->conn->ClientGUID, + lctx->parent_lease_key))) { + if (!atomic_inc_not_zero(&opinfo->refcount)) + continue; + + atomic_inc(&opinfo->conn->r_count); + if (ksmbd_conn_releasing(opinfo->conn)) { + atomic_dec(&opinfo->conn->r_count); + continue; + } + + read_unlock(&p_ci->m_lock); + oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); + opinfo_conn_put(opinfo); + read_lock(&p_ci->m_lock); + } + } + read_unlock(&p_ci->m_lock); + + ksmbd_inode_put(p_ci); +} + +void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) +{ + struct oplock_info *opinfo; + struct ksmbd_inode *p_ci = NULL; + + rcu_read_lock(); + opinfo = rcu_dereference(fp->f_opinfo); + rcu_read_unlock(); + + if (!opinfo->is_lease || opinfo->o_lease->version != 2) + return; + + p_ci = ksmbd_inode_lookup_lock(fp->filp->f_path.dentry->d_parent); + if (!p_ci) + return; + + read_lock(&p_ci->m_lock); + list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) { + if (!opinfo->is_lease) + continue; + + if (opinfo->o_lease->state != SMB2_OPLOCK_LEVEL_NONE) { + if (!atomic_inc_not_zero(&opinfo->refcount)) + continue; + + atomic_inc(&opinfo->conn->r_count); + if (ksmbd_conn_releasing(opinfo->conn)) { + atomic_dec(&opinfo->conn->r_count); + continue; + } + read_unlock(&p_ci->m_lock); + oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); + opinfo_conn_put(opinfo); + read_lock(&p_ci->m_lock); + } + } + read_unlock(&p_ci->m_lock); + + ksmbd_inode_put(p_ci); +} + /** * smb_grant_oplock() - handle oplock/lease request on file open * @work: smb work @@ -1104,10 +1191,6 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, bool prev_op_has_lease; __le32 prev_op_state = 0; - /* not support directory lease */ - if (S_ISDIR(file_inode(fp->filp)->i_mode)) - return 0; - opinfo = alloc_opinfo(work, pid, tid); if (!opinfo) return -ENOMEM; @@ -1364,6 +1447,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) memcpy(buf->lcontext.LeaseKey, lease->lease_key, SMB2_LEASE_KEY_SIZE); buf->lcontext.LeaseFlags = lease->flags; + buf->lcontext.Epoch = cpu_to_le16(++lease->epoch); buf->lcontext.LeaseState = lease->state; memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key, SMB2_LEASE_KEY_SIZE); @@ -1400,10 +1484,11 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) /** * parse_lease_state() - parse lease context containted in file open request * @open_req: buffer containing smb2 file open(create) request + * @is_dir: whether leasing file is directory * * Return: oplock state, -ENOENT if create lease context not found */ -struct lease_ctx_info *parse_lease_state(void *open_req) +struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir) { struct create_context *cc; struct smb2_create_req *req = (struct smb2_create_req *)open_req; @@ -1421,8 +1506,14 @@ struct lease_ctx_info *parse_lease_state(void *open_req) struct create_lease_v2 *lc = (struct create_lease_v2 *)cc; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); - lreq->req_state = lc->lcontext.LeaseState; + if (is_dir) { + lreq->req_state = lc->lcontext.LeaseState & + ~SMB2_LEASE_WRITE_CACHING_LE; + lreq->is_dir = true; + } else + lreq->req_state = lc->lcontext.LeaseState; lreq->flags = lc->lcontext.LeaseFlags; + lreq->epoch = lc->lcontext.Epoch; lreq->duration = lc->lcontext.LeaseDuration; memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey, SMB2_LEASE_KEY_SIZE); diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index 4b0fe6da7694..5b93ea9196c0 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -34,7 +34,9 @@ struct lease_ctx_info { __le32 flags; __le64 duration; __u8 parent_lease_key[SMB2_LEASE_KEY_SIZE]; + __le16 epoch; int version; + bool is_dir; }; struct lease_table { @@ -53,6 +55,7 @@ struct lease { __u8 parent_lease_key[SMB2_LEASE_KEY_SIZE]; int version; unsigned short epoch; + bool is_dir; struct lease_table *l_lb; }; @@ -108,7 +111,7 @@ void opinfo_put(struct oplock_info *opinfo); /* Lease related functions */ void create_lease_buf(u8 *rbuf, struct lease *lease); -struct lease_ctx_info *parse_lease_state(void *open_req); +struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir); __u8 smb2_map_lease_to_oplock(__le32 lease_state); int lease_read_to_write(struct oplock_info *opinfo); @@ -124,4 +127,7 @@ struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn, int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci, struct lease_ctx_info *lctx); void destroy_lease_table(struct ksmbd_conn *conn); +void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, + struct lease_ctx_info *lctx); +void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp); #endif /* __KSMBD_OPLOCK_H */ diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c index aed7704a0672..27a9dce3e03a 100644 --- a/fs/smb/server/smb2ops.c +++ b/fs/smb/server/smb2ops.c @@ -221,7 +221,8 @@ void init_smb3_0_server(struct ksmbd_conn *conn) conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; + conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | + SMB2_GLOBAL_CAP_DIRECTORY_LEASING; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION && conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION) @@ -245,7 +246,8 @@ void init_smb3_02_server(struct ksmbd_conn *conn) conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; + conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | + SMB2_GLOBAL_CAP_DIRECTORY_LEASING; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && @@ -270,7 +272,8 @@ int init_smb3_11_server(struct ksmbd_conn *conn) conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; + conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | + SMB2_GLOBAL_CAP_DIRECTORY_LEASING; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index d369b98a6e10..652ab429bf2e 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2516,7 +2516,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path * da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME | XATTR_DOSINFO_ITIME; - rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da, false); + rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da, true); if (rc) ksmbd_debug(SMB, "failed to store file attribute into xattr\n"); } @@ -2732,10 +2732,6 @@ int smb2_open(struct ksmbd_work *work) } } - req_op_level = req->RequestedOplockLevel; - if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) - lc = parse_lease_state(req); - if (le32_to_cpu(req->ImpersonationLevel) > le32_to_cpu(IL_DELEGATE)) { pr_err("Invalid impersonationlevel : 0x%x\n", le32_to_cpu(req->ImpersonationLevel)); @@ -3189,23 +3185,6 @@ int smb2_open(struct ksmbd_work *work) goto err_out; } - rc = ksmbd_vfs_getattr(&path, &stat); - if (rc) - goto err_out; - - if (stat.result_mask & STATX_BTIME) - fp->create_time = ksmbd_UnixTimeToNT(stat.btime); - else - fp->create_time = ksmbd_UnixTimeToNT(stat.ctime); - if (req->FileAttributes || fp->f_ci->m_fattr == 0) - fp->f_ci->m_fattr = - cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes))); - - if (!created) - smb2_update_xattrs(tcon, &path, fp); - else - smb2_new_xattrs(tcon, &path, fp); - if (file_present || created) ksmbd_vfs_kern_path_unlock(&parent_path, &path); @@ -3215,6 +3194,10 @@ int smb2_open(struct ksmbd_work *work) need_truncate = 1; } + req_op_level = req->RequestedOplockLevel; + if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) + lc = parse_lease_state(req, S_ISDIR(file_inode(filp)->i_mode)); + share_ret = ksmbd_smb_check_shared_mode(fp->filp, fp); if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_OPLOCKS) || (req_op_level == SMB2_OPLOCK_LEVEL_LEASE && @@ -3225,6 +3208,13 @@ int smb2_open(struct ksmbd_work *work) } } else { if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) { + /* + * Compare parent lease using parent key. If there is no + * a lease that has same parent key, Send lease break + * notification. + */ + smb_send_parent_lease_break_noti(fp, lc); + req_op_level = smb2_map_lease_to_oplock(lc->req_state); ksmbd_debug(SMB, "lease req for(%s) req oplock state 0x%x, lease state 0x%x\n", @@ -3295,6 +3285,23 @@ int smb2_open(struct ksmbd_work *work) } } + rc = ksmbd_vfs_getattr(&path, &stat); + if (rc) + goto err_out1; + + if (stat.result_mask & STATX_BTIME) + fp->create_time = ksmbd_UnixTimeToNT(stat.btime); + else + fp->create_time = ksmbd_UnixTimeToNT(stat.ctime); + if (req->FileAttributes || fp->f_ci->m_fattr == 0) + fp->f_ci->m_fattr = + cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes))); + + if (!created) + smb2_update_xattrs(tcon, &path, fp); + else + smb2_new_xattrs(tcon, &path, fp); + memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE); rsp->StructureSize = cpu_to_le16(89); @@ -7080,6 +7087,7 @@ skip: smb2_remove_blocked_lock, argv); if (rc) { + kfree(argv); err = -ENOMEM; goto out; } @@ -8211,6 +8219,11 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) le32_to_cpu(req->LeaseState)); } + if (ret < 0) { + rsp->hdr.Status = err; + goto err_out; + } + lease_state = lease->state; opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); @@ -8218,11 +8231,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) wake_up_interruptible_all(&opinfo->oplock_brk); opinfo_put(opinfo); - if (ret < 0) { - rsp->hdr.Status = err; - goto err_out; - } - rsp->StructureSize = cpu_to_le16(36); rsp->Reserved = 0; rsp->Flags = 0; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 9091dcd7a310..4277750a6da1 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -517,6 +517,9 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp, } } + /* Reserve lease break for parent dir at closing time */ + fp->reserve_lease_break = true; + /* Do we need to break any of a levelII oplock? */ smb_break_all_levII_oplock(work, fp, 1); diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index ddf233994ddb..4e82ff627d12 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -87,6 +87,17 @@ static struct ksmbd_inode *ksmbd_inode_lookup(struct ksmbd_file *fp) return __ksmbd_inode_lookup(fp->filp->f_path.dentry); } +struct ksmbd_inode *ksmbd_inode_lookup_lock(struct dentry *d) +{ + struct ksmbd_inode *ci; + + read_lock(&inode_hash_lock); + ci = __ksmbd_inode_lookup(d); + read_unlock(&inode_hash_lock); + + return ci; +} + int ksmbd_query_inode_status(struct dentry *dentry) { struct ksmbd_inode *ci; @@ -199,7 +210,7 @@ static void ksmbd_inode_free(struct ksmbd_inode *ci) kfree(ci); } -static void ksmbd_inode_put(struct ksmbd_inode *ci) +void ksmbd_inode_put(struct ksmbd_inode *ci) { if (atomic_dec_and_test(&ci->m_count)) ksmbd_inode_free(ci); diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 8325cf4527c4..a528f0cc775a 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -105,6 +105,7 @@ struct ksmbd_file { struct ksmbd_readdir_data readdir_data; int dot_dotdot[2]; unsigned int f_state; + bool reserve_lease_break; }; static inline void set_ctx_actor(struct dir_context *ctx, @@ -138,6 +139,8 @@ struct ksmbd_file *ksmbd_lookup_foreign_fd(struct ksmbd_work *work, u64 id); struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id, u64 pid); void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp); +struct ksmbd_inode *ksmbd_inode_lookup_lock(struct dentry *d); +void ksmbd_inode_put(struct ksmbd_inode *ci); struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id); struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid); struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry); diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 13ba34e6d64f..2acf191eb89e 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -245,7 +245,7 @@ struct folio *ufs_get_locked_folio(struct address_space *mapping, { struct inode *inode = mapping->host; struct folio *folio = filemap_lock_folio(mapping, index); - if (!folio) { + if (IS_ERR(folio)) { folio = read_mapping_folio(mapping, index, NULL); if (IS_ERR(folio)) { |