summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/afs/rxrpc.c2
-rw-r--r--fs/btrfs/delalloc-space.c2
-rw-r--r--fs/btrfs/disk-io.c28
-rw-r--r--fs/btrfs/extent-tree.c48
-rw-r--r--fs/btrfs/extent_io.c3
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c16
-rw-r--r--fs/btrfs/ioctl.c9
-rw-r--r--fs/btrfs/ordered-data.c11
-rw-r--r--fs/btrfs/qgroup.c44
-rw-r--r--fs/btrfs/qgroup.h7
-rw-r--r--fs/btrfs/transaction.c2
-rw-r--r--fs/btrfs/transaction.h3
-rw-r--r--fs/debugfs/file.c10
-rw-r--r--fs/debugfs/inode.c7
-rw-r--r--fs/debugfs/internal.h6
-rw-r--r--fs/ext4/file.c14
-rw-r--r--fs/ext4/mballoc.c4
-rw-r--r--fs/fuse/dax.c1
-rw-r--r--fs/fuse/file.c8
-rw-r--r--fs/fuse/fuse_i.h19
-rw-r--r--fs/fuse/inode.c81
-rw-r--r--fs/jbd2/commit.c10
-rw-r--r--fs/jbd2/journal.c24
-rw-r--r--fs/nfsd/auth.c4
-rw-r--r--fs/nfsd/nfs4callback.c97
-rw-r--r--fs/nfsd/nfs4state.c114
-rw-r--r--fs/nfsd/nfs4xdr.c7
-rw-r--r--fs/nfsd/nfsctl.c18
-rw-r--r--fs/nfsd/nfsd.h1
-rw-r--r--fs/nfsd/nfssvc.c3
-rw-r--r--fs/nfsd/state.h25
-rw-r--r--fs/nfsd/vfs.c9
-rw-r--r--fs/nfsd/xdr4cb.h18
-rw-r--r--fs/nilfs2/sufile.c42
-rw-r--r--fs/nilfs2/the_nilfs.c6
-rw-r--r--fs/open.c3
-rw-r--r--fs/proc/task_mmu.c26
-rw-r--r--fs/smb/client/cached_dir.c17
-rw-r--r--fs/smb/client/cifsfs.c174
-rw-r--r--fs/smb/client/connect.c35
-rw-r--r--fs/smb/client/smb2misc.c26
-rw-r--r--fs/smb/client/smb2ops.c42
-rw-r--r--fs/smb/client/smb2pdu.c127
-rw-r--r--fs/smb/client/smb2proto.h12
-rw-r--r--fs/smb/common/smb2pdu.h3
-rw-r--r--fs/smb/server/oplock.c115
-rw-r--r--fs/smb/server/oplock.h8
-rw-r--r--fs/smb/server/smb2ops.c9
-rw-r--r--fs/smb/server/smb2pdu.c62
-rw-r--r--fs/smb/server/vfs.c3
-rw-r--r--fs/smb/server/vfs_cache.c13
-rw-r--r--fs/smb/server/vfs_cache.h3
-rw-r--r--fs/squashfs/block.c2
-rw-r--r--fs/tracefs/event_inode.c4
-rw-r--r--fs/ufs/util.c2
57 files changed, 819 insertions, 573 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index fd1f655b4f1f..42837617a55b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -268,6 +268,7 @@ config HUGETLBFS
config HUGETLB_PAGE
def_bool HUGETLBFS
+ select XARRAY_MULTI
config HUGETLB_PAGE_OPTIMIZE_VMEMMAP
def_bool HUGETLB_PAGE
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index ed1644e7683f..d642d06a453b 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -424,7 +424,7 @@ error_kill_call:
if (call->async) {
if (cancel_work_sync(&call->async_work))
afs_put_call(call);
- afs_put_call(call);
+ afs_set_call_complete(call, ret, 0);
}
ac->error = ret;
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index 51453d4928fa..2833e8ef4c09 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -199,7 +199,7 @@ void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
start = round_down(start, fs_info->sectorsize);
btrfs_free_reserved_data_space_noquota(fs_info, len);
- btrfs_qgroup_free_data(inode, reserved, start, len);
+ btrfs_qgroup_free_data(inode, reserved, start, len, NULL);
}
/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index bbcc3df77646..62cb97f7c94f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4799,6 +4799,32 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
}
}
+static void btrfs_free_all_qgroup_pertrans(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_root *gang[8];
+ int i;
+ int ret;
+
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ while (1) {
+ ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
+ (void **)gang, 0,
+ ARRAY_SIZE(gang),
+ BTRFS_ROOT_TRANS_TAG);
+ if (ret == 0)
+ break;
+ for (i = 0; i < ret; i++) {
+ struct btrfs_root *root = gang[i];
+
+ btrfs_qgroup_free_meta_all_pertrans(root);
+ radix_tree_tag_clear(&fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ BTRFS_ROOT_TRANS_TAG);
+ }
+ }
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+}
+
void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
struct btrfs_fs_info *fs_info)
{
@@ -4827,6 +4853,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
EXTENT_DIRTY);
btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents);
+ btrfs_free_all_qgroup_pertrans(fs_info);
+
cur_trans->state =TRANS_STATE_COMPLETED;
wake_up(&cur_trans->commit_wait);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0455935ff558..01423670bc8a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1547,6 +1547,23 @@ out:
return ret;
}
+static void free_head_ref_squota_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_head *href)
+{
+ u64 root = href->owning_root;
+
+ /*
+ * Don't check must_insert_reserved, as this is called from contexts
+ * where it has already been unset.
+ */
+ if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE ||
+ !href->is_data || !is_fstree(root))
+ return;
+
+ btrfs_qgroup_free_refroot(fs_info, root, href->reserved_bytes,
+ BTRFS_QGROUP_RSV_DATA);
+}
+
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *node,
@@ -1569,7 +1586,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_squota_delta delta = {
.root = href->owning_root,
.num_bytes = node->num_bytes,
- .rsv_bytes = href->reserved_bytes,
.is_data = true,
.is_inc = true,
.generation = trans->transid,
@@ -1586,11 +1602,9 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
flags, ref->objectid,
ref->offset, &key,
node->ref_mod, href->owning_root);
+ free_head_ref_squota_rsv(trans->fs_info, href);
if (!ret)
ret = btrfs_record_squota_delta(trans->fs_info, &delta);
- else
- btrfs_qgroup_free_refroot(trans->fs_info, delta.root,
- delta.rsv_bytes, BTRFS_QGROUP_RSV_DATA);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, node, parent, ref->root,
ref->objectid, ref->offset,
@@ -1742,7 +1756,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
struct btrfs_squota_delta delta = {
.root = href->owning_root,
.num_bytes = fs_info->nodesize,
- .rsv_bytes = 0,
.is_data = false,
.is_inc = true,
.generation = trans->transid,
@@ -1774,8 +1787,10 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
int ret = 0;
if (TRANS_ABORTED(trans)) {
- if (insert_reserved)
+ if (insert_reserved) {
btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
+ free_head_ref_squota_rsv(trans->fs_info, href);
+ }
return 0;
}
@@ -1871,6 +1886,8 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head)
{
+ u64 ret = 0;
+
/*
* We had csum deletions accounted for in our delayed refs rsv, we need
* to drop the csum leaves for this update from our delayed_refs_rsv.
@@ -1885,14 +1902,13 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
btrfs_delayed_refs_rsv_release(fs_info, 0, nr_csums);
- return btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums);
+ ret = btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums);
}
- if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE &&
- head->must_insert_reserved && head->is_data)
- btrfs_qgroup_free_refroot(fs_info, head->owning_root,
- head->reserved_bytes, BTRFS_QGROUP_RSV_DATA);
+ /* must_insert_reserved can be set only if we didn't run the head ref. */
+ if (head->must_insert_reserved)
+ free_head_ref_squota_rsv(fs_info, head);
- return 0;
+ return ret;
}
static int cleanup_ref_head(struct btrfs_trans_handle *trans,
@@ -2033,6 +2049,12 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
* spin lock.
*/
must_insert_reserved = locked_ref->must_insert_reserved;
+ /*
+ * Unsetting this on the head ref relinquishes ownership of
+ * the rsv_bytes, so it is critical that every possible code
+ * path from here forward frees all reserves including qgroup
+ * reserve.
+ */
locked_ref->must_insert_reserved = false;
extent_op = locked_ref->extent_op;
@@ -3292,7 +3314,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_squota_delta delta = {
.root = delayed_ref_root,
.num_bytes = num_bytes,
- .rsv_bytes = 0,
.is_data = is_data,
.is_inc = false,
.generation = btrfs_extent_generation(leaf, ei),
@@ -4937,7 +4958,6 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
.root = root_objectid,
.num_bytes = ins->offset,
.generation = trans->transid,
- .rsv_bytes = 0,
.is_data = true,
.is_inc = true,
};
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e6230a6ffa98..8f724c54fc8e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2302,7 +2302,8 @@ static int try_release_extent_state(struct extent_io_tree *tree,
ret = 0;
} else {
u32 clear_bits = ~(EXTENT_LOCKED | EXTENT_NODATASUM |
- EXTENT_DELALLOC_NEW | EXTENT_CTLBITS);
+ EXTENT_DELALLOC_NEW | EXTENT_CTLBITS |
+ EXTENT_QGROUP_RESERVED);
/*
* At this point we can safely clear everything except the
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f47731c45bb5..32611a4edd6b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3192,7 +3192,7 @@ static long btrfs_fallocate(struct file *file, int mode,
qgroup_reserved -= range->len;
} else if (qgroup_reserved > 0) {
btrfs_qgroup_free_data(BTRFS_I(inode), data_reserved,
- range->start, range->len);
+ range->start, range->len, NULL);
qgroup_reserved -= range->len;
}
list_del(&range->list);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9f5a9894f88f..fb3c3f43c3fa 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -688,7 +688,7 @@ out:
* And at reserve time, it's always aligned to page size, so
* just free one page here.
*/
- btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
+ btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE, NULL);
btrfs_free_path(path);
btrfs_end_transaction(trans);
return ret;
@@ -5132,7 +5132,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
*/
if (state_flags & EXTENT_DELALLOC)
btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start,
- end - start + 1);
+ end - start + 1, NULL);
clear_extent_bit(io_tree, start, end,
EXTENT_CLEAR_ALL_BITS | EXTENT_DO_ACCOUNTING,
@@ -8059,7 +8059,7 @@ next:
* reserved data space.
* Since the IO will never happen for this page.
*/
- btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur);
+ btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur, NULL);
if (!inode_evicting) {
clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_UPTODATE |
@@ -9491,7 +9491,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
struct btrfs_path *path;
u64 start = ins->objectid;
u64 len = ins->offset;
- int qgroup_released;
+ u64 qgroup_released = 0;
int ret;
memset(&stack_fi, 0, sizeof(stack_fi));
@@ -9504,9 +9504,9 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
/* Encryption and other encoding is reserved and all 0 */
- qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len);
- if (qgroup_released < 0)
- return ERR_PTR(qgroup_released);
+ ret = btrfs_qgroup_release_data(inode, file_offset, len, &qgroup_released);
+ if (ret < 0)
+ return ERR_PTR(ret);
if (trans) {
ret = insert_reserved_file_extent(trans, inode,
@@ -10401,7 +10401,7 @@ out_delalloc_release:
btrfs_delalloc_release_metadata(inode, disk_num_bytes, ret < 0);
out_qgroup_free_data:
if (ret < 0)
- btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes);
+ btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes, NULL);
out_free_data_space:
/*
* If btrfs_reserve_extent() succeeded, then we already decremented
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4e50b62db2a8..a1743904202b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1290,6 +1290,15 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
* are limited to own subvolumes only
*/
ret = -EPERM;
+ } else if (btrfs_ino(BTRFS_I(src_inode)) != BTRFS_FIRST_FREE_OBJECTID) {
+ /*
+ * Snapshots must be made with the src_inode referring
+ * to the subvolume inode, otherwise the permission
+ * checking above is useless because we may have
+ * permission on a lower directory but not the subvol
+ * itself.
+ */
+ ret = -EINVAL;
} else {
ret = btrfs_mksnapshot(&file->f_path, idmap,
name, namelen,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 574e8a55e24a..a82e1417c4d2 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -152,11 +152,12 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
{
struct btrfs_ordered_extent *entry;
int ret;
+ u64 qgroup_rsv = 0;
if (flags &
((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
/* For nocow write, we can release the qgroup rsv right now */
- ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
+ ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes, &qgroup_rsv);
if (ret < 0)
return ERR_PTR(ret);
} else {
@@ -164,7 +165,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
* The ordered extent has reserved qgroup space, release now
* and pass the reserved number for qgroup_record to free.
*/
- ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes);
+ ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes, &qgroup_rsv);
if (ret < 0)
return ERR_PTR(ret);
}
@@ -182,7 +183,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
entry->inode = igrab(&inode->vfs_inode);
entry->compress_type = compress_type;
entry->truncated_len = (u64)-1;
- entry->qgroup_rsv = ret;
+ entry->qgroup_rsv = qgroup_rsv;
entry->flags = flags;
refcount_set(&entry->refs, 1);
init_waitqueue_head(&entry->wait);
@@ -599,7 +600,9 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
release = entry->disk_num_bytes;
else
release = entry->num_bytes;
- btrfs_delalloc_release_metadata(btrfs_inode, release, false);
+ btrfs_delalloc_release_metadata(btrfs_inode, release,
+ test_bit(BTRFS_ORDERED_IOERR,
+ &entry->flags));
}
percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index ce446d9d7f23..e46774e8f49f 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -4057,13 +4057,14 @@ int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
/* Free ranges specified by @reserved, normally in error path */
static int qgroup_free_reserved_data(struct btrfs_inode *inode,
- struct extent_changeset *reserved, u64 start, u64 len)
+ struct extent_changeset *reserved,
+ u64 start, u64 len, u64 *freed_ret)
{
struct btrfs_root *root = inode->root;
struct ulist_node *unode;
struct ulist_iterator uiter;
struct extent_changeset changeset;
- int freed = 0;
+ u64 freed = 0;
int ret;
extent_changeset_init(&changeset);
@@ -4104,7 +4105,9 @@ static int qgroup_free_reserved_data(struct btrfs_inode *inode,
}
btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, freed,
BTRFS_QGROUP_RSV_DATA);
- ret = freed;
+ if (freed_ret)
+ *freed_ret = freed;
+ ret = 0;
out:
extent_changeset_release(&changeset);
return ret;
@@ -4112,7 +4115,7 @@ out:
static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start, u64 len,
- int free)
+ u64 *released, int free)
{
struct extent_changeset changeset;
int trace_op = QGROUP_RELEASE;
@@ -4128,7 +4131,7 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
/* In release case, we shouldn't have @reserved */
WARN_ON(!free && reserved);
if (free && reserved)
- return qgroup_free_reserved_data(inode, reserved, start, len);
+ return qgroup_free_reserved_data(inode, reserved, start, len, released);
extent_changeset_init(&changeset);
ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1,
EXTENT_QGROUP_RESERVED, &changeset);
@@ -4143,7 +4146,8 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
btrfs_qgroup_free_refroot(inode->root->fs_info,
inode->root->root_key.objectid,
changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
- ret = changeset.bytes_changed;
+ if (released)
+ *released = changeset.bytes_changed;
out:
extent_changeset_release(&changeset);
return ret;
@@ -4162,9 +4166,10 @@ out:
* NOTE: This function may sleep for memory allocation.
*/
int btrfs_qgroup_free_data(struct btrfs_inode *inode,
- struct extent_changeset *reserved, u64 start, u64 len)
+ struct extent_changeset *reserved,
+ u64 start, u64 len, u64 *freed)
{
- return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
+ return __btrfs_qgroup_release_data(inode, reserved, start, len, freed, 1);
}
/*
@@ -4182,9 +4187,9 @@ int btrfs_qgroup_free_data(struct btrfs_inode *inode,
*
* NOTE: This function may sleep for memory allocation.
*/
-int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len)
+int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released)
{
- return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
+ return __btrfs_qgroup_release_data(inode, NULL, start, len, released, 0);
}
static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes,
@@ -4332,8 +4337,9 @@ static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root,
qgroup_rsv_release(fs_info, qgroup, num_bytes,
BTRFS_QGROUP_RSV_META_PREALLOC);
- qgroup_rsv_add(fs_info, qgroup, num_bytes,
- BTRFS_QGROUP_RSV_META_PERTRANS);
+ if (!sb_rdonly(fs_info->sb))
+ qgroup_rsv_add(fs_info, qgroup, num_bytes,
+ BTRFS_QGROUP_RSV_META_PERTRANS);
list_for_each_entry(glist, &qgroup->groups, next_group)
qgroup_iterator_add(&qgroup_list, glist->group);
@@ -4655,6 +4661,17 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
*root = RB_ROOT;
}
+void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes)
+{
+ if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE)
+ return;
+
+ if (!is_fstree(root))
+ return;
+
+ btrfs_qgroup_free_refroot(fs_info, root, rsv_bytes, BTRFS_QGROUP_RSV_DATA);
+}
+
int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info,
struct btrfs_squota_delta *delta)
{
@@ -4699,8 +4716,5 @@ int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info,
out:
spin_unlock(&fs_info->qgroup_lock);
- if (!ret && delta->rsv_bytes)
- btrfs_qgroup_free_refroot(fs_info, root, delta->rsv_bytes,
- BTRFS_QGROUP_RSV_DATA);
return ret;
}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 855a4f978761..be18c862e64e 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -274,8 +274,6 @@ struct btrfs_squota_delta {
u64 root;
/* The number of bytes in the extent being counted. */
u64 num_bytes;
- /* The number of bytes reserved for this extent. */
- u64 rsv_bytes;
/* The generation the extent was created in. */
u64 generation;
/* Whether we are using or freeing the extent. */
@@ -358,10 +356,10 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
/* New io_tree based accurate qgroup reserve API */
int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len);
-int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len);
+int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released);
int btrfs_qgroup_free_data(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start,
- u64 len);
+ u64 len, u64 *freed);
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type, bool enforce);
int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
@@ -422,6 +420,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *eb);
void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans);
bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info);
+void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes);
int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info,
struct btrfs_squota_delta *delta);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index bfc0eb5e3b7c..5b3333ceef04 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -37,8 +37,6 @@
static struct kmem_cache *btrfs_trans_handle_cachep;
-#define BTRFS_ROOT_TRANS_TAG 0
-
/*
* Transaction states and transitions
*
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 18c4f6e83b78..2bf8bbdfd0b3 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -12,6 +12,9 @@
#include "ctree.h"
#include "misc.h"
+/* Radix-tree tag for roots that are part of the transaction. */
+#define BTRFS_ROOT_TRANS_TAG 0
+
enum btrfs_trans_state {
TRANS_STATE_RUNNING,
TRANS_STATE_COMMIT_PREP,
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index a5ade8c16375..5063434be0fc 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -108,12 +108,6 @@ int debugfs_file_get(struct dentry *dentry)
kfree(fsd);
fsd = READ_ONCE(dentry->d_fsdata);
}
-#ifdef CONFIG_LOCKDEP
- fsd->lock_name = kasprintf(GFP_KERNEL, "debugfs:%pd", dentry);
- lockdep_register_key(&fsd->key);
- lockdep_init_map(&fsd->lockdep_map, fsd->lock_name ?: "debugfs",
- &fsd->key, 0);
-#endif
INIT_LIST_HEAD(&fsd->cancellations);
mutex_init(&fsd->cancellations_mtx);
}
@@ -132,8 +126,6 @@ int debugfs_file_get(struct dentry *dentry)
if (!refcount_inc_not_zero(&fsd->active_users))
return -EIO;
- lock_map_acquire_read(&fsd->lockdep_map);
-
return 0;
}
EXPORT_SYMBOL_GPL(debugfs_file_get);
@@ -151,8 +143,6 @@ void debugfs_file_put(struct dentry *dentry)
{
struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata);
- lock_map_release(&fsd->lockdep_map);
-
if (refcount_dec_and_test(&fsd->active_users))
complete(&fsd->active_users_drained);
}
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e4e7fe1bd9fb..034a617cb1a5 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -243,10 +243,6 @@ static void debugfs_release_dentry(struct dentry *dentry)
/* check it wasn't a dir (no fsdata) or automount (no real_fops) */
if (fsd && fsd->real_fops) {
-#ifdef CONFIG_LOCKDEP
- lockdep_unregister_key(&fsd->key);
- kfree(fsd->lock_name);
-#endif
WARN_ON(!list_empty(&fsd->cancellations));
mutex_destroy(&fsd->cancellations_mtx);
}
@@ -755,9 +751,6 @@ static void __debugfs_file_removed(struct dentry *dentry)
if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
return;
- lock_map_acquire(&fsd->lockdep_map);
- lock_map_release(&fsd->lockdep_map);
-
/* if we hit zero, just wait for all to finish */
if (!refcount_dec_and_test(&fsd->active_users)) {
wait_for_completion(&fsd->active_users_drained);
diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
index 0c4c68cf161f..dae80c2a469e 100644
--- a/fs/debugfs/internal.h
+++ b/fs/debugfs/internal.h
@@ -7,7 +7,6 @@
#ifndef _DEBUGFS_INTERNAL_H_
#define _DEBUGFS_INTERNAL_H_
-#include <linux/lockdep.h>
#include <linux/list.h>
struct file_operations;
@@ -25,11 +24,6 @@ struct debugfs_fsdata {
struct {
refcount_t active_users;
struct completion active_users_drained;
-#ifdef CONFIG_LOCKDEP
- struct lockdep_map lockdep_map;
- struct lock_class_key key;
- char *lock_name;
-#endif
/* protect cancellations */
struct mutex cancellations_mtx;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 0166bb9ca160..6aa15dafc677 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -349,9 +349,10 @@ static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count)
return;
}
/*
- * If i_disksize got extended due to writeback of delalloc blocks while
- * the DIO was running we could fail to cleanup the orphan list in
- * ext4_handle_inode_extension(). Do it now.
+ * If i_disksize got extended either due to writeback of delalloc
+ * blocks or extending truncate while the DIO was running we could fail
+ * to cleanup the orphan list in ext4_handle_inode_extension(). Do it
+ * now.
*/
if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
@@ -386,10 +387,11 @@ static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
* blocks. But the code in ext4_iomap_alloc() is careful to use
* zeroed/unwritten extents if this is possible; thus we won't leave
* uninitialized blocks in a file even if we didn't succeed in writing
- * as much as we intended.
+ * as much as we intended. Also we can race with truncate or write
+ * expanding the file so we have to be a bit careful here.
*/
- WARN_ON_ONCE(i_size_read(inode) < READ_ONCE(EXT4_I(inode)->i_disksize));
- if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize))
+ if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize) &&
+ pos + size <= i_size_read(inode))
return size;
return ext4_handle_inode_extension(inode, pos, size);
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 454d5612641e..d72b5e3c92ec 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4478,6 +4478,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
start = max(start, rounddown(ac->ac_o_ex.fe_logical,
(ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb)));
+ /* avoid unnecessary preallocation that may trigger assertions */
+ if (start + size > EXT_MAX_BLOCKS)
+ size = EXT_MAX_BLOCKS - start;
+
/* don't cover already allocated blocks in selected range */
if (ar->pleft && start <= ar->lleft) {
size -= ar->lleft + 1 - start;
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index 23904a6a9a96..12ef91d170bb 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -1222,6 +1222,7 @@ void fuse_dax_conn_free(struct fuse_conn *fc)
if (fc->dax) {
fuse_free_dax_mem_ranges(&fc->dax->free_ranges);
kfree(fc->dax);
+ fc->dax = NULL;
}
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 1cdb6327511e..a660f1f21540 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1448,7 +1448,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
if (!ia)
return -ENOMEM;
- if (fopen_direct_io && fc->direct_io_relax) {
+ if (fopen_direct_io && fc->direct_io_allow_mmap) {
res = filemap_write_and_wait_range(mapping, pos, pos + count - 1);
if (res) {
fuse_io_free(ia);
@@ -1574,6 +1574,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t res;
bool exclusive_lock =
!(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES) ||
+ get_fuse_conn(inode)->direct_io_allow_mmap ||
iocb->ki_flags & IOCB_APPEND ||
fuse_direct_write_extending_i_size(iocb, from);
@@ -1581,6 +1582,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
* Take exclusive lock if
* - Parallel direct writes are disabled - a user space decision
* - Parallel direct writes are enabled and i_size is being extended.
+ * - Shared mmap on direct_io file is supported (FUSE_DIRECT_IO_ALLOW_MMAP).
* This might not be needed at all, but needs further investigation.
*/
if (exclusive_lock)
@@ -2466,9 +2468,9 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
if (ff->open_flags & FOPEN_DIRECT_IO) {
/* Can't provide the coherency needed for MAP_SHARED
- * if FUSE_DIRECT_IO_RELAX isn't set.
+ * if FUSE_DIRECT_IO_ALLOW_MMAP isn't set.
*/
- if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_relax)
+ if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_allow_mmap)
return -ENODEV;
invalidate_inode_pages2(file->f_mapping);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 6e6e721f421b..1df83eebda92 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -63,6 +63,19 @@ struct fuse_forget_link {
struct fuse_forget_link *next;
};
+/* Submount lookup tracking */
+struct fuse_submount_lookup {
+ /** Refcount */
+ refcount_t count;
+
+ /** Unique ID, which identifies the inode between userspace
+ * and kernel */
+ u64 nodeid;
+
+ /** The request used for sending the FORGET message */
+ struct fuse_forget_link *forget;
+};
+
/** FUSE inode */
struct fuse_inode {
/** Inode data */
@@ -158,6 +171,8 @@ struct fuse_inode {
*/
struct fuse_inode_dax *dax;
#endif
+ /** Submount specific lookup tracking */
+ struct fuse_submount_lookup *submount_lookup;
};
/** FUSE inode state bits */
@@ -797,8 +812,8 @@ struct fuse_conn {
/* Is tmpfile not implemented by fs? */
unsigned int no_tmpfile:1;
- /* relax restrictions in FOPEN_DIRECT_IO mode */
- unsigned int direct_io_relax:1;
+ /* Relax restrictions to allow shared mmap in FOPEN_DIRECT_IO mode */
+ unsigned int direct_io_allow_mmap:1;
/* Is statx not implemented by fs? */
unsigned int no_statx:1;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 74d4f09d5827..2a6d44f91729 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -68,6 +68,24 @@ struct fuse_forget_link *fuse_alloc_forget(void)
return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
}
+static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void)
+{
+ struct fuse_submount_lookup *sl;
+
+ sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT);
+ if (!sl)
+ return NULL;
+ sl->forget = fuse_alloc_forget();
+ if (!sl->forget)
+ goto out_free;
+
+ return sl;
+
+out_free:
+ kfree(sl);
+ return NULL;
+}
+
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
struct fuse_inode *fi;
@@ -83,6 +101,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->attr_version = 0;
fi->orig_ino = 0;
fi->state = 0;
+ fi->submount_lookup = NULL;
mutex_init(&fi->mutex);
spin_lock_init(&fi->lock);
fi->forget = fuse_alloc_forget();
@@ -113,6 +132,17 @@ static void fuse_free_inode(struct inode *inode)
kmem_cache_free(fuse_inode_cachep, fi);
}
+static void fuse_cleanup_submount_lookup(struct fuse_conn *fc,
+ struct fuse_submount_lookup *sl)
+{
+ if (!refcount_dec_and_test(&sl->count))
+ return;
+
+ fuse_queue_forget(fc, sl->forget, sl->nodeid, 1);
+ sl->forget = NULL;
+ kfree(sl);
+}
+
static void fuse_evict_inode(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -132,6 +162,11 @@ static void fuse_evict_inode(struct inode *inode)
fi->nlookup);
fi->forget = NULL;
}
+
+ if (fi->submount_lookup) {
+ fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
+ fi->submount_lookup = NULL;
+ }
}
if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
WARN_ON(!list_empty(&fi->write_files));
@@ -330,6 +365,13 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
fuse_dax_dontcache(inode, attr->flags);
}
+static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
+ u64 nodeid)
+{
+ sl->nodeid = nodeid;
+ refcount_set(&sl->count, 1);
+}
+
static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr,
struct fuse_conn *fc)
{
@@ -392,12 +434,22 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
*/
if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
S_ISDIR(attr->mode)) {
+ struct fuse_inode *fi;
+
inode = new_inode(sb);
if (!inode)
return NULL;
fuse_init_inode(inode, attr, fc);
- get_fuse_inode(inode)->nodeid = nodeid;
+ fi = get_fuse_inode(inode);
+ fi->nodeid = nodeid;
+ fi->submount_lookup = fuse_alloc_submount_lookup();
+ if (!fi->submount_lookup) {
+ iput(inode);
+ return NULL;
+ }
+ /* Sets the refcount to 1 on fi->submount_lookup->count */
+ fuse_init_submount_lookup(fi->submount_lookup, nodeid);
inode->i_flags |= S_AUTOMOUNT;
goto done;
}
@@ -420,11 +472,11 @@ retry:
iput(inode);
goto retry;
}
-done:
fi = get_fuse_inode(inode);
spin_lock(&fi->lock);
fi->nlookup++;
spin_unlock(&fi->lock);
+done:
fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
return inode;
@@ -1230,8 +1282,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
fc->init_security = 1;
if (flags & FUSE_CREATE_SUPP_GROUP)
fc->create_supp_group = 1;
- if (flags & FUSE_DIRECT_IO_RELAX)
- fc->direct_io_relax = 1;
+ if (flags & FUSE_DIRECT_IO_ALLOW_MMAP)
+ fc->direct_io_allow_mmap = 1;
} else {
ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1;
@@ -1278,7 +1330,7 @@ void fuse_send_init(struct fuse_mount *fm)
FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
- FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_RELAX;
+ FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP;
#ifdef CONFIG_FUSE_DAX
if (fm->fc->dax)
flags |= FUSE_MAP_ALIGNMENT;
@@ -1465,6 +1517,8 @@ static int fuse_fill_super_submount(struct super_block *sb,
struct super_block *parent_sb = parent_fi->inode.i_sb;
struct fuse_attr root_attr;
struct inode *root;
+ struct fuse_submount_lookup *sl;
+ struct fuse_inode *fi;
fuse_sb_defaults(sb);
fm->sb = sb;
@@ -1487,12 +1541,27 @@ static int fuse_fill_super_submount(struct super_block *sb,
* its nlookup should not be incremented. fuse_iget() does
* that, though, so undo it here.
*/
- get_fuse_inode(root)->nlookup--;
+ fi = get_fuse_inode(root);
+ fi->nlookup--;
+
sb->s_d_op = &fuse_dentry_operations;
sb->s_root = d_make_root(root);
if (!sb->s_root)
return -ENOMEM;
+ /*
+ * Grab the parent's submount_lookup pointer and take a
+ * reference on the shared nlookup from the parent. This is to
+ * prevent the last forget for this nodeid from getting
+ * triggered until all users have finished with it.
+ */
+ sl = parent_fi->submount_lookup;
+ WARN_ON(!sl);
+ if (sl) {
+ refcount_inc(&sl->count);
+ fi->submount_lookup = sl;
+ }
+
return 0;
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 8d6f934c3d95..5e122586e06e 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -119,7 +119,7 @@ static int journal_submit_commit_record(journal_t *journal,
struct commit_header *tmp;
struct buffer_head *bh;
struct timespec64 now;
- blk_opf_t write_flags = REQ_OP_WRITE | REQ_SYNC;
+ blk_opf_t write_flags = REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS;
*cbh = NULL;
@@ -270,6 +270,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
if (!ret)
ret = err;
}
+ cond_resched();
spin_lock(&journal->j_list_lock);
jinode->i_flags &= ~JI_COMMIT_RUNNING;
smp_mb();
@@ -395,8 +396,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
jbd2_journal_update_sb_log_tail(journal,
journal->j_tail_sequence,
- journal->j_tail,
- REQ_SYNC);
+ journal->j_tail, 0);
mutex_unlock(&journal->j_checkpoint_mutex);
} else {
jbd2_debug(3, "superblock not updated\n");
@@ -715,6 +715,7 @@ start_journal_io:
for (i = 0; i < bufs; i++) {
struct buffer_head *bh = wbuf[i];
+
/*
* Compute checksum.
*/
@@ -727,7 +728,8 @@ start_journal_io:
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
bh->b_end_io = journal_end_buffer_io_sync;
- submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
+ submit_bh(REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS,
+ bh);
}
cond_resched();
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ed53188472f9..206cb53ef2b0 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1100,8 +1100,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
* space and if we lose sb update during power failure we'd replay
* old transaction with possibly newly overwritten data.
*/
- ret = jbd2_journal_update_sb_log_tail(journal, tid, block,
- REQ_SYNC | REQ_FUA);
+ ret = jbd2_journal_update_sb_log_tail(journal, tid, block, REQ_FUA);
if (ret)
goto out;
@@ -1775,8 +1774,7 @@ static int journal_reset(journal_t *journal)
*/
jbd2_journal_update_sb_log_tail(journal,
journal->j_tail_sequence,
- journal->j_tail,
- REQ_SYNC | REQ_FUA);
+ journal->j_tail, REQ_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
}
return jbd2_journal_start_thread(journal);
@@ -1798,9 +1796,16 @@ static int jbd2_write_superblock(journal_t *journal, blk_opf_t write_flags)
return -EIO;
}
- trace_jbd2_write_superblock(journal, write_flags);
+ /*
+ * Always set high priority flags to exempt from block layer's
+ * QOS policies, e.g. writeback throttle.
+ */
+ write_flags |= JBD2_JOURNAL_REQ_FLAGS;
if (!(journal->j_flags & JBD2_BARRIER))
write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
+
+ trace_jbd2_write_superblock(journal, write_flags);
+
if (buffer_write_io_error(bh)) {
/*
* Oh, dear. A previous attempt to write the journal
@@ -2050,7 +2055,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
jbd2_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
sb->s_errno = cpu_to_be32(errcode);
- jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA);
+ jbd2_write_superblock(journal, REQ_FUA);
}
EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
@@ -2171,8 +2176,7 @@ int jbd2_journal_destroy(journal_t *journal)
++journal->j_transaction_sequence;
write_unlock(&journal->j_state_lock);
- jbd2_mark_journal_empty(journal,
- REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
+ jbd2_mark_journal_empty(journal, REQ_PREFLUSH | REQ_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
} else
err = -EIO;
@@ -2473,7 +2477,7 @@ int jbd2_journal_flush(journal_t *journal, unsigned int flags)
* the magic code for a fully-recovered superblock. Any future
* commits of data to the journal will restore the current
* s_start value. */
- jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
+ jbd2_mark_journal_empty(journal, REQ_FUA);
if (flags)
err = __jbd2_journal_erase(journal, flags);
@@ -2519,7 +2523,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
if (write) {
/* Lock to make assertions happy... */
mutex_lock_io(&journal->j_checkpoint_mutex);
- jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
+ jbd2_mark_journal_empty(journal, REQ_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
}
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index fdf2aad73470..e6beaaf4f170 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -26,8 +26,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
int i;
int flags = nfsexp_flags(rqstp, exp);
- validate_process_creds();
-
/* discard any old override before preparing the new set */
revert_creds(get_cred(current_real_cred()));
new = prepare_creds();
@@ -81,10 +79,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
else
new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
new->cap_permitted);
- validate_process_creds();
put_cred(override_creds(new));
put_cred(new);
- validate_process_creds();
return 0;
oom:
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 92bc109dabe6..4039ffcf90ba 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -84,21 +84,7 @@ static void encode_uint32(struct xdr_stream *xdr, u32 n)
static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap,
size_t len)
{
- xdr_stream_encode_uint32_array(xdr, bitmap, len);
-}
-
-static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap,
- struct nfs4_cb_fattr *fattr)
-{
- fattr->ncf_cb_change = 0;
- fattr->ncf_cb_fsize = 0;
- if (bitmap[0] & FATTR4_WORD0_CHANGE)
- if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0)
- return -NFSERR_BAD_XDR;
- if (bitmap[0] & FATTR4_WORD0_SIZE)
- if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0)
- return -NFSERR_BAD_XDR;
- return 0;
+ WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0);
}
/*
@@ -372,30 +358,6 @@ encode_cb_recallany4args(struct xdr_stream *xdr,
}
/*
- * CB_GETATTR4args
- * struct CB_GETATTR4args {
- * nfs_fh4 fh;
- * bitmap4 attr_request;
- * };
- *
- * The size and change attributes are the only one
- * guaranteed to be serviced by the client.
- */
-static void
-encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr,
- struct nfs4_cb_fattr *fattr)
-{
- struct nfs4_delegation *dp =
- container_of(fattr, struct nfs4_delegation, dl_cb_fattr);
- struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle;
-
- encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR);
- encode_nfs_fh4(xdr, fh);
- encode_bitmap4(xdr, fattr->ncf_cb_bmap, ARRAY_SIZE(fattr->ncf_cb_bmap));
- hdr->nops++;
-}
-
-/*
* CB_SEQUENCE4args
*
* struct CB_SEQUENCE4args {
@@ -531,26 +493,6 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
}
/*
- * 20.1. Operation 3: CB_GETATTR - Get Attributes
- */
-static void nfs4_xdr_enc_cb_getattr(struct rpc_rqst *req,
- struct xdr_stream *xdr, const void *data)
-{
- const struct nfsd4_callback *cb = data;
- struct nfs4_cb_fattr *ncf =
- container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
- struct nfs4_cb_compound_hdr hdr = {
- .ident = cb->cb_clp->cl_cb_ident,
- .minorversion = cb->cb_clp->cl_minorversion,
- };
-
- encode_cb_compound4args(xdr, &hdr);
- encode_cb_sequence4args(xdr, cb, &hdr);
- encode_cb_getattr4args(xdr, &hdr, ncf);
- encode_cb_nops(&hdr);
-}
-
-/*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/
static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
@@ -606,42 +548,6 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
}
/*
- * 20.1. Operation 3: CB_GETATTR - Get Attributes
- */
-static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
- struct xdr_stream *xdr,
- void *data)
-{
- struct nfsd4_callback *cb = data;
- struct nfs4_cb_compound_hdr hdr;
- int status;
- u32 bitmap[3] = {0};
- u32 attrlen;
- struct nfs4_cb_fattr *ncf =
- container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
-
- status = decode_cb_compound4res(xdr, &hdr);
- if (unlikely(status))
- return status;
-
- status = decode_cb_sequence4res(xdr, cb);
- if (unlikely(status || cb->cb_seq_status))
- return status;
-
- status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status);
- if (status)
- return status;
- if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0)
- return -NFSERR_BAD_XDR;
- if (xdr_stream_decode_u32(xdr, &attrlen) < 0)
- return -NFSERR_BAD_XDR;
- if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize)))
- return -NFSERR_BAD_XDR;
- status = decode_cb_fattr4(xdr, bitmap, ncf);
- return status;
-}
-
-/*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/
static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
@@ -949,7 +855,6 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
- PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr),
};
static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 40415929e2ae..3edbfa0233e6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -127,7 +127,6 @@ static void free_session(struct nfsd4_session *);
static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
-static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
static struct workqueue_struct *laundry_wq;
@@ -1190,10 +1189,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
dp->dl_recalled = false;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
&nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
- nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
- &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
- dp->dl_cb_fattr.ncf_file_modified = false;
- dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
get_nfs4_file(fp);
dp->dl_stid.sc_file = fp;
return dp;
@@ -2901,56 +2896,11 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
spin_unlock(&nn->client_lock);
}
-static int
-nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
-{
- struct nfs4_cb_fattr *ncf =
- container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
-
- ncf->ncf_cb_status = task->tk_status;
- switch (task->tk_status) {
- case -NFS4ERR_DELAY:
- rpc_delay(task, 2 * HZ);
- return 0;
- default:
- return 1;
- }
-}
-
-static void
-nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
-{
- struct nfs4_cb_fattr *ncf =
- container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
- struct nfs4_delegation *dp =
- container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
-
- nfs4_put_stid(&dp->dl_stid);
- clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
- wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
-}
-
static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
.done = nfsd4_cb_recall_any_done,
.release = nfsd4_cb_recall_any_release,
};
-static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
- .done = nfsd4_cb_getattr_done,
- .release = nfsd4_cb_getattr_release,
-};
-
-void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
-{
- struct nfs4_delegation *dp =
- container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
-
- if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
- return;
- refcount_inc(&dp->dl_stid.sc_count);
- nfsd4_run_cb(&ncf->ncf_getattr);
-}
-
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -5685,8 +5635,6 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct svc_fh *parent = NULL;
int cb_up;
int status = 0;
- struct kstat stat;
- struct path path;
cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
open->op_recall = false;
@@ -5724,18 +5672,6 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
- path.mnt = currentfh->fh_export->ex_path.mnt;
- path.dentry = currentfh->fh_dentry;
- if (vfs_getattr(&path, &stat,
- (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
- AT_STATX_SYNC_AS_STAT)) {
- nfs4_put_stid(&dp->dl_stid);
- destroy_delegation(dp);
- goto out_no_deleg;
- }
- dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
- dp->dl_cb_fattr.ncf_initial_cinfo =
- nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry));
} else {
open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
@@ -8492,8 +8428,6 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
* nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
* @rqstp: RPC transaction context
* @inode: file to be checked for a conflict
- * @modified: return true if file was modified
- * @size: new size of file if modified is true
*
* This function is called when there is a conflict between a write
* delegation and a change/size GETATTR from another client. The server
@@ -8502,23 +8436,21 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
* delegation before replying to the GETATTR. See RFC 8881 section
* 18.7.4.
*
+ * The current implementation does not support CB_GETATTR yet. However,
+ * support for it, which can avoid recalling the delegation, could be
+ * added in follow-up work.
+ *
* Returns 0 if there is no conflict; otherwise an nfs_stat
* code is returned.
*/
__be32
-nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
- bool *modified, u64 *size)
+nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
{
+ __be32 status;
struct file_lock_context *ctx;
- struct nfs4_delegation *dp;
- struct nfs4_cb_fattr *ncf;
struct file_lock *fl;
- struct iattr attrs;
- __be32 status;
-
- might_sleep();
+ struct nfs4_delegation *dp;
- *modified = false;
ctx = locks_inode_context(inode);
if (!ctx)
return 0;
@@ -8545,34 +8477,10 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
break_lease:
spin_unlock(&ctx->flc_lock);
nfsd_stats_wdeleg_getattr_inc();
-
- dp = fl->fl_owner;
- ncf = &dp->dl_cb_fattr;
- nfs4_cb_getattr(&dp->dl_cb_fattr);
- wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
- if (ncf->ncf_cb_status) {
- status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
- if (status != nfserr_jukebox ||
- !nfsd_wait_for_delegreturn(rqstp, inode))
- return status;
- }
- if (!ncf->ncf_file_modified &&
- (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
- ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
- ncf->ncf_file_modified = true;
- if (ncf->ncf_file_modified) {
- /*
- * The server would not update the file's metadata
- * with the client's modified size.
- */
- attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
- attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
- setattr_copy(&nop_mnt_idmap, inode, &attrs);
- mark_inode_dirty(inode);
- ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
- *size = ncf->ncf_cur_fsize;
- *modified = true;
- }
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+ return status;
return 0;
}
break;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index ec4ed6206df1..b499fe9caa32 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3505,9 +3505,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
u32 attrmask[3];
unsigned long mask[2];
} u;
- bool file_modified;
unsigned long bit;
- u64 size = 0;
WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1);
WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval));
@@ -3534,8 +3532,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
}
args.size = 0;
if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
- status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
- &file_modified, &size);
+ status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry));
if (status)
goto out;
}
@@ -3545,7 +3542,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
AT_STATX_SYNC_AS_STAT);
if (err)
goto out_nfserr;
- args.size = file_modified ? size : args.stat.size;
+ args.size = args.stat.size;
if (!(args.stat.result_mask & STATX_BTIME))
/* underlying FS does not offer btime so we can't share it */
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3e15b72f421d..7cd513e59305 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -705,8 +705,10 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
err = svc_addsock(nn->nfsd_serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
- if (err >= 0 &&
- !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
+ if (err < 0 && !nn->nfsd_serv->sv_nrthreads && !nn->keep_active)
+ nfsd_last_thread(net);
+ else if (err >= 0 &&
+ !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
svc_get(nn->nfsd_serv);
nfsd_put(net);
@@ -757,6 +759,9 @@ out_close:
svc_xprt_put(xprt);
}
out_err:
+ if (!nn->nfsd_serv->sv_nrthreads && !nn->keep_active)
+ nfsd_last_thread(net);
+
nfsd_put(net);
return err;
}
@@ -1510,11 +1515,10 @@ int nfsd_nl_rpc_status_get_start(struct netlink_callback *cb)
int ret = -ENODEV;
mutex_lock(&nfsd_mutex);
- if (nn->nfsd_serv) {
- svc_get(nn->nfsd_serv);
+ if (nn->nfsd_serv)
ret = 0;
- }
- mutex_unlock(&nfsd_mutex);
+ else
+ mutex_unlock(&nfsd_mutex);
return ret;
}
@@ -1686,8 +1690,6 @@ out:
*/
int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb)
{
- mutex_lock(&nfsd_mutex);
- nfsd_put(sock_net(cb->skb->sk));
mutex_unlock(&nfsd_mutex);
return 0;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index f5ff42f41ee7..3286ffacbc56 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -155,6 +155,7 @@ int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change);
int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change);
void nfsd_reset_versions(struct nfsd_net *nn);
int nfsd_create_serv(struct net *net);
+void nfsd_last_thread(struct net *net);
extern int nfsd_max_blksize;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index fe61d9bbcc1f..7a2bc8e82a63 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -542,7 +542,7 @@ static struct notifier_block nfsd_inet6addr_notifier = {
/* Only used under nfsd_mutex, so this atomic may be overkill: */
static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0);
-static void nfsd_last_thread(struct net *net)
+void nfsd_last_thread(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct svc_serv *serv = nn->nfsd_serv;
@@ -955,7 +955,6 @@ nfsd(void *vrqstp)
rqstp->rq_server->sv_maxconn = nn->max_connections;
svc_recv(rqstp);
- validate_process_creds();
}
atomic_dec(&nfsdstats.th_cnt);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index f96eaa8e9413..41bdc913fa71 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -117,24 +117,6 @@ struct nfs4_cpntf_state {
time64_t cpntf_time; /* last time stateid used */
};
-struct nfs4_cb_fattr {
- struct nfsd4_callback ncf_getattr;
- u32 ncf_cb_status;
- u32 ncf_cb_bmap[1];
-
- /* from CB_GETATTR reply */
- u64 ncf_cb_change;
- u64 ncf_cb_fsize;
-
- unsigned long ncf_cb_flags;
- bool ncf_file_modified;
- u64 ncf_initial_cinfo;
- u64 ncf_cur_fsize;
-};
-
-/* bits for ncf_cb_flags */
-#define CB_GETATTR_BUSY 0
-
/*
* Represents a delegation stateid. The nfs4_client holds references to these
* and they are put when it is being destroyed or when the delegation is
@@ -168,9 +150,6 @@ struct nfs4_delegation {
int dl_retries;
struct nfsd4_callback dl_recall;
bool dl_recalled;
-
- /* for CB_GETATTR */
- struct nfs4_cb_fattr dl_cb_fattr;
};
#define cb_to_delegation(cb) \
@@ -661,7 +640,6 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_SEQUENCE,
NFSPROC4_CLNT_CB_NOTIFY_LOCK,
NFSPROC4_CLNT_CB_RECALL_ANY,
- NFSPROC4_CLNT_CB_GETATTR,
};
/* Returns true iff a is later than b: */
@@ -754,6 +732,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
}
extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
- struct inode *inode, bool *file_modified, u64 *size);
-extern void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf);
+ struct inode *inode);
#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index fbbea7498f02..e01e4e2acbd9 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -901,7 +901,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
int host_err;
bool retried = false;
- validate_process_creds();
/*
* If we get here, then the client has already done an "open",
* and (hopefully) checked permission - so allow OWNER_OVERRIDE
@@ -926,7 +925,6 @@ retry:
}
err = nfserrno(host_err);
}
- validate_process_creds();
return err;
}
@@ -943,12 +941,7 @@ int
nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags,
struct file **filp)
{
- int err;
-
- validate_process_creds();
- err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp);
- validate_process_creds();
- return err;
+ return __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp);
}
/*
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index e8b00309c449..0d39af1b00a0 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -54,21 +54,3 @@
#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
-
-/*
- * 1: CB_GETATTR opcode (32-bit)
- * N: file_handle
- * 1: number of entry in attribute array (32-bit)
- * 1: entry 0 in attribute array (32-bit)
- */
-#define NFS4_enc_cb_getattr_sz (cb_compound_enc_hdr_sz + \
- cb_sequence_enc_sz + \
- 1 + enc_nfs4_fh_sz + 1 + 1)
-/*
- * 4: fattr_bitmap_maxsz
- * 1: attribute array len
- * 2: change attr (64-bit)
- * 2: size (64-bit)
- */
-#define NFS4_dec_cb_getattr_sz (cb_compound_dec_hdr_sz + \
- cb_sequence_dec_sz + 4 + 1 + 2 + 2 + op_dec_sz)
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 2c6078a6b8ec..58ca7c936393 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -501,15 +501,38 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
down_write(&NILFS_MDT(sufile)->mi_sem);
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
- if (!ret) {
- mark_buffer_dirty(bh);
- nilfs_mdt_mark_dirty(sufile);
- kaddr = kmap_atomic(bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+ if (ret)
+ goto out_sem;
+
+ kaddr = kmap_atomic(bh->b_page);
+ su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+ if (unlikely(nilfs_segment_usage_error(su))) {
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+
+ kunmap_atomic(kaddr);
+ brelse(bh);
+ if (nilfs_segment_is_active(nilfs, segnum)) {
+ nilfs_error(sufile->i_sb,
+ "active segment %llu is erroneous",
+ (unsigned long long)segnum);
+ } else {
+ /*
+ * Segments marked erroneous are never allocated by
+ * nilfs_sufile_alloc(); only active segments, i.e.,
+ * the segments indexed by ns_segnum or ns_nextnum,
+ * can be erroneous here.
+ */
+ WARN_ON_ONCE(1);
+ }
+ ret = -EIO;
+ } else {
nilfs_segment_usage_set_dirty(su);
kunmap_atomic(kaddr);
+ mark_buffer_dirty(bh);
+ nilfs_mdt_mark_dirty(sufile);
brelse(bh);
}
+out_sem:
up_write(&NILFS_MDT(sufile)->mi_sem);
return ret;
}
@@ -536,9 +559,14 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
kaddr = kmap_atomic(bh->b_page);
su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
- WARN_ON(nilfs_segment_usage_error(su));
- if (modtime)
+ if (modtime) {
+ /*
+ * Check segusage error and set su_lastmod only when updating
+ * this entry with a valid timestamp, not for cancellation.
+ */
+ WARN_ON_ONCE(nilfs_segment_usage_error(su));
su->su_lastmod = cpu_to_le64(modtime);
+ }
su->su_nblocks = cpu_to_le32(nblocks);
kunmap_atomic(kaddr);
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 0f0667957c81..71400496ed36 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -716,7 +716,11 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
goto failed_sbh;
}
nilfs_release_super_block(nilfs);
- sb_set_blocksize(sb, blocksize);
+ if (!sb_set_blocksize(sb, blocksize)) {
+ nilfs_err(sb, "bad blocksize %d", blocksize);
+ err = -EINVAL;
+ goto out;
+ }
err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp);
if (err)
diff --git a/fs/open.c b/fs/open.c
index 02dc608d40d8..3494a9cd8046 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1088,8 +1088,6 @@ struct file *dentry_open(const struct path *path, int flags,
int error;
struct file *f;
- validate_creds(cred);
-
/* We must always pass in a valid mount pointer. */
BUG_ON(!path->mnt);
@@ -1128,7 +1126,6 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode,
struct file *f;
int error;
- validate_creds(cred);
f = alloc_empty_file(flags, cred);
if (IS_ERR(f))
return f;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ef2eb12906da..435b61054b5b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1982,15 +1982,31 @@ static int pagemap_scan_test_walk(unsigned long start, unsigned long end,
struct pagemap_scan_private *p = walk->private;
struct vm_area_struct *vma = walk->vma;
unsigned long vma_category = 0;
+ bool wp_allowed = userfaultfd_wp_async(vma) &&
+ userfaultfd_wp_use_markers(vma);
- if (userfaultfd_wp_async(vma) && userfaultfd_wp_use_markers(vma))
- vma_category |= PAGE_IS_WPALLOWED;
- else if (p->arg.flags & PM_SCAN_CHECK_WPASYNC)
- return -EPERM;
+ if (!wp_allowed) {
+ /* User requested explicit failure over wp-async capability */
+ if (p->arg.flags & PM_SCAN_CHECK_WPASYNC)
+ return -EPERM;
+ /*
+ * User requires wr-protect, and allows silently skipping
+ * unsupported vmas.
+ */
+ if (p->arg.flags & PM_SCAN_WP_MATCHING)
+ return 1;
+ /*
+ * Otherwise the request doesn't involve wr-protects at all;
+ * fall through to the remaining checks and allow the vma walk.
+ */
+ }
if (vma->vm_flags & VM_PFNMAP)
return 1;
+ if (wp_allowed)
+ vma_category |= PAGE_IS_WPALLOWED;
+
if (!pagemap_scan_is_interesting_vma(vma_category, p))
return 1;
@@ -2140,7 +2156,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
return 0;
}
- if (!p->vec_out) {
+ if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) {
/* Fast path for performing exclusive WP */
for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
if (pte_uffd_wp(ptep_get(pte)))
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 59f6b8e32cc9..d64a306a414b 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -291,16 +291,23 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
oparms.fid->mid = le64_to_cpu(o_rsp->hdr.MessageId);
#endif /* CIFS_DEBUG2 */
- rc = -EINVAL;
+
if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) {
spin_unlock(&cfids->cfid_list_lock);
+ rc = -EINVAL;
+ goto oshr_free;
+ }
+
+ rc = smb2_parse_contexts(server, rsp_iov,
+ &oparms.fid->epoch,
+ oparms.fid->lease_key,
+ &oplock, NULL, NULL);
+ if (rc) {
+ spin_unlock(&cfids->cfid_list_lock);
goto oshr_free;
}
- smb2_parse_contexts(server, o_rsp,
- &oparms.fid->epoch,
- oparms.fid->lease_key, &oplock,
- NULL, NULL);
+ rc = -EINVAL;
if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) {
spin_unlock(&cfids->cfid_list_lock);
goto oshr_free;
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index ea3a7a668b45..2131638f26d0 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -1196,32 +1196,103 @@ const struct inode_operations cifs_symlink_inode_ops = {
.listxattr = cifs_listxattr,
};
+/*
+ * Advance the EOF marker to after the source range.
+ */
+static int cifs_precopy_set_eof(struct inode *src_inode, struct cifsInodeInfo *src_cifsi,
+ struct cifs_tcon *src_tcon,
+ unsigned int xid, loff_t src_end)
+{
+ struct cifsFileInfo *writeable_srcfile;
+ int rc = -EINVAL;
+
+ writeable_srcfile = find_writable_file(src_cifsi, FIND_WR_FSUID_ONLY);
+ if (writeable_srcfile) {
+ if (src_tcon->ses->server->ops->set_file_size)
+ rc = src_tcon->ses->server->ops->set_file_size(
+ xid, src_tcon, writeable_srcfile,
+ src_inode->i_size, true /* no need to set sparse */);
+ else
+ rc = -ENOSYS;
+ cifsFileInfo_put(writeable_srcfile);
+ cifs_dbg(FYI, "SetFSize for copychunk rc = %d\n", rc);
+ }
+
+ if (rc < 0)
+ goto set_failed;
+
+ netfs_resize_file(&src_cifsi->netfs, src_end);
+ fscache_resize_cookie(cifs_inode_cookie(src_inode), src_end);
+ return 0;
+
+set_failed:
+ return filemap_write_and_wait(src_inode->i_mapping);
+}
+
+/*
+ * Flush out either the folio that overlaps the beginning of a range in which
+ * pos resides or the folio that overlaps the end of a range unless that folio
+ * is entirely within the range we're going to invalidate. We extend the flush
+ * bounds to encompass the folio.
+ */
+static int cifs_flush_folio(struct inode *inode, loff_t pos, loff_t *_fstart, loff_t *_fend,
+ bool first)
+{
+ struct folio *folio;
+ unsigned long long fpos, fend;
+ pgoff_t index = pos / PAGE_SIZE;
+ size_t size;
+ int rc = 0;
+
+ folio = filemap_get_folio(inode->i_mapping, index);
+ if (IS_ERR(folio))
+ return 0;
+
+ size = folio_size(folio);
+ fpos = folio_pos(folio);
+ fend = fpos + size - 1;
+ *_fstart = min_t(unsigned long long, *_fstart, fpos);
+ *_fend = max_t(unsigned long long, *_fend, fend);
+ if ((first && pos == fpos) || (!first && pos == fend))
+ goto out;
+
+ rc = filemap_write_and_wait_range(inode->i_mapping, fpos, fend);
+out:
+ folio_put(folio);
+ return rc;
+}
+
static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
struct file *dst_file, loff_t destoff, loff_t len,
unsigned int remap_flags)
{
struct inode *src_inode = file_inode(src_file);
struct inode *target_inode = file_inode(dst_file);
+ struct cifsInodeInfo *src_cifsi = CIFS_I(src_inode);
+ struct cifsInodeInfo *target_cifsi = CIFS_I(target_inode);
struct cifsFileInfo *smb_file_src = src_file->private_data;
- struct cifsFileInfo *smb_file_target;
- struct cifs_tcon *target_tcon;
+ struct cifsFileInfo *smb_file_target = dst_file->private_data;
+ struct cifs_tcon *target_tcon, *src_tcon;
+ unsigned long long destend, fstart, fend, new_size;
unsigned int xid;
int rc;
- if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+ if (remap_flags & REMAP_FILE_DEDUP)
+ return -EOPNOTSUPP;
+ if (remap_flags & ~REMAP_FILE_ADVISORY)
return -EINVAL;
cifs_dbg(FYI, "clone range\n");
xid = get_xid();
- if (!src_file->private_data || !dst_file->private_data) {
+ if (!smb_file_src || !smb_file_target) {
rc = -EBADF;
cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
goto out;
}
- smb_file_target = dst_file->private_data;
+ src_tcon = tlink_tcon(smb_file_src->tlink);
target_tcon = tlink_tcon(smb_file_target->tlink);
/*
@@ -1234,20 +1305,63 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
if (len == 0)
len = src_inode->i_size - off;
- cifs_dbg(FYI, "about to flush pages\n");
- /* should we flush first and last page first */
- truncate_inode_pages_range(&target_inode->i_data, destoff,
- PAGE_ALIGN(destoff + len)-1);
+ cifs_dbg(FYI, "clone range\n");
- if (target_tcon->ses->server->ops->duplicate_extents)
+ /* Flush the source buffer */
+ rc = filemap_write_and_wait_range(src_inode->i_mapping, off,
+ off + len - 1);
+ if (rc)
+ goto unlock;
+
+ /* The server-side copy will fail if the source crosses the EOF marker.
+ * Advance the EOF marker after the flush above to the end of the range
+ * if it's short of that.
+ */
+ if (src_cifsi->netfs.remote_i_size < off + len) {
+ rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len);
+ if (rc < 0)
+ goto unlock;
+ }
+
+ new_size = destoff + len;
+ destend = destoff + len - 1;
+
+ /* Flush the folios at either end of the destination range to prevent
+ * accidental loss of dirty data outside of the range.
+ */
+ fstart = destoff;
+ fend = destend;
+
+ rc = cifs_flush_folio(target_inode, destoff, &fstart, &fend, true);
+ if (rc)
+ goto unlock;
+ rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false);
+ if (rc)
+ goto unlock;
+
+ /* Discard all the folios that overlap the destination region. */
+ cifs_dbg(FYI, "about to discard pages %llx-%llx\n", fstart, fend);
+ truncate_inode_pages_range(&target_inode->i_data, fstart, fend);
+
+ fscache_invalidate(cifs_inode_cookie(target_inode), NULL,
+ i_size_read(target_inode), 0);
+
+ rc = -EOPNOTSUPP;
+ if (target_tcon->ses->server->ops->duplicate_extents) {
rc = target_tcon->ses->server->ops->duplicate_extents(xid,
smb_file_src, smb_file_target, off, len, destoff);
- else
- rc = -EOPNOTSUPP;
+ if (rc == 0 && new_size > i_size_read(target_inode)) {
+ truncate_setsize(target_inode, new_size);
+ netfs_resize_file(&target_cifsi->netfs, new_size);
+ fscache_resize_cookie(cifs_inode_cookie(target_inode),
+ new_size);
+ }
+ }
/* force revalidate of size and timestamps of target file now
that target is updated on the server */
CIFS_I(target_inode)->time = 0;
+unlock:
/* although unlocking in the reverse order from locking is not
strictly necessary here it is a little cleaner to be consistent */
unlock_two_nondirectories(src_inode, target_inode);
@@ -1263,10 +1377,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
{
struct inode *src_inode = file_inode(src_file);
struct inode *target_inode = file_inode(dst_file);
+ struct cifsInodeInfo *src_cifsi = CIFS_I(src_inode);
struct cifsFileInfo *smb_file_src;
struct cifsFileInfo *smb_file_target;
struct cifs_tcon *src_tcon;
struct cifs_tcon *target_tcon;
+ unsigned long long destend, fstart, fend;
ssize_t rc;
cifs_dbg(FYI, "copychunk range\n");
@@ -1306,13 +1422,41 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
if (rc)
goto unlock;
- /* should we flush first and last page first */
- truncate_inode_pages(&target_inode->i_data, 0);
+ /* The server-side copy will fail if the source crosses the EOF marker.
+ * Advance the EOF marker after the flush above to the end of the range
+ * if it's short of that.
+ */
+ if (src_cifsi->server_eof < off + len) {
+ rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len);
+ if (rc < 0)
+ goto unlock;
+ }
+
+ destend = destoff + len - 1;
+
+ /* Flush the folios at either end of the destination range to prevent
+ * accidental loss of dirty data outside of the range.
+ */
+ fstart = destoff;
+ fend = destend;
+
+ rc = cifs_flush_folio(target_inode, destoff, &fstart, &fend, true);
+ if (rc)
+ goto unlock;
+ rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false);
+ if (rc)
+ goto unlock;
+
+ /* Discard all the folios that overlap the destination region. */
+ truncate_inode_pages_range(&target_inode->i_data, fstart, fend);
rc = file_modified(dst_file);
- if (!rc)
+ if (!rc) {
rc = target_tcon->ses->server->ops->copychunk_range(xid,
smb_file_src, smb_file_target, off, len, destoff);
+ if (rc > 0 && destoff + rc > i_size_read(target_inode))
+ truncate_setsize(target_inode, destoff + rc);
+ }
file_accessed(src_file);
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index f896f60c924b..9dc6dc2754c2 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -402,13 +402,7 @@ static int __cifs_reconnect(struct TCP_Server_Info *server,
spin_unlock(&server->srv_lock);
cifs_swn_reset_server_dstaddr(server);
cifs_server_unlock(server);
-
- /* increase ref count which reconnect work will drop */
- spin_lock(&cifs_tcp_ses_lock);
- server->srv_count++;
- spin_unlock(&cifs_tcp_ses_lock);
- if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0))
- cifs_put_tcp_session(server, false);
+ mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
}
} while (server->tcpStatus == CifsNeedReconnect);
@@ -538,13 +532,7 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server)
spin_unlock(&server->srv_lock);
cifs_swn_reset_server_dstaddr(server);
cifs_server_unlock(server);
-
- /* increase ref count which reconnect work will drop */
- spin_lock(&cifs_tcp_ses_lock);
- server->srv_count++;
- spin_unlock(&cifs_tcp_ses_lock);
- if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0))
- cifs_put_tcp_session(server, false);
+ mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
} while (server->tcpStatus == CifsNeedReconnect);
mutex_lock(&server->refpath_lock);
@@ -1620,25 +1608,22 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
list_del_init(&server->tcp_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
- /* For secondary channels, we pick up ref-count on the primary server */
- if (SERVER_IS_CHAN(server))
- cifs_put_tcp_session(server->primary_server, from_reconnect);
-
cancel_delayed_work_sync(&server->echo);
- if (from_reconnect) {
+ if (from_reconnect)
/*
* Avoid deadlock here: reconnect work calls
* cifs_put_tcp_session() at its end. Need to be sure
* that reconnect work does nothing with server pointer after
* that step.
*/
- if (cancel_delayed_work(&server->reconnect))
- cifs_put_tcp_session(server, from_reconnect);
- } else {
- if (cancel_delayed_work_sync(&server->reconnect))
- cifs_put_tcp_session(server, from_reconnect);
- }
+ cancel_delayed_work(&server->reconnect);
+ else
+ cancel_delayed_work_sync(&server->reconnect);
+
+ /* For secondary channels, we pick up ref-count on the primary server */
+ if (SERVER_IS_CHAN(server))
+ cifs_put_tcp_session(server->primary_server, from_reconnect);
spin_lock(&server->srv_lock);
server->tcpStatus = CifsExiting;
diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c
index 32dfa0f7a78c..e20b4354e703 100644
--- a/fs/smb/client/smb2misc.c
+++ b/fs/smb/client/smb2misc.c
@@ -313,6 +313,9 @@ static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = {
char *
smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *shdr)
{
+ const int max_off = 4096;
+ const int max_len = 128 * 1024;
+
*off = 0;
*len = 0;
@@ -384,29 +387,20 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *shdr)
* Invalid length or offset probably means data area is invalid, but
* we have little choice but to ignore the data area in this case.
*/
- if (*off > 4096) {
- cifs_dbg(VFS, "offset %d too large, data area ignored\n", *off);
- *len = 0;
- *off = 0;
- } else if (*off < 0) {
- cifs_dbg(VFS, "negative offset %d to data invalid ignore data area\n",
- *off);
+ if (unlikely(*off < 0 || *off > max_off ||
+ *len < 0 || *len > max_len)) {
+ cifs_dbg(VFS, "%s: invalid data area (off=%d len=%d)\n",
+ __func__, *off, *len);
*off = 0;
*len = 0;
- } else if (*len < 0) {
- cifs_dbg(VFS, "negative data length %d invalid, data area ignored\n",
- *len);
- *len = 0;
- } else if (*len > 128 * 1024) {
- cifs_dbg(VFS, "data area larger than 128K: %d\n", *len);
+ } else if (*off == 0) {
*len = 0;
}
/* return pointer to beginning of data area, ie offset from SMB start */
- if ((*off != 0) && (*len != 0))
+ if (*off > 0 && *len > 0)
return (char *)shdr + *off;
- else
- return NULL;
+ return NULL;
}
/*
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 45931115f475..8f6f0a38b886 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -2836,6 +2836,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
usleep_range(512, 2048);
} while (++retry_count < 5);
+ if (!rc && !dfs_rsp)
+ rc = -EIO;
if (rc) {
if (!is_retryable_error(rc) && rc != -ENOENT && rc != -EOPNOTSUPP)
cifs_tcon_dbg(VFS, "%s: ioctl error: rc=%d\n", __func__, rc);
@@ -3001,7 +3003,7 @@ static int smb2_query_reparse_point(const unsigned int xid,
struct kvec *rsp_iov;
struct smb2_ioctl_rsp *ioctl_rsp;
struct reparse_data_buffer *reparse_buf;
- u32 plen;
+ u32 off, count, len;
cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
@@ -3082,16 +3084,22 @@ static int smb2_query_reparse_point(const unsigned int xid,
*/
if (rc == 0) {
/* See MS-FSCC 2.3.23 */
+ off = le32_to_cpu(ioctl_rsp->OutputOffset);
+ count = le32_to_cpu(ioctl_rsp->OutputCount);
+ if (check_add_overflow(off, count, &len) ||
+ len > rsp_iov[1].iov_len) {
+ cifs_tcon_dbg(VFS, "%s: invalid ioctl: off=%d count=%d\n",
+ __func__, off, count);
+ rc = -EIO;
+ goto query_rp_exit;
+ }
- reparse_buf = (struct reparse_data_buffer *)
- ((char *)ioctl_rsp +
- le32_to_cpu(ioctl_rsp->OutputOffset));
- plen = le32_to_cpu(ioctl_rsp->OutputCount);
-
- if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) >
- rsp_iov[1].iov_len) {
- cifs_tcon_dbg(FYI, "srv returned invalid ioctl len: %d\n",
- plen);
+ reparse_buf = (void *)((u8 *)ioctl_rsp + off);
+ len = sizeof(*reparse_buf);
+ if (count < len ||
+ count < le16_to_cpu(reparse_buf->ReparseDataLength) + len) {
+ cifs_tcon_dbg(VFS, "%s: invalid ioctl: off=%d count=%d\n",
+ __func__, off, count);
rc = -EIO;
goto query_rp_exit;
}
@@ -4941,6 +4949,7 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
struct smb2_hdr *shdr;
unsigned int pdu_length = server->pdu_size;
unsigned int buf_size;
+ unsigned int next_cmd;
struct mid_q_entry *mid_entry;
int next_is_large;
char *next_buffer = NULL;
@@ -4969,14 +4978,15 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
next_is_large = server->large_buf;
one_more:
shdr = (struct smb2_hdr *)buf;
- if (shdr->NextCommand) {
+ next_cmd = le32_to_cpu(shdr->NextCommand);
+ if (next_cmd) {
+ if (WARN_ON_ONCE(next_cmd > pdu_length))
+ return -1;
if (next_is_large)
next_buffer = (char *)cifs_buf_get();
else
next_buffer = (char *)cifs_small_buf_get();
- memcpy(next_buffer,
- buf + le32_to_cpu(shdr->NextCommand),
- pdu_length - le32_to_cpu(shdr->NextCommand));
+ memcpy(next_buffer, buf + next_cmd, pdu_length - next_cmd);
}
mid_entry = smb2_find_mid(server, buf);
@@ -5000,8 +5010,8 @@ one_more:
else
ret = cifs_handle_standard(server, mid_entry);
- if (ret == 0 && shdr->NextCommand) {
- pdu_length -= le32_to_cpu(shdr->NextCommand);
+ if (ret == 0 && next_cmd) {
+ pdu_length -= next_cmd;
server->large_buf = next_is_large;
if (next_is_large)
server->bigbuf = buf = next_buffer;
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 395e1230ddbc..c571760ad39a 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -158,7 +158,7 @@ out:
static int
smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
- struct TCP_Server_Info *server)
+ struct TCP_Server_Info *server, bool from_reconnect)
{
int rc = 0;
struct nls_table *nls_codepage = NULL;
@@ -331,7 +331,7 @@ again:
* as cifs_put_tcp_session takes a higher lock
* i.e. cifs_tcp_ses_lock
*/
- cifs_put_tcp_session(server, 1);
+ cifs_put_tcp_session(server, from_reconnect);
server->terminate = true;
cifs_signal_cifsd_for_reconnect(server, false);
@@ -499,7 +499,7 @@ static int smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
{
int rc;
- rc = smb2_reconnect(smb2_command, tcon, server);
+ rc = smb2_reconnect(smb2_command, tcon, server, false);
if (rc)
return rc;
@@ -2236,17 +2236,18 @@ parse_posix_ctxt(struct create_context *cc, struct smb2_file_all_info *info,
posix->nlink, posix->mode, posix->reparse_tag);
}
-void
-smb2_parse_contexts(struct TCP_Server_Info *server,
- struct smb2_create_rsp *rsp,
- unsigned int *epoch, char *lease_key, __u8 *oplock,
- struct smb2_file_all_info *buf,
- struct create_posix_rsp *posix)
+int smb2_parse_contexts(struct TCP_Server_Info *server,
+ struct kvec *rsp_iov,
+ unsigned int *epoch,
+ char *lease_key, __u8 *oplock,
+ struct smb2_file_all_info *buf,
+ struct create_posix_rsp *posix)
{
- char *data_offset;
+ struct smb2_create_rsp *rsp = rsp_iov->iov_base;
struct create_context *cc;
- unsigned int next;
- unsigned int remaining;
+ size_t rem, off, len;
+ size_t doff, dlen;
+ size_t noff, nlen;
char *name;
static const char smb3_create_tag_posix[] = {
0x93, 0xAD, 0x25, 0x50, 0x9C,
@@ -2255,45 +2256,63 @@ smb2_parse_contexts(struct TCP_Server_Info *server,
};
*oplock = 0;
- data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset);
- remaining = le32_to_cpu(rsp->CreateContextsLength);
- cc = (struct create_context *)data_offset;
+
+ off = le32_to_cpu(rsp->CreateContextsOffset);
+ rem = le32_to_cpu(rsp->CreateContextsLength);
+ if (check_add_overflow(off, rem, &len) || len > rsp_iov->iov_len)
+ return -EINVAL;
+ cc = (struct create_context *)((u8 *)rsp + off);
/* Initialize inode number to 0 in case no valid data in qfid context */
if (buf)
buf->IndexNumber = 0;
- while (remaining >= sizeof(struct create_context)) {
- name = le16_to_cpu(cc->NameOffset) + (char *)cc;
- if (le16_to_cpu(cc->NameLength) == 4 &&
- strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4) == 0)
- *oplock = server->ops->parse_lease_buf(cc, epoch,
- lease_key);
- else if (buf && (le16_to_cpu(cc->NameLength) == 4) &&
- strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0)
- parse_query_id_ctxt(cc, buf);
- else if ((le16_to_cpu(cc->NameLength) == 16)) {
- if (posix &&
- memcmp(name, smb3_create_tag_posix, 16) == 0)
+ while (rem >= sizeof(*cc)) {
+ doff = le16_to_cpu(cc->DataOffset);
+ dlen = le32_to_cpu(cc->DataLength);
+ if (check_add_overflow(doff, dlen, &len) || len > rem)
+ return -EINVAL;
+
+ noff = le16_to_cpu(cc->NameOffset);
+ nlen = le16_to_cpu(cc->NameLength);
+ if (noff + nlen >= doff)
+ return -EINVAL;
+
+ name = (char *)cc + noff;
+ switch (nlen) {
+ case 4:
+ if (!strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4)) {
+ *oplock = server->ops->parse_lease_buf(cc, epoch,
+ lease_key);
+ } else if (buf &&
+ !strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4)) {
+ parse_query_id_ctxt(cc, buf);
+ }
+ break;
+ case 16:
+ if (posix && !memcmp(name, smb3_create_tag_posix, 16))
parse_posix_ctxt(cc, buf, posix);
+ break;
+ default:
+ cifs_dbg(FYI, "%s: unhandled context (nlen=%zu dlen=%zu)\n",
+ __func__, nlen, dlen);
+ if (IS_ENABLED(CONFIG_CIFS_DEBUG2))
+ cifs_dump_mem("context data: ", cc, dlen);
+ break;
}
- /* else {
- cifs_dbg(FYI, "Context not matched with len %d\n",
- le16_to_cpu(cc->NameLength));
- cifs_dump_mem("Cctxt name: ", name, 4);
- } */
-
- next = le32_to_cpu(cc->Next);
- if (!next)
+
+ off = le32_to_cpu(cc->Next);
+ if (!off)
break;
- remaining -= next;
- cc = (struct create_context *)((char *)cc + next);
+ if (check_sub_overflow(rem, off, &rem))
+ return -EINVAL;
+ cc = (struct create_context *)((u8 *)cc + off);
}
if (rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE)
*oplock = rsp->OplockLevel;
- return;
+ return 0;
}
static int
@@ -3124,8 +3143,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
}
- smb2_parse_contexts(server, rsp, &oparms->fid->epoch,
- oparms->fid->lease_key, oplock, buf, posix);
+ rc = smb2_parse_contexts(server, &rsp_iov, &oparms->fid->epoch,
+ oparms->fid->lease_key, oplock, buf, posix);
creat_exit:
SMB2_open_free(&rqst);
free_rsp_buf(resp_buftype, rsp);
@@ -3895,6 +3914,15 @@ void smb2_reconnect_server(struct work_struct *work)
int rc;
bool resched = false;
+ /* first check if ref count has reached 0, if not inc ref count */
+ spin_lock(&cifs_tcp_ses_lock);
+ if (!server->srv_count) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return;
+ }
+ server->srv_count++;
+ spin_unlock(&cifs_tcp_ses_lock);
+
/* If server is a channel, select the primary channel */
pserver = SERVER_IS_CHAN(server) ? server->primary_server : server;
@@ -3952,11 +3980,10 @@ void smb2_reconnect_server(struct work_struct *work)
}
spin_unlock(&ses->chan_lock);
}
-
spin_unlock(&cifs_tcp_ses_lock);
list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) {
- rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server);
+ rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server, true);
if (!rc)
cifs_reopen_persistent_handles(tcon);
else
@@ -3989,7 +4016,7 @@ void smb2_reconnect_server(struct work_struct *work)
/* now reconnect sessions for necessary channels */
list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) {
tcon->ses = ses;
- rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server);
+ rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server, true);
if (rc)
resched = true;
list_del_init(&ses->rlist);
@@ -3999,13 +4026,8 @@ void smb2_reconnect_server(struct work_struct *work)
done:
cifs_dbg(FYI, "Reconnecting tcons and channels finished\n");
- if (resched) {
+ if (resched)
queue_delayed_work(cifsiod_wq, &server->reconnect, 2 * HZ);
- mutex_unlock(&pserver->reconnect_mutex);
-
- /* no need to put tcp session as we're retrying */
- return;
- }
mutex_unlock(&pserver->reconnect_mutex);
/* now we can safely release srv struct */
@@ -4029,12 +4051,7 @@ SMB2_echo(struct TCP_Server_Info *server)
server->ops->need_neg(server)) {
spin_unlock(&server->srv_lock);
/* No need to send echo on newly established connections */
- spin_lock(&cifs_tcp_ses_lock);
- server->srv_count++;
- spin_unlock(&cifs_tcp_ses_lock);
- if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0))
- cifs_put_tcp_session(server, false);
-
+ mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
return rc;
}
spin_unlock(&server->srv_lock);
diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h
index 46eff9ec302a..0e371f7e2854 100644
--- a/fs/smb/client/smb2proto.h
+++ b/fs/smb/client/smb2proto.h
@@ -251,11 +251,13 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *);
extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *,
enum securityEnum);
-extern void smb2_parse_contexts(struct TCP_Server_Info *server,
- struct smb2_create_rsp *rsp,
- unsigned int *epoch, char *lease_key,
- __u8 *oplock, struct smb2_file_all_info *buf,
- struct create_posix_rsp *posix);
+int smb2_parse_contexts(struct TCP_Server_Info *server,
+ struct kvec *rsp_iov,
+ unsigned int *epoch,
+ char *lease_key, __u8 *oplock,
+ struct smb2_file_all_info *buf,
+ struct create_posix_rsp *posix);
+
extern int smb3_encryption_required(const struct cifs_tcon *tcon);
extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length,
struct kvec *iov, unsigned int min_buf_size);
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 9fbaaa387dcc..57f2343164a3 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -1145,7 +1145,7 @@ struct smb2_server_client_notification {
#define SMB2_CREATE_SD_BUFFER "SecD" /* security descriptor */
#define SMB2_CREATE_DURABLE_HANDLE_REQUEST "DHnQ"
#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT "DHnC"
-#define SMB2_CREATE_ALLOCATION_SIZE "AISi"
+#define SMB2_CREATE_ALLOCATION_SIZE "AlSi"
#define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc"
#define SMB2_CREATE_TIMEWARP_REQUEST "TWrp"
#define SMB2_CREATE_QUERY_ON_DISK_ID "QFid"
@@ -1253,6 +1253,7 @@ struct create_mxac_rsp {
#define SMB2_LEASE_WRITE_CACHING_LE cpu_to_le32(0x04)
#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS_LE cpu_to_le32(0x02)
+#define SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE cpu_to_le32(0x04)
#define SMB2_LEASE_KEY_SIZE 16
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index 50c68beb71d6..562b180459a1 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -102,9 +102,10 @@ static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx)
lease->new_state = 0;
lease->flags = lctx->flags;
lease->duration = lctx->duration;
+ lease->is_dir = lctx->is_dir;
memcpy(lease->parent_lease_key, lctx->parent_lease_key, SMB2_LEASE_KEY_SIZE);
lease->version = lctx->version;
- lease->epoch = 0;
+ lease->epoch = le16_to_cpu(lctx->epoch);
INIT_LIST_HEAD(&opinfo->lease_entry);
opinfo->o_lease = lease;
@@ -395,8 +396,8 @@ void close_id_del_oplock(struct ksmbd_file *fp)
{
struct oplock_info *opinfo;
- if (S_ISDIR(file_inode(fp->filp)->i_mode))
- return;
+ if (fp->reserve_lease_break)
+ smb_lazy_parent_lease_break_close(fp);
opinfo = opinfo_get(fp);
if (!opinfo)
@@ -543,12 +544,13 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci,
/* upgrading lease */
if ((atomic_read(&ci->op_count) +
atomic_read(&ci->sop_count)) == 1) {
- if (lease->state ==
- (lctx->req_state & lease->state)) {
+ if (lease->state != SMB2_LEASE_NONE_LE &&
+ lease->state == (lctx->req_state & lease->state)) {
lease->state |= lctx->req_state;
if (lctx->req_state &
SMB2_LEASE_WRITE_CACHING_LE)
lease_read_to_write(opinfo);
+
}
} else if ((atomic_read(&ci->op_count) +
atomic_read(&ci->sop_count)) > 1) {
@@ -900,7 +902,8 @@ static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level)
lease->new_state =
SMB2_LEASE_READ_CACHING_LE;
} else {
- if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE)
+ if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE &&
+ !lease->is_dir)
lease->new_state =
SMB2_LEASE_READ_CACHING_LE;
else
@@ -1032,6 +1035,7 @@ static void copy_lease(struct oplock_info *op1, struct oplock_info *op2)
SMB2_LEASE_KEY_SIZE);
lease2->duration = lease1->duration;
lease2->flags = lease1->flags;
+ lease2->epoch = lease1->epoch++;
}
static int add_lease_global_list(struct oplock_info *opinfo)
@@ -1081,6 +1085,89 @@ static void set_oplock_level(struct oplock_info *opinfo, int level,
}
}
+/**
+ * smb_send_parent_lease_break_noti() - break leases held on the parent dir
+ * @fp:		file being opened
+ * @lctx:	lease context parsed from the open request
+ *
+ * Only version 2 lease contexts are handled. A lease on the parent directory
+ * is broken to none when its state is not none and either the request did
+ * not set SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE or compare_guid_key()
+ * returns false for the client GUID and the requested parent lease key.
+ */
+void smb_send_parent_lease_break_noti(struct ksmbd_file *fp,
+				      struct lease_ctx_info *lctx)
+{
+	struct oplock_info *opinfo;
+	struct ksmbd_inode *p_ci = NULL;
+
+	if (lctx->version != 2)
+		return;
+
+	p_ci = ksmbd_inode_lookup_lock(fp->filp->f_path.dentry->d_parent);
+	if (!p_ci)
+		return;
+
+	read_lock(&p_ci->m_lock);
+	list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) {
+		if (!opinfo->is_lease)
+			continue;
+
+		if (opinfo->o_lease->state != SMB2_OPLOCK_LEVEL_NONE &&
+		    (!(lctx->flags & SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE) ||
+		     !compare_guid_key(opinfo, fp->conn->ClientGUID,
+				       lctx->parent_lease_key))) {
+			if (!atomic_inc_not_zero(&opinfo->refcount))
+				continue;
+
+			atomic_inc(&opinfo->conn->r_count);
+			if (ksmbd_conn_releasing(opinfo->conn)) {
+				/*
+				 * Release both the opinfo reference and the
+				 * connection count taken just above; dropping
+				 * only r_count would leak the opinfo
+				 * reference.
+				 */
+				opinfo_conn_put(opinfo);
+				continue;
+			}
+
+			/*
+			 * NOTE(review): m_lock is dropped around
+			 * oplock_break() and the list walk then resumes from
+			 * this entry - confirm the entry cannot be unlinked
+			 * in the meantime.
+			 */
+			read_unlock(&p_ci->m_lock);
+			oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE);
+			opinfo_conn_put(opinfo);
+			read_lock(&p_ci->m_lock);
+		}
+	}
+	read_unlock(&p_ci->m_lock);
+
+	ksmbd_inode_put(p_ci);
+}
+
+/**
+ * smb_lazy_parent_lease_break_close() - break parent dir leases at close time
+ * @fp:	file being closed
+ *
+ * Does nothing unless @fp carries a version 2 lease. Otherwise every lease
+ * on the parent directory whose state is not none is broken to none.
+ */
+void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp)
+{
+	struct oplock_info *opinfo;
+	struct ksmbd_inode *p_ci = NULL;
+	bool v2_lease;
+
+	/*
+	 * @fp may have no oplock info at all (the caller checks for that
+	 * itself right after this call), so test the pointer before using
+	 * it, and keep the reads inside the RCU read-side section.
+	 */
+	rcu_read_lock();
+	opinfo = rcu_dereference(fp->f_opinfo);
+	v2_lease = opinfo && opinfo->is_lease &&
+		   opinfo->o_lease->version == 2;
+	rcu_read_unlock();
+
+	if (!v2_lease)
+		return;
+
+	p_ci = ksmbd_inode_lookup_lock(fp->filp->f_path.dentry->d_parent);
+	if (!p_ci)
+		return;
+
+	read_lock(&p_ci->m_lock);
+	list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) {
+		if (!opinfo->is_lease)
+			continue;
+
+		if (opinfo->o_lease->state != SMB2_OPLOCK_LEVEL_NONE) {
+			if (!atomic_inc_not_zero(&opinfo->refcount))
+				continue;
+
+			atomic_inc(&opinfo->conn->r_count);
+			if (ksmbd_conn_releasing(opinfo->conn)) {
+				/*
+				 * Release both the opinfo reference and the
+				 * connection count taken just above; dropping
+				 * only r_count would leak the opinfo
+				 * reference.
+				 */
+				opinfo_conn_put(opinfo);
+				continue;
+			}
+
+			/*
+			 * NOTE(review): m_lock is dropped around
+			 * oplock_break() and the list walk then resumes from
+			 * this entry - confirm the entry cannot be unlinked
+			 * in the meantime.
+			 */
+			read_unlock(&p_ci->m_lock);
+			oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE);
+			opinfo_conn_put(opinfo);
+			read_lock(&p_ci->m_lock);
+		}
+	}
+	read_unlock(&p_ci->m_lock);
+
+	ksmbd_inode_put(p_ci);
+}
+
/**
* smb_grant_oplock() - handle oplock/lease request on file open
* @work: smb work
@@ -1104,10 +1191,6 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
bool prev_op_has_lease;
__le32 prev_op_state = 0;
- /* not support directory lease */
- if (S_ISDIR(file_inode(fp->filp)->i_mode))
- return 0;
-
opinfo = alloc_opinfo(work, pid, tid);
if (!opinfo)
return -ENOMEM;
@@ -1364,6 +1447,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease)
memcpy(buf->lcontext.LeaseKey, lease->lease_key,
SMB2_LEASE_KEY_SIZE);
buf->lcontext.LeaseFlags = lease->flags;
+ buf->lcontext.Epoch = cpu_to_le16(++lease->epoch);
buf->lcontext.LeaseState = lease->state;
memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key,
SMB2_LEASE_KEY_SIZE);
@@ -1400,10 +1484,11 @@ void create_lease_buf(u8 *rbuf, struct lease *lease)
/**
* parse_lease_state() - parse lease context containted in file open request
* @open_req: buffer containing smb2 file open(create) request
+ * @is_dir: whether leasing file is directory
*
* Return: oplock state, -ENOENT if create lease context not found
*/
-struct lease_ctx_info *parse_lease_state(void *open_req)
+struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir)
{
struct create_context *cc;
struct smb2_create_req *req = (struct smb2_create_req *)open_req;
@@ -1421,8 +1506,14 @@ struct lease_ctx_info *parse_lease_state(void *open_req)
struct create_lease_v2 *lc = (struct create_lease_v2 *)cc;
memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
- lreq->req_state = lc->lcontext.LeaseState;
+ if (is_dir) {
+ lreq->req_state = lc->lcontext.LeaseState &
+ ~SMB2_LEASE_WRITE_CACHING_LE;
+ lreq->is_dir = true;
+ } else
+ lreq->req_state = lc->lcontext.LeaseState;
lreq->flags = lc->lcontext.LeaseFlags;
+ lreq->epoch = lc->lcontext.Epoch;
lreq->duration = lc->lcontext.LeaseDuration;
memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey,
SMB2_LEASE_KEY_SIZE);
diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h
index 4b0fe6da7694..5b93ea9196c0 100644
--- a/fs/smb/server/oplock.h
+++ b/fs/smb/server/oplock.h
@@ -34,7 +34,9 @@ struct lease_ctx_info {
__le32 flags;
__le64 duration;
__u8 parent_lease_key[SMB2_LEASE_KEY_SIZE];
+ __le16 epoch;
int version;
+ bool is_dir;
};
struct lease_table {
@@ -53,6 +55,7 @@ struct lease {
__u8 parent_lease_key[SMB2_LEASE_KEY_SIZE];
int version;
unsigned short epoch;
+ bool is_dir;
struct lease_table *l_lb;
};
@@ -108,7 +111,7 @@ void opinfo_put(struct oplock_info *opinfo);
/* Lease related functions */
void create_lease_buf(u8 *rbuf, struct lease *lease);
-struct lease_ctx_info *parse_lease_state(void *open_req);
+struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir);
__u8 smb2_map_lease_to_oplock(__le32 lease_state);
int lease_read_to_write(struct oplock_info *opinfo);
@@ -124,4 +127,7 @@ struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn,
int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci,
struct lease_ctx_info *lctx);
void destroy_lease_table(struct ksmbd_conn *conn);
+void smb_send_parent_lease_break_noti(struct ksmbd_file *fp,
+ struct lease_ctx_info *lctx);
+void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp);
#endif /* __KSMBD_OPLOCK_H */
diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c
index aed7704a0672..27a9dce3e03a 100644
--- a/fs/smb/server/smb2ops.c
+++ b/fs/smb/server/smb2ops.c
@@ -221,7 +221,8 @@ void init_smb3_0_server(struct ksmbd_conn *conn)
conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
- conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING |
+ SMB2_GLOBAL_CAP_DIRECTORY_LEASING;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION &&
conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)
@@ -245,7 +246,8 @@ void init_smb3_02_server(struct ksmbd_conn *conn)
conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
- conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING |
+ SMB2_GLOBAL_CAP_DIRECTORY_LEASING;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
(!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
@@ -270,7 +272,8 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
- conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING |
+ SMB2_GLOBAL_CAP_DIRECTORY_LEASING;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
(!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index d369b98a6e10..652ab429bf2e 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -2516,7 +2516,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path *
da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
XATTR_DOSINFO_ITIME;
- rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da, false);
+ rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da, true);
if (rc)
ksmbd_debug(SMB, "failed to store file attribute into xattr\n");
}
@@ -2732,10 +2732,6 @@ int smb2_open(struct ksmbd_work *work)
}
}
- req_op_level = req->RequestedOplockLevel;
- if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE)
- lc = parse_lease_state(req);
-
if (le32_to_cpu(req->ImpersonationLevel) > le32_to_cpu(IL_DELEGATE)) {
pr_err("Invalid impersonationlevel : 0x%x\n",
le32_to_cpu(req->ImpersonationLevel));
@@ -3189,23 +3185,6 @@ int smb2_open(struct ksmbd_work *work)
goto err_out;
}
- rc = ksmbd_vfs_getattr(&path, &stat);
- if (rc)
- goto err_out;
-
- if (stat.result_mask & STATX_BTIME)
- fp->create_time = ksmbd_UnixTimeToNT(stat.btime);
- else
- fp->create_time = ksmbd_UnixTimeToNT(stat.ctime);
- if (req->FileAttributes || fp->f_ci->m_fattr == 0)
- fp->f_ci->m_fattr =
- cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes)));
-
- if (!created)
- smb2_update_xattrs(tcon, &path, fp);
- else
- smb2_new_xattrs(tcon, &path, fp);
-
if (file_present || created)
ksmbd_vfs_kern_path_unlock(&parent_path, &path);
@@ -3215,6 +3194,10 @@ int smb2_open(struct ksmbd_work *work)
need_truncate = 1;
}
+ req_op_level = req->RequestedOplockLevel;
+ if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE)
+ lc = parse_lease_state(req, S_ISDIR(file_inode(filp)->i_mode));
+
share_ret = ksmbd_smb_check_shared_mode(fp->filp, fp);
if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_OPLOCKS) ||
(req_op_level == SMB2_OPLOCK_LEVEL_LEASE &&
@@ -3225,6 +3208,13 @@ int smb2_open(struct ksmbd_work *work)
}
} else {
if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) {
+ /*
+ * Compare parent lease using parent key. If there is no
+ * lease that has the same parent key, send a lease break
+ * notification.
+ */
+ smb_send_parent_lease_break_noti(fp, lc);
+
req_op_level = smb2_map_lease_to_oplock(lc->req_state);
ksmbd_debug(SMB,
"lease req for(%s) req oplock state 0x%x, lease state 0x%x\n",
@@ -3295,6 +3285,23 @@ int smb2_open(struct ksmbd_work *work)
}
}
+ rc = ksmbd_vfs_getattr(&path, &stat);
+ if (rc)
+ goto err_out1;
+
+ if (stat.result_mask & STATX_BTIME)
+ fp->create_time = ksmbd_UnixTimeToNT(stat.btime);
+ else
+ fp->create_time = ksmbd_UnixTimeToNT(stat.ctime);
+ if (req->FileAttributes || fp->f_ci->m_fattr == 0)
+ fp->f_ci->m_fattr =
+ cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes)));
+
+ if (!created)
+ smb2_update_xattrs(tcon, &path, fp);
+ else
+ smb2_new_xattrs(tcon, &path, fp);
+
memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
rsp->StructureSize = cpu_to_le16(89);
@@ -7080,6 +7087,7 @@ skip:
smb2_remove_blocked_lock,
argv);
if (rc) {
+ kfree(argv);
err = -ENOMEM;
goto out;
}
@@ -8211,6 +8219,11 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
le32_to_cpu(req->LeaseState));
}
+ if (ret < 0) {
+ rsp->hdr.Status = err;
+ goto err_out;
+ }
+
lease_state = lease->state;
opinfo->op_state = OPLOCK_STATE_NONE;
wake_up_interruptible_all(&opinfo->oplock_q);
@@ -8218,11 +8231,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
wake_up_interruptible_all(&opinfo->oplock_brk);
opinfo_put(opinfo);
- if (ret < 0) {
- rsp->hdr.Status = err;
- goto err_out;
- }
-
rsp->StructureSize = cpu_to_le16(36);
rsp->Reserved = 0;
rsp->Flags = 0;
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index 9091dcd7a310..4277750a6da1 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -517,6 +517,9 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
}
}
+ /* Reserve lease break for parent dir at closing time */
+ fp->reserve_lease_break = true;
+
/* Do we need to break any of a levelII oplock? */
smb_break_all_levII_oplock(work, fp, 1);
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
index ddf233994ddb..4e82ff627d12 100644
--- a/fs/smb/server/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
@@ -87,6 +87,17 @@ static struct ksmbd_inode *ksmbd_inode_lookup(struct ksmbd_file *fp)
return __ksmbd_inode_lookup(fp->filp->f_path.dentry);
}
+struct ksmbd_inode *ksmbd_inode_lookup_lock(struct dentry *d)
+{
+ struct ksmbd_inode *ci;
+
+ read_lock(&inode_hash_lock);
+ ci = __ksmbd_inode_lookup(d);
+ read_unlock(&inode_hash_lock);
+
+ return ci;
+}
+
int ksmbd_query_inode_status(struct dentry *dentry)
{
struct ksmbd_inode *ci;
@@ -199,7 +210,7 @@ static void ksmbd_inode_free(struct ksmbd_inode *ci)
kfree(ci);
}
-static void ksmbd_inode_put(struct ksmbd_inode *ci)
+void ksmbd_inode_put(struct ksmbd_inode *ci)
{
if (atomic_dec_and_test(&ci->m_count))
ksmbd_inode_free(ci);
diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h
index 8325cf4527c4..a528f0cc775a 100644
--- a/fs/smb/server/vfs_cache.h
+++ b/fs/smb/server/vfs_cache.h
@@ -105,6 +105,7 @@ struct ksmbd_file {
struct ksmbd_readdir_data readdir_data;
int dot_dotdot[2];
unsigned int f_state;
+ bool reserve_lease_break;
};
static inline void set_ctx_actor(struct dir_context *ctx,
@@ -138,6 +139,8 @@ struct ksmbd_file *ksmbd_lookup_foreign_fd(struct ksmbd_work *work, u64 id);
struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id,
u64 pid);
void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp);
+struct ksmbd_inode *ksmbd_inode_lookup_lock(struct dentry *d);
+void ksmbd_inode_put(struct ksmbd_inode *ci);
struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id);
struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid);
struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry);
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 581ce9519339..2dc730800f44 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -321,7 +321,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
TRACE("Block @ 0x%llx, %scompressed size %d\n", index - 2,
compressed ? "" : "un", length);
}
- if (length < 0 || length > output->length ||
+ if (length <= 0 || length > output->length ||
(index + length) > msblk->bytes_used) {
res = -EIO;
goto out;
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index 0b90869fd805..43e237864a42 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -546,6 +546,8 @@ static struct dentry *eventfs_root_lookup(struct inode *dir,
if (strcmp(ei_child->name, name) != 0)
continue;
ret = simple_lookup(dir, dentry, flags);
+ if (IS_ERR(ret))
+ goto out;
create_dir_dentry(ei, ei_child, ei_dentry, true);
created = true;
break;
@@ -568,6 +570,8 @@ static struct dentry *eventfs_root_lookup(struct inode *dir,
if (r <= 0)
continue;
ret = simple_lookup(dir, dentry, flags);
+ if (IS_ERR(ret))
+ goto out;
create_file_dentry(ei, i, ei_dentry, name, mode, cdata,
fops, true);
break;
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index 13ba34e6d64f..2acf191eb89e 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -245,7 +245,7 @@ struct folio *ufs_get_locked_folio(struct address_space *mapping,
{
struct inode *inode = mapping->host;
struct folio *folio = filemap_lock_folio(mapping, index);
- if (!folio) {
+ if (IS_ERR(folio)) {
folio = read_mapping_folio(mapping, index, NULL);
if (IS_ERR(folio)) {