From 91ec6c85599b60c00caf4e9a9d6c4d6e5dd5e93c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 14 Aug 2023 13:05:30 +0200 Subject: Revert "fuse: in fuse_flush only wait if someone wants the return code" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 5a8bee63b10f6f2f52f6d22e109a4a147409842a. Jürg Billeter reports the following regression: Since v6.3-rc1 commit 5a8bee63b1 ("fuse: in fuse_flush only wait if someone wants the return code") `fput()` is called asynchronously if a file is closed as part of a process exiting, i.e., if there was no explicit `close()` before exit. If the file was open for writing, also `put_write_access()` is called asynchronously as part of the async `fput()`. If that newly written file is an executable, attempting to `execve()` the new file can fail with `ETXTBSY` if it's called after the writer process exited but before the async `fput()` has run. Reported-and-tested-by: "Jürg Billeter" Cc: # v6.3 Link: https://lore.kernel.org/all/4f66cded234462964899f2a661750d6798a57ec0.camel@bitron.ch/ Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 89 +++++++++++++++++----------------------------------------- 1 file changed, 26 insertions(+), 63 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index bc4115288eec..1c7599ed9062 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -19,7 +19,6 @@ #include #include #include -#include static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, unsigned int open_flags, int opcode, @@ -479,36 +478,48 @@ static void fuse_sync_writes(struct inode *inode) fuse_release_nowrite(inode); } -struct fuse_flush_args { - struct fuse_args args; - struct fuse_flush_in inarg; - struct work_struct work; - struct file *file; -}; - -static int fuse_do_flush(struct fuse_flush_args *fa) +static int fuse_flush(struct file *file, fl_owner_t id) { - int err; - struct inode *inode = file_inode(fa->file); + struct inode *inode = file_inode(file); struct fuse_mount *fm = get_fuse_mount(inode); + struct fuse_file *ff = file->private_data; + struct fuse_flush_in inarg; + FUSE_ARGS(args); + int err; + + if (fuse_is_bad(inode)) + return -EIO; + + if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache) + return 0; err = write_inode_now(inode, 1); if (err) - goto out; + return err; inode_lock(inode); fuse_sync_writes(inode); inode_unlock(inode); - err = filemap_check_errors(fa->file->f_mapping); + err = filemap_check_errors(file->f_mapping); if (err) - goto out; + return err; err = 0; if (fm->fc->no_flush) goto inval_attr_out; - err = fuse_simple_request(fm, &fa->args); + memset(&inarg, 0, sizeof(inarg)); + inarg.fh = ff->fh; + inarg.lock_owner = fuse_lock_owner_id(fm->fc, id); + args.opcode = FUSE_FLUSH; + args.nodeid = get_node_id(inode); + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.force = true; + + err = fuse_simple_request(fm, &args); if (err == -ENOSYS) { fm->fc->no_flush = 1; err = 0; @@ -521,57 +532,9 @@ inval_attr_out: */ if (!err && fm->fc->writeback_cache) fuse_invalidate_attr_mask(inode, STATX_BLOCKS); - -out: - fput(fa->file); - kfree(fa); return err; } -static void fuse_flush_async(struct work_struct *work) -{ - struct fuse_flush_args *fa = container_of(work, typeof(*fa), work); - - fuse_do_flush(fa); -} - -static int fuse_flush(struct file *file, fl_owner_t id) -{ - struct fuse_flush_args *fa; - struct inode *inode = file_inode(file); - struct fuse_mount *fm = get_fuse_mount(inode); - struct fuse_file *ff = file->private_data; - - if (fuse_is_bad(inode)) - return -EIO; - - if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache) - return 0; - - fa = kzalloc(sizeof(*fa), GFP_KERNEL); - if (!fa) - return -ENOMEM; - - fa->inarg.fh = ff->fh; - fa->inarg.lock_owner = fuse_lock_owner_id(fm->fc, id); - fa->args.opcode = FUSE_FLUSH; - fa->args.nodeid = get_node_id(inode); - fa->args.in_numargs = 1; - fa->args.in_args[0].size = sizeof(fa->inarg); - fa->args.in_args[0].value = &fa->inarg; - fa->args.force = true; - fa->file = get_file(file); - - /* Don't wait if the task is exiting */ - if (current->flags & PF_EXITING) { - INIT_WORK(&fa->work, fuse_flush_async); - schedule_work(&fa->work); - return 0; - } - - return fuse_do_flush(fa); -} - int fuse_fsync_common(struct file *file, loff_t start, loff_t end, int datasync, int opcode) { -- cgit v1.2.3 From b8bd342d50cbf606666488488f9fea374aceb2d5 Mon Sep 17 00:00:00 2001 From: ruanmeisi Date: Tue, 25 Apr 2023 19:13:54 +0800 Subject: fuse: nlookup missing decrement in fuse_direntplus_link During our debugging of glusterfs, we found an Assertion failed error: inode_lookup >= nlookup, which was caused by the nlookup value in the kernel being greater than that in the FUSE file system. The issue was introduced by fuse_direntplus_link, where in the function, fuse_iget increments nlookup, and if d_splice_alias returns failure, fuse_direntplus_link returns failure without decrementing nlookup https://github.com/gluster/glusterfs/pull/4081 Signed-off-by: ruanmeisi Fixes: 0b05b18381ee ("fuse: implement NFS-like readdirplus support") Cc: # v3.9 Signed-off-by: Miklos Szeredi --- fs/fuse/readdir.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index dc603479b30e..b3d498163f97 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -243,8 +243,16 @@ retry: dput(dentry); dentry = alias; } - if (IS_ERR(dentry)) + if (IS_ERR(dentry)) { + if (!IS_ERR(inode)) { + struct fuse_inode *fi = get_fuse_inode(inode); + + spin_lock(&fi->lock); + fi->nlookup--; + spin_unlock(&fi->lock); + } return PTR_ERR(dentry); + } } if (fc->readdirplus_auto) set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state); -- cgit v1.2.3 From 80e4f25262f9f1a5f08154fafaa6ac4371043d35 Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Tue, 1 Aug 2023 16:06:45 +0800 Subject: fuse: invalidate page cache pages before direct write In FOPEN_DIRECT_IO, page cache may still be there for a file since private mmap is allowed. Direct write should respect that and invalidate the corresponding pages so that page cache readers don't get stale data. Signed-off-by: Hao Xu Tested-by: Jiachen Zhang Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1c7599ed9062..1aa7dde665aa 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1428,7 +1428,8 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, int write = flags & FUSE_DIO_WRITE; int cuse = flags & FUSE_DIO_CUSE; struct file *file = io->iocb->ki_filp; - struct inode *inode = file->f_mapping->host; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fm->fc; size_t nmax = write ? fc->max_write : fc->max_read; @@ -1440,6 +1441,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, int err = 0; struct fuse_io_args *ia; unsigned int max_pages; + bool fopen_direct_io = ff->open_flags & FOPEN_DIRECT_IO; max_pages = iov_iter_npages(iter, fc->max_pages); ia = fuse_io_alloc(io, max_pages); @@ -1454,6 +1456,14 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, inode_unlock(inode); } + if (fopen_direct_io && write) { + res = invalidate_inode_pages2_range(mapping, idx_from, idx_to); + if (res) { + fuse_io_free(ia); + return res; + } + } + io->should_dirty = !write && user_backed_iter(iter); while (count) { ssize_t nres; -- cgit v1.2.3 From e78662e818f9478e70912cc2970ca632ec9f3635 Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Tue, 1 Aug 2023 16:06:46 +0800 Subject: fuse: add a new fuse init flag to relax restrictions in no cache mode FOPEN_DIRECT_IO is usually set by fuse daemon to indicate need of strong coherency, e.g. network filesystems. Thus shared mmap is disabled since it leverages page cache and may write to it, which may cause inconsistence. But FOPEN_DIRECT_IO can be used not for coherency but to reduce memory footprint as well, e.g. reduce guest memory usage with virtiofs. Therefore, add a new fuse init flag FUSE_DIRECT_IO_RELAX to relax restrictions in that mode, currently, it allows shared mmap. One thing to note is to make sure it doesn't break coherency in your use case. Signed-off-by: Hao Xu Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 7 +++++-- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 4 +++- include/uapi/linux/fuse.h | 8 +++++++- 4 files changed, 18 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1aa7dde665aa..e6034ce698e9 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2451,14 +2451,17 @@ static const struct vm_operations_struct fuse_file_vm_ops = { static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) { struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fm->fc; /* DAX mmap is superior to direct_io mmap */ if (FUSE_IS_DAX(file_inode(file))) return fuse_dax_mmap(file, vma); if (ff->open_flags & FOPEN_DIRECT_IO) { - /* Can't provide the coherency needed for MAP_SHARED */ - if (vma->vm_flags & VM_MAYSHARE) + /* Can't provide the coherency needed for MAP_SHARED + * if FUSE_DIRECT_IO_RELAX isn't set. + */ + if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_relax) return -ENODEV; invalidate_inode_pages2(file->f_mapping); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 9b7fc7d3c7f1..d830c2360aef 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -792,6 +792,9 @@ struct fuse_conn { /* Is tmpfile not implemented by fs? */ unsigned int no_tmpfile:1; + /* relax restrictions in FOPEN_DIRECT_IO mode */ + unsigned int direct_io_relax:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f19d748890f0..4a16185eacae 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1212,6 +1212,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->init_security = 1; if (flags & FUSE_CREATE_SUPP_GROUP) fc->create_supp_group = 1; + if (flags & FUSE_DIRECT_IO_RELAX) + fc->direct_io_relax = 1; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; @@ -1258,7 +1260,7 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | - FUSE_HAS_EXPIRE_ONLY; + FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_RELAX; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index b3fcab13fcd3..c2da9503b04e 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -207,6 +207,9 @@ * - add FUSE_EXT_GROUPS * - add FUSE_CREATE_SUPP_GROUP * - add FUSE_HAS_EXPIRE_ONLY + * + * 7.39 + * - add FUSE_DIRECT_IO_RELAX */ #ifndef _LINUX_FUSE_H @@ -242,7 +245,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 38 +#define FUSE_KERNEL_MINOR_VERSION 39 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -371,6 +374,8 @@ struct fuse_file_lock { * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir, * symlink and mknod (single group that matches parent) * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation + * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now + * allow shared mmap */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -409,6 +414,7 @@ struct fuse_file_lock { #define FUSE_HAS_INODE_DAX (1ULL << 33) #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) +#define FUSE_DIRECT_IO_RELAX (1ULL << 36) /** * CUSE INIT request/reply flags -- cgit v1.2.3 From b5a2a3a0b77668257fa72ee6bc0eac90493f13c1 Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Tue, 1 Aug 2023 16:06:47 +0800 Subject: fuse: write back dirty pages before direct write in direct_io_relax mode In direct_io_relax mode, there can be shared mmaped files and thus dirty pages in its page cache. Therefore those dirty pages should be written back to backend before direct io to avoid data loss. Signed-off-by: Hao Xu Reviewed-by: Jiachen Zhang Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e6034ce698e9..1cdb6327511e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1448,6 +1448,13 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, if (!ia) return -ENOMEM; + if (fopen_direct_io && fc->direct_io_relax) { + res = filemap_write_and_wait_range(mapping, pos, pos + count - 1); + if (res) { + fuse_io_free(ia); + return res; + } + } if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) { if (!write) inode_lock(inode); -- cgit v1.2.3 From 8d8f9c4b8df6bc2bf005c91b73b23a0e60f0e413 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 10 Aug 2023 12:45:05 +0200 Subject: fuse: handle empty request_mask in statx If no attribute is requested, then don't send request to userspace. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index f67bef9d83c4..d38ab93e2007 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1209,7 +1209,12 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, u32 inval_mask = READ_ONCE(fi->inval_mask); u32 cache_mask = fuse_get_cache_mask(inode); - if (flags & AT_STATX_FORCE_SYNC) + /* FUSE only supports basic stats */ + request_mask &= STATX_BASIC_STATS; + + if (!request_mask) + sync = false; + else if (flags & AT_STATX_FORCE_SYNC) sync = true; else if (flags & AT_STATX_DONT_SYNC) sync = false; -- cgit v1.2.3 From 9dc10a54abe50b733a5b561d5f8be718e79c3590 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 10 Aug 2023 12:45:05 +0200 Subject: fuse: add ATTR_TIMEOUT macro Next patch will introduce yet another type attribute reply. Add a macro that can handle attribute timeouts for all of the structs. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 26 ++++++++------------------ fs/fuse/fuse_i.h | 5 ++++- fs/fuse/readdir.c | 4 ++-- 3 files changed, 14 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index d38ab93e2007..04006db6e173 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -92,7 +92,7 @@ static void fuse_dentry_settime(struct dentry *dentry, u64 time) /* * Calculate the time in jiffies until a dentry/attributes are valid */ -static u64 time_to_jiffies(u64 sec, u32 nsec) +u64 fuse_time_to_jiffies(u64 sec, u32 nsec) { if (sec || nsec) { struct timespec64 ts = { @@ -112,17 +112,7 @@ static u64 time_to_jiffies(u64 sec, u32 nsec) void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o) { fuse_dentry_settime(entry, - time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); -} - -static u64 attr_timeout(struct fuse_attr_out *o) -{ - return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); -} - -u64 entry_attr_timeout(struct fuse_entry_out *o) -{ - return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); + fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); } void fuse_invalidate_attr_mask(struct inode *inode, u32 mask) @@ -266,7 +256,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) forget_all_cached_acls(inode); fuse_change_attributes(inode, &outarg.attr, - entry_attr_timeout(&outarg), + ATTR_TIMEOUT(&outarg), attr_version); fuse_change_entry_timeout(entry, &outarg); } else if (inode) { @@ -399,7 +389,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name goto out_put_forget; *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, - &outarg->attr, entry_attr_timeout(outarg), + &outarg->attr, ATTR_TIMEOUT(outarg), attr_version); err = -ENOMEM; if (!*inode) { @@ -686,7 +676,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, ff->nodeid = outentry.nodeid; ff->open_flags = outopen.open_flags; inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, - &outentry.attr, entry_attr_timeout(&outentry), 0); + &outentry.attr, ATTR_TIMEOUT(&outentry), 0); if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); fuse_sync_release(NULL, ff, flags); @@ -813,7 +803,7 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, goto out_put_forget_req; inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, - &outarg.attr, entry_attr_timeout(&outarg), 0); + &outarg.attr, ATTR_TIMEOUT(&outarg), 0); if (!inode) { fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1); return -ENOMEM; @@ -1190,7 +1180,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, err = -EIO; } else { fuse_change_attributes(inode, &outarg.attr, - attr_timeout(&outarg), + ATTR_TIMEOUT(&outarg), attr_version); if (stat) fuse_fillattr(inode, &outarg.attr, stat); @@ -1867,7 +1857,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, } fuse_change_attributes_common(inode, &outarg.attr, - attr_timeout(&outarg), + ATTR_TIMEOUT(&outarg), fuse_get_cache_mask(inode)); oldsize = inode->i_size; /* see the comment in fuse_change_attributes() */ diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d830c2360aef..cfb97a147b66 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1114,7 +1114,10 @@ void fuse_invalidate_entry_cache(struct dentry *entry); void fuse_invalidate_atime(struct inode *inode); -u64 entry_attr_timeout(struct fuse_entry_out *o); +u64 fuse_time_to_jiffies(u64 sec, u32 nsec); +#define ATTR_TIMEOUT(o) \ + fuse_time_to_jiffies((o)->attr_valid, (o)->attr_valid_nsec) + void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o); /** diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index b3d498163f97..c58447be5e4d 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -224,7 +224,7 @@ retry: forget_all_cached_acls(inode); fuse_change_attributes(inode, &o->attr, - entry_attr_timeout(o), + ATTR_TIMEOUT(o), attr_version); /* * The other branch comes via fuse_iget() @@ -232,7 +232,7 @@ retry: */ } else { inode = fuse_iget(dir->i_sb, o->nodeid, o->generation, - &o->attr, entry_attr_timeout(o), + &o->attr, ATTR_TIMEOUT(o), attr_version); if (!inode) inode = ERR_PTR(-ENOMEM); -- cgit v1.2.3 From d3045530bdd29d91033eea437d8a961f4ee598b5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 10 Aug 2023 12:45:05 +0200 Subject: fuse: implement statx Allow querying btime. When btime is requested in mask, then FUSE_STATX request is sent. Otherwise keep using FUSE_GETATTR. The userspace interface for statx matches that of the statx(2) API. However there are limitations on how this interface is used: - returned basic stats and btime are used, stx_attributes, etc. are ignored - always query basic stats and btime, regardless of what was requested - requested sync type is ignored, the default is passed to the server - if server returns with some attributes missing from the result_mask, then no attributes will be cached - btime is not cached yet (next patch will fix that) For new inodes initialize fi->inval_mask to "all invalid", instead of "all valid" as previously. Also only clear basic stats from inval_mask when caching attributes. This will result in the caching logic not thinking that btime is cached. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- fs/fuse/fuse_i.h | 3 ++ fs/fuse/inode.c | 5 +-- 3 files changed, 107 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 04006db6e173..e7f63e60118c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -350,10 +350,14 @@ int fuse_valid_type(int m) S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); } +static bool fuse_valid_size(u64 size) +{ + return size <= LLONG_MAX; +} + bool fuse_invalid_attr(struct fuse_attr *attr) { - return !fuse_valid_type(attr->mode) || - attr->size > LLONG_MAX; + return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size); } int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, @@ -1143,6 +1147,84 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, stat->blksize = 1 << blkbits; } +static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr) +{ + memset(attr, 0, sizeof(*attr)); + attr->ino = sx->ino; + attr->size = sx->size; + attr->blocks = sx->blocks; + attr->atime = sx->atime.tv_sec; + attr->mtime = sx->mtime.tv_sec; + attr->ctime = sx->ctime.tv_sec; + attr->atimensec = sx->atime.tv_nsec; + attr->mtimensec = sx->mtime.tv_nsec; + attr->ctimensec = sx->ctime.tv_nsec; + attr->mode = sx->mode; + attr->nlink = sx->nlink; + attr->uid = sx->uid; + attr->gid = sx->gid; + attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor)); + attr->blksize = sx->blksize; +} + +static int fuse_do_statx(struct inode *inode, struct file *file, + struct kstat *stat) +{ + int err; + struct fuse_attr attr; + struct fuse_statx *sx; + struct fuse_statx_in inarg; + struct fuse_statx_out outarg; + struct fuse_mount *fm = get_fuse_mount(inode); + u64 attr_version = fuse_get_attr_version(fm->fc); + FUSE_ARGS(args); + + memset(&inarg, 0, sizeof(inarg)); + memset(&outarg, 0, sizeof(outarg)); + /* Directories have separate file-handle space */ + if (file && S_ISREG(inode->i_mode)) { + struct fuse_file *ff = file->private_data; + + inarg.getattr_flags |= FUSE_GETATTR_FH; + inarg.fh = ff->fh; + } + /* For now leave sync hints as the default, request all stats. */ + inarg.sx_flags = 0; + inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME; + args.opcode = FUSE_STATX; + args.nodeid = get_node_id(inode); + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.out_numargs = 1; + args.out_args[0].size = sizeof(outarg); + args.out_args[0].value = &outarg; + err = fuse_simple_request(fm, &args); + if (err) + return err; + + sx = &outarg.stat; + if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) || + ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) || + inode_wrong_type(inode, sx->mode)))) { + make_bad_inode(inode); + return -EIO; + } + + fuse_statx_to_attr(&outarg.stat, &attr); + if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) { + fuse_change_attributes(inode, &attr, ATTR_TIMEOUT(&outarg), + attr_version); + } + stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME); + stat->btime.tv_sec = sx->btime.tv_sec; + stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1); + fuse_fillattr(inode, &attr, stat); + stat->result_mask |= STATX_TYPE; + + return 0; +} + static int fuse_do_getattr(struct inode *inode, struct kstat *stat, struct file *file) { @@ -1194,13 +1276,18 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, unsigned int flags) { struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_conn *fc = get_fuse_conn(inode); int err = 0; bool sync; u32 inval_mask = READ_ONCE(fi->inval_mask); u32 cache_mask = fuse_get_cache_mask(inode); - /* FUSE only supports basic stats */ - request_mask &= STATX_BASIC_STATS; + + /* FUSE only supports basic stats and possibly btime */ + request_mask &= STATX_BASIC_STATS | STATX_BTIME; +retry: + if (fc->no_statx) + request_mask &= STATX_BASIC_STATS; if (!request_mask) sync = false; @@ -1215,7 +1302,16 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, if (sync) { forget_all_cached_acls(inode); - err = fuse_do_getattr(inode, stat, file); + /* Try statx if BTIME is requested */ + if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) { + err = fuse_do_statx(inode, file, stat); + if (err == -ENOSYS) { + fc->no_statx = 1; + goto retry; + } + } else { + err = fuse_do_getattr(inode, stat, file); + } } else if (stat) { generic_fillattr(&nop_mnt_idmap, inode, stat); stat->mode = fi->orig_i_mode; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index cfb97a147b66..43c7b58d8ab7 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -795,6 +795,9 @@ struct fuse_conn { /* relax restrictions in FOPEN_DIRECT_IO mode */ unsigned int direct_io_relax:1; + /* Is statx not implemented by fs? */ + unsigned int no_statx:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4a16185eacae..2a4d192502e7 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -77,7 +77,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) return NULL; fi->i_time = 0; - fi->inval_mask = 0; + fi->inval_mask = ~0; fi->nodeid = 0; fi->nlookup = 0; fi->attr_version = 0; @@ -172,7 +172,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, fi->attr_version = atomic64_inc_return(&fc->attr_version); fi->i_time = attr_valid; - WRITE_ONCE(fi->inval_mask, 0); + /* Clear basic stats from invalid mask */ + set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0); inode->i_ino = fuse_squash_ino(attr->ino); inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); -- cgit v1.2.3 From 972f4c46d0a1bb7fde3ce0bd15775855b2d02c68 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 10 Aug 2023 12:45:05 +0200 Subject: fuse: cache btime Not all inode attributes are supported by all filesystems, but for the basic stats (which are returned by stat(2) and friends) all of them will have some value, even if that doesn't reflect a real attribute of the file. Btime is different, in that filesystems are free to report or not report a value in statx. If the value is available, then STATX_BTIME bit is set in stx_mask. When caching the value of btime, remember the availability of the attribute as well as the value (if available). This is done by using the FUSE_I_BTIME bit in fuse_inode->state to indicate availability, while using fuse_inode->inval_mask & STATX_BTIME to indicate the state of the cache itself (i.e. set if cache is invalid, and cleared if cache is valid). Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 14 +++++++++----- fs/fuse/fuse_i.h | 7 +++++++ fs/fuse/inode.c | 25 +++++++++++++++++++++++-- fs/fuse/readdir.c | 2 +- 4 files changed, 40 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e7f63e60118c..3db9d26d341e 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -255,7 +255,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) goto invalid; forget_all_cached_acls(inode); - fuse_change_attributes(inode, &outarg.attr, + fuse_change_attributes(inode, &outarg.attr, NULL, ATTR_TIMEOUT(&outarg), attr_version); fuse_change_entry_timeout(entry, &outarg); @@ -1213,8 +1213,8 @@ static int fuse_do_statx(struct inode *inode, struct file *file, fuse_statx_to_attr(&outarg.stat, &attr); if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) { - fuse_change_attributes(inode, &attr, ATTR_TIMEOUT(&outarg), - attr_version); + fuse_change_attributes(inode, &attr, &outarg.stat, + ATTR_TIMEOUT(&outarg), attr_version); } stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME); stat->btime.tv_sec = sx->btime.tv_sec; @@ -1261,7 +1261,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, fuse_make_bad(inode); err = -EIO; } else { - fuse_change_attributes(inode, &outarg.attr, + fuse_change_attributes(inode, &outarg.attr, NULL, ATTR_TIMEOUT(&outarg), attr_version); if (stat) @@ -1316,6 +1316,10 @@ retry: generic_fillattr(&nop_mnt_idmap, inode, stat); stat->mode = fi->orig_i_mode; stat->ino = fi->orig_ino; + if (test_bit(FUSE_I_BTIME, &fi->state)) { + stat->btime = fi->i_btime; + stat->result_mask |= STATX_BTIME; + } } return err; @@ -1952,7 +1956,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, /* FIXME: clear I_DIRTY_SYNC? */ } - fuse_change_attributes_common(inode, &outarg.attr, + fuse_change_attributes_common(inode, &outarg.attr, NULL, ATTR_TIMEOUT(&outarg), fuse_get_cache_mask(inode)); oldsize = inode->i_size; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 43c7b58d8ab7..bf0b85d0b95c 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -88,6 +88,9 @@ struct fuse_inode { preserve the original mode */ umode_t orig_i_mode; + /* Cache birthtime */ + struct timespec64 i_btime; + /** 64 bit inode number */ u64 orig_ino; @@ -167,6 +170,8 @@ enum { FUSE_I_SIZE_UNSTABLE, /* Bad inode */ FUSE_I_BAD, + /* Has btime */ + FUSE_I_BTIME, }; struct fuse_conn; @@ -1064,9 +1069,11 @@ void fuse_init_symlink(struct inode *inode); * Change attributes of an inode */ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, + struct fuse_statx *sx, u64 attr_valid, u64 attr_version); void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, + struct fuse_statx *sx, u64 attr_valid, u32 cache_mask); u32 fuse_get_cache_mask(struct inode *inode); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 2a4d192502e7..d47606206ec3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -163,6 +163,7 @@ static ino_t fuse_squash_ino(u64 ino64) } void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, + struct fuse_statx *sx, u64 attr_valid, u32 cache_mask) { struct fuse_conn *fc = get_fuse_conn(inode); @@ -198,6 +199,25 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, inode->i_ctime.tv_sec = attr->ctime; inode->i_ctime.tv_nsec = attr->ctimensec; } + if (sx) { + /* Sanitize nsecs */ + sx->btime.tv_nsec = + min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1); + + /* + * Btime has been queried, cache is valid (whether or not btime + * is available or not) so clear STATX_BTIME from inval_mask. + * + * Availability of the btime attribute is indicated in + * FUSE_I_BTIME + */ + set_mask_bits(&fi->inval_mask, STATX_BTIME, 0); + if (sx->mask & STATX_BTIME) { + set_bit(FUSE_I_BTIME, &fi->state); + fi->i_btime.tv_sec = sx->btime.tv_sec; + fi->i_btime.tv_nsec = sx->btime.tv_nsec; + } + } if (attr->blksize != 0) inode->i_blkbits = ilog2(attr->blksize); @@ -237,6 +257,7 @@ u32 fuse_get_cache_mask(struct inode *inode) } void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, + struct fuse_statx *sx, u64 attr_valid, u64 attr_version) { struct fuse_conn *fc = get_fuse_conn(inode); @@ -271,7 +292,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, } old_mtime = inode->i_mtime; - fuse_change_attributes_common(inode, attr, attr_valid, cache_mask); + fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask); oldsize = inode->i_size; /* @@ -409,7 +430,7 @@ done: spin_lock(&fi->lock); fi->nlookup++; spin_unlock(&fi->lock); - fuse_change_attributes(inode, attr, attr_valid, attr_version); + fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version); return inode; } diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index c58447be5e4d..9e6d587b3e67 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -223,7 +223,7 @@ retry: spin_unlock(&fi->lock); forget_all_cached_acls(inode); - fuse_change_attributes(inode, &o->attr, + fuse_change_attributes(inode, &o->attr, NULL, ATTR_TIMEOUT(o), attr_version); /* -- cgit v1.2.3 From 7d875e66859a4359acd29ce0d188e1aff048e7ed Mon Sep 17 00:00:00 2001 From: Jiachen Zhang Date: Tue, 11 Jul 2023 12:34:02 +0800 Subject: fuse: invalidate dentry on EEXIST creates or ENOENT deletes The EEXIST errors returned from server are strong sign that a local negative dentry should be invalidated. Similarly, The ENOENT errors from server can also be a sign of revalidate failure. This commit invalidates dentries on EEXIST creates and ENOENT deletes by calling fuse_invalidate_entry(), which improves the consistency with no performance degradation. Signed-off-by: Jiachen Zhang Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 3db9d26d341e..e190d09f220d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -749,7 +749,8 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, if (err == -ENOSYS) { fc->no_create = 1; goto mknod; - } + } else if (err == -EEXIST) + fuse_invalidate_entry(entry); out_dput: dput(res); return err; @@ -829,6 +830,8 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, return 0; out_put_forget_req: + if (err == -EEXIST) + fuse_invalidate_entry(entry); kfree(forget); return err; } @@ -980,7 +983,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) if (!err) { fuse_dir_changed(dir); fuse_entry_unlinked(entry); - } else if (err == -EINTR) + } else if (err == -EINTR || err == -ENOENT) fuse_invalidate_entry(entry); return err; } @@ -1003,7 +1006,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) if (!err) { fuse_dir_changed(dir); fuse_entry_unlinked(entry); - } else if (err == -EINTR) + } else if (err == -EINTR || err == -ENOENT) fuse_invalidate_entry(entry); return err; } @@ -1044,7 +1047,7 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, /* newent will end up negative */ if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) fuse_entry_unlinked(newent); - } else if (err == -EINTR) { + } else if (err == -EINTR || err == -ENOENT) { /* If request was interrupted, DEITY only knows if the rename actually took place. If the invalidation fails (e.g. some process has CWD under the renamed -- cgit v1.2.3 From f73016b63b09edec8adf7e182600c52465c56ee7 Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Thu, 24 Aug 2023 00:33:45 +0200 Subject: fuse: conditionally fill kstat in fuse_do_statx() The code path fuse_update_attributes fuse_update_get_attr fuse_do_statx has the risk to use a NULL pointer for struct kstat *stat, although current callers of fuse_update_attributes() only set request_mask to values that will trigger the call of fuse_do_getattr(), which already handles the NULL pointer. Future updates might miss that fuse_do_statx() does not handle it it is safer to add a condition already right now. Signed-off-by: Bernd Schubert Fixes: d3045530bdd2 ("fuse: implement statx") Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e190d09f220d..04a1c62342dc 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1219,11 +1219,14 @@ static int fuse_do_statx(struct inode *inode, struct file *file, fuse_change_attributes(inode, &attr, &outarg.stat, ATTR_TIMEOUT(&outarg), attr_version); } - stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME); - stat->btime.tv_sec = sx->btime.tv_sec; - stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1); - fuse_fillattr(inode, &attr, stat); - stat->result_mask |= STATX_TYPE; + + if (stat) { + stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME); + stat->btime.tv_sec = sx->btime.tv_sec; + stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1); + fuse_fillattr(inode, &attr, stat); + stat->result_mask |= STATX_TYPE; + } return 0; } -- cgit v1.2.3