From 15d937d7ca8c55d2b0ce9116e20c780fdd0b67cc Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 10 Nov 2022 15:46:33 +0100 Subject: fuse: add request extension Will need to add supplementary groups to create messages, so add the general concept of a request extension. A request extension is appended to the end of the main request. It has a header indicating the size and type of the extension. The create security context (fuse_secctx_*) is similar to the generic request extension, so include that as well in a backward compatible manner. Add the total extension length to the request header. The offset of the extension block within the request can be calculated by: inh->len - inh->total_extlen * 8 Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 2 ++ fs/fuse/dir.c | 66 ++++++++++++++++++++++++++++++++------------------------ fs/fuse/fuse_i.h | 6 ++++-- 3 files changed, 44 insertions(+), 30 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index e8b60ce72c9a..9f349f834977 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -476,6 +476,8 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args) req->in.h.opcode = args->opcode; req->in.h.nodeid = args->nodeid; req->args = args; + if (args->is_ext) + req->in.h.total_extlen = args->in_args[args->ext_idx].size / 8; if (args->end) __set_bit(FR_ASYNC, &req->flags); } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index cd1a071b625a..fa6381f199b3 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -466,7 +466,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, } static int get_security_context(struct dentry *entry, umode_t mode, - void **security_ctx, u32 *security_ctxlen) + struct fuse_in_arg *ext) { struct fuse_secctx *fctx; struct fuse_secctx_header *header; @@ -513,14 +513,42 @@ static int get_security_context(struct dentry *entry, umode_t mode, memcpy(ptr, ctx, ctxlen); } - *security_ctxlen = total_len; - *security_ctx = header; + ext->size = total_len; + ext->value = header; err = 0; out_err: kfree(ctx); return err; } +static int get_create_ext(struct fuse_args *args, struct dentry *dentry, + umode_t mode) +{ + struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb); + struct fuse_in_arg ext = { .size = 0, .value = NULL }; + int err = 0; + + if (fc->init_security) + err = get_security_context(dentry, mode, &ext); + + if (!err && ext.size) { + WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args)); + args->is_ext = true; + args->ext_idx = args->in_numargs++; + args->in_args[args->ext_idx] = ext; + } else { + kfree(ext.value); + } + + return err; +} + +static void free_ext_value(struct fuse_args *args) +{ + if (args->is_ext) + kfree(args->in_args[args->ext_idx].value); +} + /* * Atomic create+open operation * @@ -541,8 +569,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct fuse_entry_out outentry; struct fuse_inode *fi; struct fuse_file *ff; - void *security_ctx = NULL; - u32 security_ctxlen; bool trunc = flags & O_TRUNC; /* Userspace expects S_IFREG in create mode */ @@ -586,19 +612,12 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, args.out_args[1].size = sizeof(outopen); args.out_args[1].value = &outopen; - if (fm->fc->init_security) { - err = get_security_context(entry, mode, &security_ctx, - &security_ctxlen); - if (err) - goto out_put_forget_req; - - args.in_numargs = 3; - args.in_args[2].size = security_ctxlen; - args.in_args[2].value = security_ctx; - } + err = get_create_ext(&args, entry, mode); + if (err) + goto out_put_forget_req; err = fuse_simple_request(fm, &args); - kfree(security_ctx); + free_ext_value(&args); if (err) goto out_free_ff; @@ -705,8 +724,6 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, struct dentry *d; int err; struct fuse_forget_link *forget; - void *security_ctx = NULL; - u32 security_ctxlen; if (fuse_is_bad(dir)) return -EIO; @@ -721,21 +738,14 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, args->out_args[0].size = sizeof(outarg); args->out_args[0].value = &outarg; - if (fm->fc->init_security && args->opcode != FUSE_LINK) { - err = get_security_context(entry, mode, &security_ctx, - &security_ctxlen); + if (args->opcode != FUSE_LINK) { + err = get_create_ext(args, entry, mode); if (err) goto out_put_forget_req; - - BUG_ON(args->in_numargs != 2); - - args->in_numargs = 3; - args->in_args[2].size = security_ctxlen; - args->in_args[2].value = security_ctx; } err = fuse_simple_request(fm, args); - kfree(security_ctx); + free_ext_value(args); if (err) goto out_put_forget_req; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index c673faefdcb9..ac300f14aa2d 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -249,8 +249,9 @@ struct fuse_page_desc { struct fuse_args { uint64_t nodeid; uint32_t opcode; - unsigned short in_numargs; - unsigned short out_numargs; + uint8_t in_numargs; + uint8_t out_numargs; + uint8_t ext_idx; bool force:1; bool noreply:1; bool nocreds:1; @@ -261,6 +262,7 @@ struct fuse_args { bool page_zeroing:1; bool page_replace:1; bool may_block:1; + bool is_ext:1; struct fuse_in_arg in_args[3]; struct fuse_arg out_args[2]; void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error); -- cgit v1.2.3 From 8ed7cb3f279fe67a93f407ee2ec3ea661a483a65 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 10 Nov 2022 15:46:33 +0100 Subject: fuse: optional supplementary group in create requests Permission to create an object (create, mkdir, symlink, mknod) needs to take supplementary groups into account. Add a supplementary group request extension. This can contain an arbitrary number of group IDs and can be added to any request. This extension is not added to any request by default. Add FUSE_CREATE_SUPP_GROUP init flag to enable supplementary group info in creation requests. This adds just a single supplementary group that matches the parent group in the case described above. In other cases the extension is not added. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 64 ++++++++++++++++++++++++++++++++++++++++++++--- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 4 ++- include/uapi/linux/fuse.h | 17 +++++++++++++ 4 files changed, 84 insertions(+), 4 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index fa6381f199b3..0c1d7e387f0d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -521,7 +521,63 @@ out_err: return err; } -static int get_create_ext(struct fuse_args *args, struct dentry *dentry, +static void *extend_arg(struct fuse_in_arg *buf, u32 bytes) +{ + void *p; + u32 newlen = buf->size + bytes; + + p = krealloc(buf->value, newlen, GFP_KERNEL); + if (!p) { + kfree(buf->value); + buf->size = 0; + buf->value = NULL; + return NULL; + } + + memset(p + buf->size, 0, bytes); + buf->value = p; + buf->size = newlen; + + return p + newlen - bytes; +} + +static u32 fuse_ext_size(size_t size) +{ + return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size); +} + +/* + * This adds just a single supplementary group that matches the parent's group. + */ +static int get_create_supp_group(struct inode *dir, struct fuse_in_arg *ext) +{ + struct fuse_conn *fc = get_fuse_conn(dir); + struct fuse_ext_header *xh; + struct fuse_supp_groups *sg; + kgid_t kgid = dir->i_gid; + gid_t parent_gid = from_kgid(fc->user_ns, kgid); + u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0])); + + if (parent_gid == (gid_t) -1 || gid_eq(kgid, current_fsgid()) || + !in_group_p(kgid)) + return 0; + + xh = extend_arg(ext, sg_len); + if (!xh) + return -ENOMEM; + + xh->size = sg_len; + xh->type = FUSE_EXT_GROUPS; + + sg = (struct fuse_supp_groups *) &xh[1]; + sg->nr_groups = 1; + sg->groups[0] = parent_gid; + + return 0; +} + +static int get_create_ext(struct fuse_args *args, + struct inode *dir, struct dentry *dentry, umode_t mode) { struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb); @@ -530,6 +586,8 @@ static int get_create_ext(struct fuse_args *args, struct dentry *dentry, if (fc->init_security) err = get_security_context(dentry, mode, &ext); + if (!err && fc->create_supp_group) + err = get_create_supp_group(dir, &ext); if (!err && ext.size) { WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args)); @@ -612,7 +670,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, args.out_args[1].size = sizeof(outopen); args.out_args[1].value = &outopen; - err = get_create_ext(&args, entry, mode); + err = get_create_ext(&args, dir, entry, mode); if (err) goto out_put_forget_req; @@ -739,7 +797,7 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, args->out_args[0].value = &outarg; if (args->opcode != FUSE_LINK) { - err = get_create_ext(args, entry, mode); + err = get_create_ext(args, dir, entry, mode); if (err) goto out_put_forget_req; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index ac300f14aa2d..e13a8eff2e3d 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -783,6 +783,9 @@ struct fuse_conn { /* Initialize security xattrs when creating a new inode */ unsigned int init_security:1; + /* Add supplementary group info when creating a new inode */ + unsigned int create_supp_group:1; + /* Does the filesystem support per inode DAX? */ unsigned int inode_dax:1; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6b3beda16c1b..114bdb3f7ccb 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1201,6 +1201,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->setxattr_ext = 1; if (flags & FUSE_SECURITY_CTX) fc->init_security = 1; + if (flags & FUSE_CREATE_SUPP_GROUP) + fc->create_supp_group = 1; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; @@ -1246,7 +1248,7 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | - FUSE_SECURITY_CTX; + FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index c71f12429e3d..1b9d0dfae72d 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -204,6 +204,8 @@ * - add total_extlen to fuse_in_header * - add FUSE_MAX_NR_SECCTX * - add extension header + * - add FUSE_EXT_GROUPS + * - add FUSE_CREATE_SUPP_GROUP */ #ifndef _LINUX_FUSE_H @@ -365,6 +367,8 @@ struct fuse_file_lock { * FUSE_SECURITY_CTX: add security context to create, mkdir, symlink, and * mknod * FUSE_HAS_INODE_DAX: use per inode DAX + * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir, + * symlink and mknod (single group that matches parent) */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -401,6 +405,7 @@ struct fuse_file_lock { /* bits 32..63 get shifted down 32 bits into the flags2 field */ #define FUSE_SECURITY_CTX (1ULL << 32) #define FUSE_HAS_INODE_DAX (1ULL << 33) +#define FUSE_CREATE_SUPP_GROUP (1ULL << 34) /** * CUSE INIT request/reply flags @@ -509,10 +514,12 @@ struct fuse_file_lock { /** * extension type * FUSE_MAX_NR_SECCTX: maximum value of &fuse_secctx_header.nr_secctx + * FUSE_EXT_GROUPS: &fuse_supp_groups extension */ enum fuse_ext_type { /* Types 0..31 are reserved for fuse_secctx_header */ FUSE_MAX_NR_SECCTX = 31, + FUSE_EXT_GROUPS = 32, }; enum fuse_opcode { @@ -1073,4 +1080,14 @@ struct fuse_ext_header { uint32_t type; }; +/** + * struct fuse_supp_groups - Supplementary group extension + * @nr_groups: number of supplementary groups + * @groups: flexible array of group IDs + */ +struct fuse_supp_groups { + uint32_t nr_groups; + uint32_t groups[]; +}; + #endif /* _LINUX_FUSE_H */ -- cgit v1.2.3 From 5a8bee63b10f6f2f52f6d22e109a4a147409842a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Jan 2023 17:10:58 +0100 Subject: fuse: in fuse_flush only wait if someone wants the return code If a fuse filesystem is mounted inside a container, there is a problem during pid namespace destruction. The scenario is: 1. task (a thread in the fuse server, with a fuse file open) starts exiting, does exit_signals(), goes into fuse_flush() -> wait 2. fuse daemon gets killed, tries to wake everyone up 3. task from 1 is stuck because complete_signal() doesn't wake it up, since it has PF_EXITING. The result is that the thread will never be woken up, and pid namespace destruction will block indefinitely. To add insult to injury, nobody is waiting for these return codes, since the pid namespace is being destroyed. To fix this, let's not block on flush operations when the current task has PF_EXITING. This does change the semantics slightly: the wait here is for posix locks to be unlocked, so the task will exit before things are unlocked. To quote Miklos: "remote" posix locks are almost never used due to problems like this, so I think it's safe to do this. Signed-off-by: "Eric W. Biederman" Signed-off-by: Tycho Andersen Link: https://lore.kernel.org/all/YrShFXRLtRt6T%2Fj+@risky/ Tested-by: Tycho Andersen Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 89 +++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 26 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 875314ee6f59..ad5dc8f9acdf 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -18,6 +18,7 @@ #include #include #include +#include static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, unsigned int open_flags, int opcode, @@ -477,48 +478,36 @@ static void fuse_sync_writes(struct inode *inode) fuse_release_nowrite(inode); } -static int fuse_flush(struct file *file, fl_owner_t id) -{ - struct inode *inode = file_inode(file); - struct fuse_mount *fm = get_fuse_mount(inode); - struct fuse_file *ff = file->private_data; +struct fuse_flush_args { + struct fuse_args args; struct fuse_flush_in inarg; - FUSE_ARGS(args); - int err; - - if (fuse_is_bad(inode)) - return -EIO; + struct work_struct work; + struct file *file; +}; - if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache) - return 0; +static int fuse_do_flush(struct fuse_flush_args *fa) +{ + int err; + struct inode *inode = file_inode(fa->file); + struct fuse_mount *fm = get_fuse_mount(inode); err = write_inode_now(inode, 1); if (err) - return err; + goto out; inode_lock(inode); fuse_sync_writes(inode); inode_unlock(inode); - err = filemap_check_errors(file->f_mapping); + err = filemap_check_errors(fa->file->f_mapping); if (err) - return err; + goto out; err = 0; if (fm->fc->no_flush) goto inval_attr_out; - memset(&inarg, 0, sizeof(inarg)); - inarg.fh = ff->fh; - inarg.lock_owner = fuse_lock_owner_id(fm->fc, id); - args.opcode = FUSE_FLUSH; - args.nodeid = get_node_id(inode); - args.in_numargs = 1; - args.in_args[0].size = sizeof(inarg); - args.in_args[0].value = &inarg; - args.force = true; - - err = fuse_simple_request(fm, &args); + err = fuse_simple_request(fm, &fa->args); if (err == -ENOSYS) { fm->fc->no_flush = 1; err = 0; @@ -531,9 +520,57 @@ inval_attr_out: */ if (!err && fm->fc->writeback_cache) fuse_invalidate_attr_mask(inode, STATX_BLOCKS); + +out: + fput(fa->file); + kfree(fa); return err; } +static void fuse_flush_async(struct work_struct *work) +{ + struct fuse_flush_args *fa = container_of(work, typeof(*fa), work); + + fuse_do_flush(fa); +} + +static int fuse_flush(struct file *file, fl_owner_t id) +{ + struct fuse_flush_args *fa; + struct inode *inode = file_inode(file); + struct fuse_mount *fm = get_fuse_mount(inode); + struct fuse_file *ff = file->private_data; + + if (fuse_is_bad(inode)) + return -EIO; + + if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache) + return 0; + + fa = kzalloc(sizeof(*fa), GFP_KERNEL); + if (!fa) + return -ENOMEM; + + fa->inarg.fh = ff->fh; + fa->inarg.lock_owner = fuse_lock_owner_id(fm->fc, id); + fa->args.opcode = FUSE_FLUSH; + fa->args.nodeid = get_node_id(inode); + fa->args.in_numargs = 1; + fa->args.in_args[0].size = sizeof(fa->inarg); + fa->args.in_args[0].value = &fa->inarg; + fa->args.force = true; + fa->file = get_file(file); + + /* Don't wait if the task is exiting */ + if (current->flags & PF_EXITING) { + INIT_WORK(&fa->work, fuse_flush_async); + schedule_work(&fa->work); + return 0; + } + + return fuse_do_flush(fa); +} + int fuse_fsync_common(struct file *file, loff_t start, loff_t end, int datasync, int opcode) { -- cgit v1.2.3 From 06bbb761c12dd147e4499f4d7a187699c5a0391f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Jan 2023 17:00:23 -0800 Subject: fuse: fix all W=1 kernel-doc warnings Use correct function name in kernel-doc notation. (1) Don't use "/**" to begin non-kernel-doc comments. (3) Fixes these warnings: fs/fuse/cuse.c:272: warning: expecting prototype for cuse_parse_dev_info(). Prototype was for cuse_parse_devinfo() instead fs/fuse/dev.c:212: warning: expecting prototype for A new request is available, wake fiq(). Prototype was for fuse_dev_wake_and_unlock() instead fs/fuse/dir.c:149: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Mark the attributes as stale due to an atime change. Avoid the invalidate if fs/fuse/file.c:656: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * In case of short read, the caller sets 'pos' to the position of Signed-off-by: Randy Dunlap Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 2 +- fs/fuse/dev.c | 2 +- fs/fuse/dir.c | 2 +- fs/fuse/file.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index a06fbb1a8a5b..179a5c5e28fd 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -256,7 +256,7 @@ static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp) } /** - * cuse_parse_dev_info - parse device info + * cuse_parse_devinfo - parse device info * @p: device info string * @len: length of device info string * @devinfo: out parameter for parsed device info diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9f349f834977..eb4f88e3dc97 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -204,7 +204,7 @@ static unsigned int fuse_req_hash(u64 unique) return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS); } -/** +/* * A new request is available, wake fiq->waitq */ static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 0c1d7e387f0d..7f4b29aa7c79 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -145,7 +145,7 @@ static void fuse_dir_changed(struct inode *dir) inode_maybe_inc_iversion(dir, false); } -/** +/* * Mark the attributes as stale due to an atime change. Avoid the invalidate if * atime is not used. */ diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ad5dc8f9acdf..3eb28aad5674 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -689,7 +689,7 @@ static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io) return io->bytes < 0 ? io->size : io->bytes; } -/** +/* * In case of short read, the caller sets 'pos' to the position of * actual end of fuse request in IO request. Otherwise, if bytes_requested * == bytes_transferred or rw == WRITE, the caller sets 'pos' to -1. -- cgit v1.2.3 From 1cc4606d19e3710bfab3f6704b87ff9580493c69 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 26 Jan 2023 11:23:18 +0100 Subject: fuse: add inode/permission checks to fileattr_get/fileattr_set It looks like these checks were accidentally lost during the conversion to fileattr API. Fixes: 72227eac177d ("fuse: convert to fileattr") Cc: # v5.13 Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Miklos Szeredi --- fs/fuse/ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/fuse') diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c index fcce94ace2c2..8ba1545e01f9 100644 --- a/fs/fuse/ioctl.c +++ b/fs/fuse/ioctl.c @@ -419,6 +419,12 @@ static struct fuse_file *fuse_priv_ioctl_prepare(struct inode *inode) struct fuse_mount *fm = get_fuse_mount(inode); bool isdir = S_ISDIR(inode->i_mode); + if (!fuse_allow_current_process(fm->fc)) + return ERR_PTR(-EACCES); + + if (fuse_is_bad(inode)) + return ERR_PTR(-EIO); + if (!S_ISREG(inode->i_mode) && !isdir) return ERR_PTR(-ENOTTY); -- cgit v1.2.3