diff options
Diffstat (limited to 'fs')
407 files changed, 3457 insertions, 2755 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c index c397c51f80d9..eed551d8555f 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -139,7 +139,7 @@ struct posix_acl *v9fs_iop_get_inode_acl(struct inode *inode, int type, bool rcu } -struct posix_acl *v9fs_iop_get_acl(struct user_namespace *mnt_userns, +struct posix_acl *v9fs_iop_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type) { struct v9fs_session_info *v9ses; @@ -151,7 +151,7 @@ struct posix_acl *v9fs_iop_get_acl(struct user_namespace *mnt_userns, return v9fs_get_cached_acl(d_inode(dentry), type); } -int v9fs_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int v9fs_iop_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int retval; @@ -195,7 +195,7 @@ int v9fs_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, goto err_out; } - if (!inode_owner_or_capable(&init_user_ns, inode)) { + if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) { retval = -EPERM; goto err_out; } @@ -206,7 +206,7 @@ int v9fs_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr iattr = {}; struct posix_acl *acl_mode = acl; - retval = posix_acl_update_mode(&init_user_ns, inode, + retval = posix_acl_update_mode(&nop_mnt_idmap, inode, &iattr.ia_mode, &acl_mode); if (retval) @@ -225,7 +225,7 @@ int v9fs_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, * FIXME should we update ctime ? * What is the following setxattr update the mode ? */ - v9fs_vfs_setattr_dotl(&init_user_ns, dentry, &iattr); + v9fs_vfs_setattr_dotl(&nop_mnt_idmap, dentry, &iattr); } break; case ACL_TYPE_DEFAULT: diff --git a/fs/9p/acl.h b/fs/9p/acl.h index 4c60a2bce5de..333cfcc281da 100644 --- a/fs/9p/acl.h +++ b/fs/9p/acl.h @@ -10,9 +10,9 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid); struct posix_acl *v9fs_iop_get_inode_acl(struct inode *inode, int type, bool rcu); -struct posix_acl *v9fs_iop_get_acl(struct user_namespace *mnt_userns, +struct posix_acl *v9fs_iop_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type); -int v9fs_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int v9fs_iop_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); int v9fs_acl_chmod(struct inode *inode, struct p9_fid *fid); int v9fs_set_create_acl(struct inode *inode, struct p9_fid *fid, diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 6acabc2e7dc9..f3f74d197b5d 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -151,7 +151,7 @@ extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d); extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d); -extern int v9fs_vfs_rename(struct user_namespace *mnt_userns, +extern int v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index bc417da7e9c1..75106b9f293d 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -60,7 +60,7 @@ void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat); int v9fs_uflags2omode(int uflags, int extended); void v9fs_blank_wstat(struct p9_wstat *wstat); -int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns, +int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr); int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end, int datasync); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index b740017634ef..b6ba22975781 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/sched.h> #include <linux/file.h> #include <linux/stat.h> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 27a04a226d97..4344e7a7865f 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -260,7 +260,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, { int err = 0; - inode_init_owner(&init_user_ns, inode, NULL, mode); + inode_init_owner(&nop_mnt_idmap, inode, NULL, mode); inode->i_blocks = 0; inode->i_rdev = rdev; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); @@ -672,7 +672,7 @@ error: /** * v9fs_vfs_create - VFS hook to create a regular file - * @mnt_userns: The user namespace of the mount + * @idmap: idmap of the mount * @dir: The parent directory * @dentry: The name of file to be created * @mode: The UNIX file mode to set @@ -684,7 +684,7 @@ error: */ static int -v9fs_vfs_create(struct user_namespace *mnt_userns, struct inode *dir, +v9fs_vfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); @@ -704,14 +704,14 @@ v9fs_vfs_create(struct user_namespace *mnt_userns, struct inode *dir, /** * v9fs_vfs_mkdir - VFS mkdir hook to create a directory - * @mnt_userns: The user namespace of the mount + * @idmap: idmap of the mount * @dir: inode that is being unlinked * @dentry: dentry that is being unlinked * @mode: mode for new directory * */ -static int v9fs_vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int v9fs_vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { int err; @@ -908,7 +908,7 @@ int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) /** * v9fs_vfs_rename - VFS hook to rename an inode - * @mnt_userns: The user namespace of the mount + * @idmap: The idmap of the mount * @old_dir: old dir inode * @old_dentry: old dentry * @new_dir: new dir inode @@ -918,7 +918,7 @@ int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) */ int -v9fs_vfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { @@ -1018,7 +1018,7 @@ error: /** * v9fs_vfs_getattr - retrieve file metadata - * @mnt_userns: The user namespace of the mount + * @idmap: idmap of the mount * @path: Object to query * @stat: metadata structure to populate * @request_mask: Mask of STATX_xxx flags indicating the caller's interests @@ -1027,7 +1027,7 @@ error: */ static int -v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path, +v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; @@ -1038,7 +1038,7 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path, p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { - generic_fillattr(&init_user_ns, d_inode(dentry), stat); + generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat); return 0; } fid = v9fs_fid_lookup(dentry); @@ -1051,7 +1051,7 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path, return PTR_ERR(st); v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0); - generic_fillattr(&init_user_ns, d_inode(dentry), stat); + generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat); p9stat_free(st); kfree(st); @@ -1060,13 +1060,13 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path, /** * v9fs_vfs_setattr - set file metadata - * @mnt_userns: The user namespace of the mount + * @idmap: idmap of the mount * @dentry: file whose metadata to set * @iattr: metadata assignment structure * */ -static int v9fs_vfs_setattr(struct user_namespace *mnt_userns, +static int v9fs_vfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { int retval, use_dentry = 0; @@ -1077,7 +1077,7 @@ static int v9fs_vfs_setattr(struct user_namespace *mnt_userns, struct p9_wstat wstat; p9_debug(P9_DEBUG_VFS, "\n"); - retval = setattr_prepare(&init_user_ns, dentry, iattr); + retval = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (retval) return retval; @@ -1135,7 +1135,7 @@ static int v9fs_vfs_setattr(struct user_namespace *mnt_userns, v9fs_invalidate_inode_attr(inode); - setattr_copy(&init_user_ns, inode, iattr); + setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); return 0; } @@ -1300,7 +1300,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, /** * v9fs_vfs_symlink - helper function to create symlinks - * @mnt_userns: The user namespace of the mount + * @idmap: idmap of the mount * @dir: directory inode containing symlink * @dentry: dentry for symlink * @symname: symlink data @@ -1310,7 +1310,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, */ static int -v9fs_vfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +v9fs_vfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { p9_debug(P9_DEBUG_VFS, " %lu,%pd,%s\n", @@ -1356,7 +1356,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, /** * v9fs_vfs_mknod - create a special file - * @mnt_userns: The user namespace of the mount + * @idmap: idmap of the mount * @dir: inode destination for new link * @dentry: dentry for file * @mode: mode for creation @@ -1365,7 +1365,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, */ static int -v9fs_vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +v9fs_vfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index f806b3f11649..3bed3eb3a0e2 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -30,7 +30,7 @@ #include "acl.h" static int -v9fs_vfs_mknod_dotl(struct user_namespace *mnt_userns, struct inode *dir, +v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode, dev_t rdev); /** @@ -211,7 +211,7 @@ int v9fs_open_to_dotl_flags(int flags) /** * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol. - * @mnt_userns: The user namespace of the mount + * @idmap: The user namespace of the mount * @dir: directory inode that is being created * @dentry: dentry that is being deleted * @omode: create permissions @@ -219,10 +219,10 @@ int v9fs_open_to_dotl_flags(int flags) * */ static int -v9fs_vfs_create_dotl(struct user_namespace *mnt_userns, struct inode *dir, +v9fs_vfs_create_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode, bool excl) { - return v9fs_vfs_mknod_dotl(mnt_userns, dir, dentry, omode, 0); + return v9fs_vfs_mknod_dotl(idmap, dir, dentry, omode, 0); } static int @@ -356,14 +356,14 @@ out: /** * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory - * @mnt_userns: The user namespace of the mount + * @idmap: The idmap of the mount * @dir: inode that is being unlinked * @dentry: dentry that is being unlinked * @omode: mode for new directory * */ -static int v9fs_vfs_mkdir_dotl(struct user_namespace *mnt_userns, +static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode) { @@ -450,7 +450,7 @@ error: } static int -v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns, +v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { @@ -462,7 +462,7 @@ v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns, p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { - generic_fillattr(&init_user_ns, d_inode(dentry), stat); + generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat); return 0; } fid = v9fs_fid_lookup(dentry); @@ -479,7 +479,7 @@ v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns, return PTR_ERR(st); v9fs_stat2inode_dotl(st, d_inode(dentry), 0); - generic_fillattr(&init_user_ns, d_inode(dentry), stat); + generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat); /* Change block size to what the server returned */ stat->blksize = st->st_blksize; @@ -529,13 +529,13 @@ static int v9fs_mapped_iattr_valid(int iattr_valid) /** * v9fs_vfs_setattr_dotl - set file metadata - * @mnt_userns: The user namespace of the mount + * @idmap: idmap of the mount * @dentry: file whose metadata to set * @iattr: metadata assignment structure * */ -int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns, +int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { int retval, use_dentry = 0; @@ -548,7 +548,7 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns, p9_debug(P9_DEBUG_VFS, "\n"); - retval = setattr_prepare(&init_user_ns, dentry, iattr); + retval = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (retval) return retval; @@ -597,7 +597,7 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns, truncate_setsize(inode, iattr->ia_size); v9fs_invalidate_inode_attr(inode); - setattr_copy(&init_user_ns, inode, iattr); + setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); if (iattr->ia_valid & ATTR_MODE) { /* We also want to update ACL when we update mode bits */ @@ -687,7 +687,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, } static int -v9fs_vfs_symlink_dotl(struct user_namespace *mnt_userns, struct inode *dir, +v9fs_vfs_symlink_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { int err; @@ -817,7 +817,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, /** * v9fs_vfs_mknod_dotl - create a special file - * @mnt_userns: The user namespace of the mount + * @idmap: The idmap of the mount * @dir: inode destination for new link * @dentry: dentry for file * @omode: mode for creation @@ -825,7 +825,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, * */ static int -v9fs_vfs_mknod_dotl(struct user_namespace *mnt_userns, struct inode *dir, +v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode, dev_t rdev) { int err; diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index b6984311e00a..50f7f3f6b55e 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -150,7 +150,7 @@ static int v9fs_xattr_handler_get(const struct xattr_handler *handler, } static int v9fs_xattr_handler_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/Makefile b/fs/Makefile index 4dea17840761..76abc9e055bd 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -16,7 +16,7 @@ obj-y := open.o read_write.o file_table.o super.o \ pnode.o splice.o sync.o utimes.o d_path.o \ stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ fs_types.o fs_context.o fs_parser.o fsopen.o init.o \ - kernel_read_file.o remap_range.o + kernel_read_file.o mnt_idmapping.o remap_range.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o direct-io.o mpage.o diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 06b7c92343ad..223f0283d20f 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -144,7 +144,7 @@ struct adfs_discmap { /* Inode stuff */ struct inode *adfs_iget(struct super_block *sb, struct object_info *obj); int adfs_write_inode(struct inode *inode, struct writeback_control *wbc); -int adfs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry, +int adfs_notify_change(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); /* map.c */ diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index ee22278b0cfc..c3ac613d0975 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -294,7 +294,7 @@ out: * later. */ int -adfs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry, +adfs_notify_change(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -302,7 +302,7 @@ adfs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry, unsigned int ia_valid = attr->ia_valid; int error; - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); /* * we can't change the UID or GID of any file - diff --git a/fs/affs/affs.h b/fs/affs/affs.h index bfa89e131ead..60685ec76d98 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -167,17 +167,17 @@ extern const struct export_operations affs_export_ops; extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len); extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int); extern int affs_unlink(struct inode *dir, struct dentry *dentry); -extern int affs_create(struct user_namespace *mnt_userns, struct inode *dir, +extern int affs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool); -extern int affs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +extern int affs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode); extern int affs_rmdir(struct inode *dir, struct dentry *dentry); extern int affs_link(struct dentry *olddentry, struct inode *dir, struct dentry *dentry); -extern int affs_symlink(struct user_namespace *mnt_userns, +extern int affs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname); -extern int affs_rename2(struct user_namespace *mnt_userns, +extern int affs_rename2(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); @@ -185,7 +185,7 @@ extern int affs_rename2(struct user_namespace *mnt_userns, /* inode.c */ extern struct inode *affs_new_inode(struct inode *dir); -extern int affs_notify_change(struct user_namespace *mnt_userns, +extern int affs_notify_change(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); extern void affs_evict_inode(struct inode *inode); extern struct inode *affs_iget(struct super_block *sb, diff --git a/fs/affs/file.c b/fs/affs/file.c index cefa222f7881..8daeed31e1af 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -880,7 +880,7 @@ affs_truncate(struct inode *inode) if (inode->i_size > AFFS_I(inode)->mmu_private) { struct address_space *mapping = inode->i_mapping; struct page *page; - void *fsdata; + void *fsdata = NULL; loff_t isize = inode->i_size; int res; diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 2352a75bd9d6..27f77a52c5c8 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -216,7 +216,7 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc) } int -affs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry, +affs_notify_change(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -224,7 +224,7 @@ affs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry, pr_debug("notify_change(%lu,0x%x)\n", inode->i_ino, attr->ia_valid); - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) goto out; @@ -250,7 +250,7 @@ affs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry, affs_truncate(inode); } - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); if (attr->ia_valid & ATTR_MODE) diff --git a/fs/affs/namei.c b/fs/affs/namei.c index bcab18956b4f..d12ccfd2a83d 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -242,7 +242,7 @@ affs_unlink(struct inode *dir, struct dentry *dentry) } int -affs_create(struct user_namespace *mnt_userns, struct inode *dir, +affs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct super_block *sb = dir->i_sb; @@ -274,7 +274,7 @@ affs_create(struct user_namespace *mnt_userns, struct inode *dir, } int -affs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +affs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; @@ -313,7 +313,7 @@ affs_rmdir(struct inode *dir, struct dentry *dentry) } int -affs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +affs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { struct super_block *sb = dir->i_sb; @@ -503,7 +503,7 @@ done: return retval; } -int affs_rename2(struct user_namespace *mnt_userns, struct inode *old_dir, +int affs_rename2(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 7dcd59693a0c..d4ddb20d6732 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -13,6 +13,8 @@ #include "internal.h" #include "afs_cm.h" #include "protocol_yfs.h" +#define RXRPC_TRACE_ONLY_DEFINE_ENUMS +#include <trace/events/rxrpc.h> static int afs_deliver_cb_init_call_back_state(struct afs_call *); static int afs_deliver_cb_init_call_back_state3(struct afs_call *); @@ -191,7 +193,7 @@ static void afs_cm_destructor(struct afs_call *call) * Abort a service call from within an action function. */ static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error, - const char *why) + enum rxrpc_abort_reason why) { rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, error, why); @@ -469,7 +471,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0) afs_send_empty_reply(call); else - afs_abort_service_call(call, 1, 1, "K-1"); + afs_abort_service_call(call, 1, 1, afs_abort_probeuuid_negative); afs_put_call(call); _leave(""); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index b7c1f8c84b38..82690d1dd49a 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -28,17 +28,17 @@ static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, in loff_t fpos, u64 ino, unsigned dtype); static bool afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen, loff_t fpos, u64 ino, unsigned dtype); -static int afs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int afs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl); -static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode); static int afs_rmdir(struct inode *dir, struct dentry *dentry); static int afs_unlink(struct inode *dir, struct dentry *dentry); static int afs_link(struct dentry *from, struct inode *dir, struct dentry *dentry); -static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *content); -static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags); @@ -1332,7 +1332,7 @@ static const struct afs_operation_ops afs_mkdir_operation = { /* * create a directory on an AFS filesystem */ -static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct afs_operation *op; @@ -1630,7 +1630,7 @@ static const struct afs_operation_ops afs_create_operation = { /* * create a regular file on an AFS filesystem */ -static int afs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int afs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct afs_operation *op; @@ -1760,7 +1760,7 @@ static const struct afs_operation_ops afs_symlink_operation = { /* * create a symlink in an AFS filesystem */ -static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *content) { struct afs_operation *op; @@ -1897,7 +1897,7 @@ static const struct afs_operation_ops afs_rename_operation = { /* * rename a file in an AFS filesystem and/or move it between directories */ -static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { diff --git a/fs/afs/flock.c b/fs/afs/flock.c index bbcc5afd1576..9c6dea3139f5 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -451,7 +451,7 @@ static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key, */ static int afs_do_setlk(struct file *file, struct file_lock *fl) { - struct inode *inode = locks_inode(file); + struct inode *inode = file_inode(file); struct afs_vnode *vnode = AFS_FS_I(inode); enum afs_flock_mode mode = AFS_FS_S(inode->i_sb)->flock_mode; afs_lock_type_t type; @@ -701,7 +701,7 @@ error: */ static int afs_do_unlk(struct file *file, struct file_lock *fl) { - struct afs_vnode *vnode = AFS_FS_I(locks_inode(file)); + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); int ret; _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); @@ -721,7 +721,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl) */ static int afs_do_getlk(struct file *file, struct file_lock *fl) { - struct afs_vnode *vnode = AFS_FS_I(locks_inode(file)); + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); struct key *key = afs_file_key(file); int ret, lock_count; @@ -763,7 +763,7 @@ error: */ int afs_lock(struct file *file, int cmd, struct file_lock *fl) { - struct afs_vnode *vnode = AFS_FS_I(locks_inode(file)); + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); enum afs_flock_operation op; int ret; @@ -798,7 +798,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl) */ int afs_flock(struct file *file, int cmd, struct file_lock *fl) { - struct afs_vnode *vnode = AFS_FS_I(locks_inode(file)); + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); enum afs_flock_operation op; int ret; @@ -843,7 +843,7 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl) */ static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl) { - struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file)); + struct afs_vnode *vnode = AFS_FS_I(file_inode(fl->fl_file)); _enter(""); @@ -861,7 +861,7 @@ static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl) */ static void afs_fl_release_private(struct file_lock *fl) { - struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file)); + struct afs_vnode *vnode = AFS_FS_I(file_inode(fl->fl_file)); _enter(""); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 6d3a3dbe4928..0167e96e5198 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -737,7 +737,7 @@ error_unlock: /* * read the attributes of an inode */ -int afs_getattr(struct user_namespace *mnt_userns, const struct path *path, +int afs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); @@ -761,7 +761,7 @@ int afs_getattr(struct user_namespace *mnt_userns, const struct path *path, do { read_seqbegin_or_lock(&vnode->cb_lock, &seq); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) && stat->nlink > 0) stat->nlink -= 1; @@ -870,7 +870,7 @@ static const struct afs_operation_ops afs_setattr_operation = { /* * set the attributes of an inode */ -int afs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int afs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { const unsigned int supported = diff --git a/fs/afs/internal.h b/fs/afs/internal.h index fd8567b98e2b..ad8523d0d038 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/ktime.h> #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/pagemap.h> #include <linux/rxrpc.h> #include <linux/key.h> @@ -1170,9 +1171,9 @@ extern struct inode *afs_iget(struct afs_operation *, struct afs_vnode_param *); extern struct inode *afs_root_iget(struct super_block *, struct key *); extern bool afs_check_validity(struct afs_vnode *); extern int afs_validate(struct afs_vnode *, struct key *); -extern int afs_getattr(struct user_namespace *mnt_userns, const struct path *, +extern int afs_getattr(struct mnt_idmap *idmap, const struct path *, struct kstat *, u32, unsigned int); -extern int afs_setattr(struct user_namespace *mnt_userns, struct dentry *, struct iattr *); +extern int afs_setattr(struct mnt_idmap *idmap, struct dentry *, struct iattr *); extern void afs_evict_inode(struct inode *); extern int afs_drop_inode(struct inode *); @@ -1387,7 +1388,7 @@ extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int, extern struct key *afs_request_key(struct afs_cell *); extern struct key *afs_request_key_rcu(struct afs_cell *); extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *); -extern int afs_permission(struct user_namespace *, struct inode *, int); +extern int afs_permission(struct mnt_idmap *, struct inode *, int); extern void __exit afs_clean_up_permit_cache(void); /* diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index c62939e5ea1f..7817e2b860e5 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -13,6 +13,8 @@ #include "internal.h" #include "afs_cm.h" #include "protocol_yfs.h" +#define RXRPC_TRACE_ONLY_DEFINE_ENUMS +#include <trace/events/rxrpc.h> struct workqueue_struct *afs_async_calls; @@ -397,7 +399,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) error_do_abort: if (ret != -ECONNABORTED) { rxrpc_kernel_abort_call(call->net->socket, rxcall, - RX_USER_ABORT, ret, "KSD"); + RX_USER_ABORT, ret, + afs_abort_send_data_error); } else { len = 0; iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0); @@ -527,7 +530,8 @@ static void afs_deliver_to_call(struct afs_call *call) case -ENOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KIV"); + abort_code, ret, + afs_abort_op_not_supported); goto local_abort; case -EIO: pr_err("kAFS: Call %u in bad state %u\n", @@ -542,12 +546,14 @@ static void afs_deliver_to_call(struct afs_call *call) if (state != AFS_CALL_CL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KUM"); + abort_code, ret, + afs_abort_unmarshal_error); goto local_abort; default: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KER"); + abort_code, ret, + afs_abort_general_error); goto local_abort; } } @@ -619,7 +625,8 @@ long afs_wait_for_call_to_complete(struct afs_call *call, /* Kill off the call if it's still live. */ _debug("call interrupted"); if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - RX_USER_ABORT, -EINTR, "KWI")) + RX_USER_ABORT, -EINTR, + afs_abort_interrupted)) afs_set_call_complete(call, -EINTR, 0); } } @@ -836,7 +843,8 @@ void afs_send_empty_reply(struct afs_call *call) case -ENOMEM: _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, - RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); + RXGEN_SS_MARSHAL, -ENOMEM, + afs_abort_oom); fallthrough; default: _leave(" [error]"); @@ -878,7 +886,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, - RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); + RXGEN_SS_MARSHAL, -ENOMEM, + afs_abort_oom); } _leave(" [error]"); } @@ -900,6 +909,7 @@ int afs_extract_data(struct afs_call *call, bool want_more) ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter, &call->iov_len, want_more, &remote_abort, &call->service_id); + trace_afs_receive_data(call, call->iter, want_more, ret); if (ret == 0 || ret == -EAGAIN) return ret; diff --git a/fs/afs/security.c b/fs/afs/security.c index 7c6a63a30394..6a7744c9e2a2 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -395,7 +395,7 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key, * - AFS ACLs are attached to directories only, and a file is controlled by its * parent directory's ACL */ -int afs_permission(struct user_namespace *mnt_userns, struct inode *inode, +int afs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct afs_vnode *vnode = AFS_FS_I(inode); diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index 7751b0b3f81d..9048d8ccc715 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -97,7 +97,7 @@ static const struct afs_operation_ops afs_store_acl_operation = { * Set a file's AFS3 ACL. */ static int afs_xattr_set_acl(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) @@ -228,7 +228,7 @@ static const struct afs_operation_ops yfs_store_opaque_acl2_operation = { * Set a file's YFS ACL. */ static int afs_xattr_set_yfs(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) @@ -361,6 +361,9 @@ static int aio_ring_mremap(struct vm_area_struct *vma) spin_lock(&mm->ioctx_lock); rcu_read_lock(); table = rcu_dereference(mm->ioctx_table); + if (!table) + goto out_unlock; + for (i = 0; i < table->nr; i++) { struct kioctx *ctx; @@ -374,6 +377,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma) } } +out_unlock: rcu_read_unlock(); spin_unlock(&mm->ioctx_lock); return res; diff --git a/fs/attr.c b/fs/attr.c index b45f30e516fa..aca9ff7aed33 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -14,6 +14,7 @@ #include <linux/capability.h> #include <linux/fsnotify.h> #include <linux/fcntl.h> +#include <linux/filelock.h> #include <linux/security.h> #include <linux/evm.h> #include <linux/ima.h> @@ -23,7 +24,7 @@ /** * setattr_should_drop_sgid - determine whether the setgid bit needs to be * removed - * @mnt_userns: user namespace of the mount @inode was found from + * @idmap: idmap of the mount @inode was found from * @inode: inode to check * * This function determines whether the setgid bit needs to be removed. @@ -33,7 +34,7 @@ * * Return: ATTR_KILL_SGID if setgid bit needs to be removed, 0 otherwise. */ -int setattr_should_drop_sgid(struct user_namespace *mnt_userns, +int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode) { umode_t mode = inode->i_mode; @@ -42,8 +43,7 @@ int setattr_should_drop_sgid(struct user_namespace *mnt_userns, return 0; if (mode & S_IXGRP) return ATTR_KILL_SGID; - if (!in_group_or_capable(mnt_userns, inode, - i_gid_into_vfsgid(mnt_userns, inode))) + if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) return ATTR_KILL_SGID; return 0; } @@ -51,7 +51,7 @@ int setattr_should_drop_sgid(struct user_namespace *mnt_userns, /** * setattr_should_drop_suidgid - determine whether the set{g,u}id bit needs to * be dropped - * @mnt_userns: user namespace of the mount @inode was found from + * @idmap: idmap of the mount @inode was found from * @inode: inode to check * * This function determines whether the set{g,u}id bits need to be removed. @@ -63,7 +63,7 @@ int setattr_should_drop_sgid(struct user_namespace *mnt_userns, * Return: A mask of ATTR_KILL_S{G,U}ID indicating which - if any - setid bits * to remove, 0 otherwise. */ -int setattr_should_drop_suidgid(struct user_namespace *mnt_userns, +int setattr_should_drop_suidgid(struct mnt_idmap *idmap, struct inode *inode) { umode_t mode = inode->i_mode; @@ -73,7 +73,7 @@ int setattr_should_drop_suidgid(struct user_namespace *mnt_userns, if (unlikely(mode & S_ISUID)) kill = ATTR_KILL_SUID; - kill |= setattr_should_drop_sgid(mnt_userns, inode); + kill |= setattr_should_drop_sgid(idmap, inode); if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode))) return kill; @@ -84,24 +84,24 @@ EXPORT_SYMBOL(setattr_should_drop_suidgid); /** * chown_ok - verify permissions to chown inode - * @mnt_userns: user namespace of the mount @inode was found from + * @idmap: idmap of the mount @inode was found from * @inode: inode to check permissions on * @ia_vfsuid: uid to chown @inode to * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then - * take care to map the inode according to @mnt_userns before checking + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then + * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be - * performed on the raw inode simply passs init_user_ns. + * performed on the raw inode simply pass @nop_mnt_idmap. */ -static bool chown_ok(struct user_namespace *mnt_userns, +static bool chown_ok(struct mnt_idmap *idmap, const struct inode *inode, vfsuid_t ia_vfsuid) { - vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode); + vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); if (vfsuid_eq_kuid(vfsuid, current_fsuid()) && vfsuid_eq(ia_vfsuid, vfsuid)) return true; - if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN)) + if (capable_wrt_inode_uidgid(idmap, inode, CAP_CHOWN)) return true; if (!vfsuid_valid(vfsuid) && ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN)) @@ -111,28 +111,28 @@ static bool chown_ok(struct user_namespace *mnt_userns, /** * chgrp_ok - verify permissions to chgrp inode - * @mnt_userns: user namespace of the mount @inode was found from + * @idmap: idmap of the mount @inode was found from * @inode: inode to check permissions on * @ia_vfsgid: gid to chown @inode to * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then - * take care to map the inode according to @mnt_userns before checking + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then + * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be - * performed on the raw inode simply passs init_user_ns. + * performed on the raw inode simply pass @nop_mnt_idmap. */ -static bool chgrp_ok(struct user_namespace *mnt_userns, +static bool chgrp_ok(struct mnt_idmap *idmap, const struct inode *inode, vfsgid_t ia_vfsgid) { - vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode); - vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode); + vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); + vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) { if (vfsgid_eq(ia_vfsgid, vfsgid)) return true; if (vfsgid_in_group_p(ia_vfsgid)) return true; } - if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN)) + if (capable_wrt_inode_uidgid(idmap, inode, CAP_CHOWN)) return true; if (!vfsgid_valid(vfsgid) && ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN)) @@ -142,7 +142,7 @@ static bool chgrp_ok(struct user_namespace *mnt_userns, /** * setattr_prepare - check if attribute changes to a dentry are allowed - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dentry: dentry to check * @attr: attributes to change * @@ -152,16 +152,16 @@ static bool chgrp_ok(struct user_namespace *mnt_userns, * SGID bit from mode if user is not allowed to set it. Also file capabilities * and IMA extended attributes are cleared if ATTR_KILL_PRIV is set. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then - * take care to map the inode according to @mnt_userns before checking + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then + * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be - * performed on the raw inode simply passs init_user_ns. + * performed on the raw inode simply passs @nop_mnt_idmap. * * Should be called as the first thing in ->setattr implementations, * possibly after taking additional locks. */ -int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry, +int setattr_prepare(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -183,34 +183,34 @@ int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry, /* Make sure a caller can chown. */ if ((ia_valid & ATTR_UID) && - !chown_ok(mnt_userns, inode, attr->ia_vfsuid)) + !chown_ok(idmap, inode, attr->ia_vfsuid)) return -EPERM; /* Make sure caller can chgrp. */ if ((ia_valid & ATTR_GID) && - !chgrp_ok(mnt_userns, inode, attr->ia_vfsgid)) + !chgrp_ok(idmap, inode, attr->ia_vfsgid)) return -EPERM; /* Make sure a caller can chmod. */ if (ia_valid & ATTR_MODE) { vfsgid_t vfsgid; - if (!inode_owner_or_capable(mnt_userns, inode)) + if (!inode_owner_or_capable(idmap, inode)) return -EPERM; if (ia_valid & ATTR_GID) vfsgid = attr->ia_vfsgid; else - vfsgid = i_gid_into_vfsgid(mnt_userns, inode); + vfsgid = i_gid_into_vfsgid(idmap, inode); /* Also check the setgid bit! */ - if (!in_group_or_capable(mnt_userns, inode, vfsgid)) + if (!in_group_or_capable(idmap, inode, vfsgid)) attr->ia_mode &= ~S_ISGID; } /* Check for setting the inode time. */ if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { - if (!inode_owner_or_capable(mnt_userns, inode)) + if (!inode_owner_or_capable(idmap, inode)) return -EPERM; } @@ -219,7 +219,7 @@ kill_priv: if (ia_valid & ATTR_KILL_PRIV) { int error; - error = security_inode_killpriv(mnt_userns, dentry); + error = security_inode_killpriv(idmap, dentry); if (error) return error; } @@ -276,7 +276,7 @@ EXPORT_SYMBOL(inode_newsize_ok); /** * setattr_copy - copy simple metadata updates into the generic inode - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @inode: the inode to be updated * @attr: the new attributes * @@ -289,23 +289,23 @@ EXPORT_SYMBOL(inode_newsize_ok); * Noticeably missing is inode size update, which is more complex * as it requires pagecache updates. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then - * take care to map the inode according to @mnt_userns before checking + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then + * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be - * performed on the raw inode simply passs init_user_ns. + * performed on the raw inode simply pass @nop_mnt_idmap. * * The inode is not marked as dirty after this operation. The rationale is * that for "simple" filesystems, the struct inode is the inode storage. * The caller is free to mark the inode dirty afterwards if needed. */ -void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode, +void setattr_copy(struct mnt_idmap *idmap, struct inode *inode, const struct iattr *attr) { unsigned int ia_valid = attr->ia_valid; - i_uid_update(mnt_userns, attr, inode); - i_gid_update(mnt_userns, attr, inode); + i_uid_update(idmap, attr, inode); + i_gid_update(idmap, attr, inode); if (ia_valid & ATTR_ATIME) inode->i_atime = attr->ia_atime; if (ia_valid & ATTR_MTIME) @@ -314,15 +314,15 @@ void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode, inode->i_ctime = attr->ia_ctime; if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; - if (!in_group_or_capable(mnt_userns, inode, - i_gid_into_vfsgid(mnt_userns, inode))) + if (!in_group_or_capable(idmap, inode, + i_gid_into_vfsgid(idmap, inode))) mode &= ~S_ISGID; inode->i_mode = mode; } } EXPORT_SYMBOL(setattr_copy); -int may_setattr(struct user_namespace *mnt_userns, struct inode *inode, +int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid) { int error; @@ -340,8 +340,8 @@ int may_setattr(struct user_namespace *mnt_userns, struct inode *inode, if (IS_IMMUTABLE(inode)) return -EPERM; - if (!inode_owner_or_capable(mnt_userns, inode)) { - error = inode_permission(mnt_userns, inode, MAY_WRITE); + if (!inode_owner_or_capable(idmap, inode)) { + error = inode_permission(idmap, inode, MAY_WRITE); if (error) return error; } @@ -352,7 +352,7 @@ EXPORT_SYMBOL(may_setattr); /** * notify_change - modify attributes of a filesytem object - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dentry: object affected * @attr: new attributes * @delegated_inode: returns inode, if the inode is delegated @@ -371,13 +371,13 @@ EXPORT_SYMBOL(may_setattr); * the file open for write, as there can be no conflicting delegation in * that case. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then - * take care to map the inode according to @mnt_userns before checking + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then + * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be - * performed on the raw inode simply passs init_user_ns. + * performed on the raw inode simply pass @nop_mnt_idmap. */ -int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry, +int notify_change(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr, struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; @@ -388,7 +388,7 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry, WARN_ON_ONCE(!inode_is_locked(inode)); - error = may_setattr(mnt_userns, inode, ia_valid); + error = may_setattr(idmap, inode, ia_valid); if (error) return error; @@ -453,11 +453,11 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry, * namespace of the superblock. */ if (ia_valid & ATTR_UID && - !vfsuid_has_fsmapping(mnt_userns, inode->i_sb->s_user_ns, + !vfsuid_has_fsmapping(idmap, inode->i_sb->s_user_ns, attr->ia_vfsuid)) return -EOVERFLOW; if (ia_valid & ATTR_GID && - !vfsgid_has_fsmapping(mnt_userns, inode->i_sb->s_user_ns, + !vfsgid_has_fsmapping(idmap, inode->i_sb->s_user_ns, attr->ia_vfsgid)) return -EOVERFLOW; @@ -465,13 +465,13 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry, * gids unless those uids & gids are being made valid. */ if (!(ia_valid & ATTR_UID) && - !vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode))) + !vfsuid_valid(i_uid_into_vfsuid(idmap, inode))) return -EOVERFLOW; if (!(ia_valid & ATTR_GID) && - !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode))) + !vfsgid_valid(i_gid_into_vfsgid(idmap, inode))) return -EOVERFLOW; - error = security_inode_setattr(mnt_userns, dentry, attr); + error = security_inode_setattr(idmap, dentry, attr); if (error) return error; error = try_break_deleg(inode, delegated_inode); @@ -479,13 +479,13 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry, return error; if (inode->i_op->setattr) - error = inode->i_op->setattr(mnt_userns, dentry, attr); + error = inode->i_op->setattr(idmap, dentry, attr); else - error = simple_setattr(mnt_userns, dentry, attr); + error = simple_setattr(idmap, dentry, attr); if (!error) { fsnotify_change(dentry, ia_valid); - ima_inode_post_setattr(mnt_userns, dentry); + ima_inode_post_setattr(idmap, dentry); evm_inode_post_setattr(dentry, ia_valid); } diff --git a/fs/autofs/root.c b/fs/autofs/root.c index ca03c1cae2be..6baf90b08e0e 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -10,12 +10,12 @@ #include "autofs_i.h" -static int autofs_dir_permission(struct user_namespace *, struct inode *, int); -static int autofs_dir_symlink(struct user_namespace *, struct inode *, +static int autofs_dir_permission(struct mnt_idmap *, struct inode *, int); +static int autofs_dir_symlink(struct mnt_idmap *, struct inode *, struct dentry *, const char *); static int autofs_dir_unlink(struct inode *, struct dentry *); static int autofs_dir_rmdir(struct inode *, struct dentry *); -static int autofs_dir_mkdir(struct user_namespace *, struct inode *, +static int autofs_dir_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t); static long autofs_root_ioctl(struct file *, unsigned int, unsigned long); #ifdef CONFIG_COMPAT @@ -543,7 +543,7 @@ static struct dentry *autofs_lookup(struct inode *dir, return NULL; } -static int autofs_dir_permission(struct user_namespace *mnt_userns, +static int autofs_dir_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { if (mask & MAY_WRITE) { @@ -560,10 +560,10 @@ static int autofs_dir_permission(struct user_namespace *mnt_userns, return -EACCES; } - return generic_permission(mnt_userns, inode, mask); + return generic_permission(idmap, inode, mask); } -static int autofs_dir_symlink(struct user_namespace *mnt_userns, +static int autofs_dir_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { @@ -720,7 +720,7 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry) return 0; } -static int autofs_dir_mkdir(struct user_namespace *mnt_userns, +static int autofs_dir_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 92737166203f..db649487d58c 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -27,7 +27,7 @@ static const struct file_operations bad_file_ops = .open = bad_file_open, }; -static int bad_inode_create(struct user_namespace *mnt_userns, +static int bad_inode_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { @@ -51,14 +51,14 @@ static int bad_inode_unlink(struct inode *dir, struct dentry *dentry) return -EIO; } -static int bad_inode_symlink(struct user_namespace *mnt_userns, +static int bad_inode_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { return -EIO; } -static int bad_inode_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int bad_inode_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { return -EIO; @@ -69,13 +69,13 @@ static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry) return -EIO; } -static int bad_inode_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int bad_inode_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { return -EIO; } -static int bad_inode_rename2(struct user_namespace *mnt_userns, +static int bad_inode_rename2(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) @@ -89,20 +89,20 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer, return -EIO; } -static int bad_inode_permission(struct user_namespace *mnt_userns, +static int bad_inode_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { return -EIO; } -static int bad_inode_getattr(struct user_namespace *mnt_userns, +static int bad_inode_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { return -EIO; } -static int bad_inode_setattr(struct user_namespace *mnt_userns, +static int bad_inode_setattr(struct mnt_idmap *idmap, struct dentry *direntry, struct iattr *attrs) { return -EIO; @@ -146,14 +146,14 @@ static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry, return -EIO; } -static int bad_inode_tmpfile(struct user_namespace *mnt_userns, +static int bad_inode_tmpfile(struct mnt_idmap *idmap, struct inode *inode, struct file *file, umode_t mode) { return -EIO; } -static int bad_inode_set_acl(struct user_namespace *mnt_userns, +static int bad_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 34d4f68f786b..040d5140e426 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -75,7 +75,7 @@ const struct file_operations bfs_dir_operations = { .llseek = generic_file_llseek, }; -static int bfs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int bfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { int err; @@ -96,7 +96,7 @@ static int bfs_create(struct user_namespace *mnt_userns, struct inode *dir, } set_bit(ino, info->si_imap); info->si_freei--; - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_blocks = 0; inode->i_op = &bfs_file_inops; @@ -199,7 +199,7 @@ out_brelse: return error; } -static int bfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +static int bfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index de63572a9404..9a780fafc539 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2034,7 +2034,7 @@ static int elf_core_dump(struct coredump_params *cprm) * The number of segs are recored into ELF header as 16bit value. * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. */ - segs = cprm->vma_count + elf_core_extra_phdrs(); + segs = cprm->vma_count + elf_core_extra_phdrs(cprm); /* for notes section */ segs++; @@ -2074,7 +2074,7 @@ static int elf_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); offset += cprm->vma_data_size; - offset += elf_core_extra_data_size(); + offset += elf_core_extra_data_size(cprm); e_shoff = offset; if (e_phnum == PN_XNUM) { diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 096e3520a0b1..a05eafcacfb2 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1509,7 +1509,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) tmp->next = thread_list; thread_list = tmp; - segs = cprm->vma_count + elf_core_extra_phdrs(); + segs = cprm->vma_count + elf_core_extra_phdrs(cprm); /* for notes section */ segs++; @@ -1555,7 +1555,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); offset += cprm->vma_data_size; - offset += elf_core_extra_data_size(); + offset += elf_core_extra_data_size(cprm); e_shoff = offset; if (e_phnum == PN_XNUM) { diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 3da1779e8b79..7427449a04a3 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -110,7 +110,7 @@ out: return ret; } -int btrfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int btrfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int ret; @@ -118,7 +118,7 @@ int btrfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, umode_t old_mode = inode->i_mode; if (type == ACL_TYPE_ACCESS && acl) { - ret = posix_acl_update_mode(mnt_userns, inode, + ret = posix_acl_update_mode(idmap, inode, &inode->i_mode, &acl); if (ret) return ret; diff --git a/fs/btrfs/acl.h b/fs/btrfs/acl.h index 39bd36e6eeb7..a270e71ec05f 100644 --- a/fs/btrfs/acl.h +++ b/fs/btrfs/acl.h @@ -6,7 +6,7 @@ #ifdef CONFIG_BTRFS_FS_POSIX_ACL struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu); -int btrfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int btrfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); int __btrfs_set_acl(struct btrfs_trans_handle *trans, struct inode *inode, struct posix_acl *acl, int type); diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 21c92c74bf71..46851511b661 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -484,6 +484,7 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx, u64 wanted_disk_byte = ref->wanted_disk_byte; u64 count = 0; u64 data_offset; + u8 type; if (level != 0) { eb = path->nodes[level]; @@ -538,6 +539,9 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx, continue; } fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + type = btrfs_file_extent_type(eb, fi); + if (type == BTRFS_FILE_EXTENT_INLINE) + goto next; disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); data_offset = btrfs_file_extent_offset(eb, fi); diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index b8fb7ef6b520..8affc88b0e0a 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -329,7 +329,16 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, &map_length, &bioc, mirror_num); if (ret) goto out_counter_dec; - BUG_ON(mirror_num != bioc->mirror_num); + /* + * This happens when dev-replace is also running, and the + * mirror_num indicates the dev-replace target. + * + * In this case, we don't need to do anything, as the read + * error just means the replace progress hasn't reached our + * read range, and later replace routine would handle it well. + */ + if (mirror_num != bioc->mirror_num) + goto out_counter_dec; } sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9; diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 195c09e20609..7c1527fcc7b3 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -469,7 +469,7 @@ int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args, int btrfs_create_new_inode(struct btrfs_trans_handle *trans, struct btrfs_new_inode_args *args); void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args); -struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns, +struct inode *btrfs_new_subvol_inode(struct mnt_idmap *idmap, struct inode *dir); void btrfs_set_delalloc_extent(struct btrfs_inode *inode, struct extent_state *state, u32 bits); diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c index 0a3c261b69c9..d81b764a7644 100644 --- a/fs/btrfs/defrag.c +++ b/fs/btrfs/defrag.c @@ -358,8 +358,10 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + ret = -ENOMEM; + goto out; + } level = btrfs_header_level(root->node); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0888d484df80..3aa04224315e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -367,7 +367,14 @@ error: btrfs_print_tree(eb, 0); btrfs_err(fs_info, "block=%llu write time tree block corruption detected", eb->start); - WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); + /* + * Be noisy if this is an extent buffer from a log tree. We don't abort + * a transaction in case there's a bad log tree extent buffer, we just + * fallback to a transaction commit. Still we want to know when there is + * a bad log tree extent buffer, as that may signal a bug somewhere. + */ + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) || + btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID); return ret; } @@ -530,6 +537,9 @@ static int validate_extent_buffer(struct extent_buffer *eb, } if (found_level != check->level) { + btrfs_err(fs_info, + "level verify failed on logical %llu mirror %u wanted %u found %u", + eb->start, eb->read_mirror, check->level, found_level); ret = -EIO; goto out; } @@ -3381,6 +3391,8 @@ out: /* * Do various sanity and dependency checks of different features. * + * @is_rw_mount: If the mount is read-write. + * * This is the place for less strict checks (like for subpage or artificial * feature dependencies). * @@ -3391,7 +3403,7 @@ out: * (space cache related) can modify on-disk format like free space tree and * screw up certain feature dependencies. */ -int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb) +int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount) { struct btrfs_super_block *disk_super = fs_info->super_copy; u64 incompat = btrfs_super_incompat_flags(disk_super); @@ -3430,7 +3442,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb) if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; - if (compat_ro_unsupp && !sb_rdonly(sb)) { + if (compat_ro_unsupp && is_rw_mount) { btrfs_err(fs_info, "cannot mount read-write because of unknown compat_ro features (0x%llx)", compat_ro); @@ -3633,7 +3645,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_alloc; } - ret = btrfs_check_features(fs_info, sb); + ret = btrfs_check_features(fs_info, !sb_rdonly(sb)); if (ret < 0) { err = ret; goto fail_alloc; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 363935cfc084..f2f295eb6103 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -50,7 +50,7 @@ int __cold open_ctree(struct super_block *sb, void __cold close_ctree(struct btrfs_fs_info *fs_info); int btrfs_validate_super(struct btrfs_fs_info *fs_info, struct btrfs_super_block *sb, int mirror_num); -int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb); +int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount); int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors); struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev); struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c index 9ae9cd1e7035..3c7766dfaa69 100644 --- a/fs/btrfs/extent-io-tree.c +++ b/fs/btrfs/extent-io-tree.c @@ -1551,7 +1551,7 @@ u64 count_range_bits(struct extent_io_tree *tree, u64 last = 0; int found = 0; - if (WARN_ON(search_end <= cur_start)) + if (WARN_ON(search_end < cur_start)) return 0; spin_lock(&tree->lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 892d78c1853c..72ba13b027a9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1713,6 +1713,11 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, BUG(); if (ret && insert_reserved) btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); + if (ret < 0) + btrfs_err(trans->fs_info, +"failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d", + node->bytenr, node->num_bytes, node->type, + node->action, node->ref_mod, ret); return ret; } @@ -1954,8 +1959,6 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, if (ret) { unselect_delayed_ref_head(delayed_refs, locked_ref); btrfs_put_delayed_ref(ref); - btrfs_debug(fs_info, "run_one_delayed_ref returned %d", - ret); return ret; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 83dd3aa59663..3bbf8703db2a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -104,6 +104,15 @@ struct btrfs_bio_ctrl { btrfs_bio_end_io_t end_io_func; /* + * This is for metadata read, to provide the extra needed verification + * info. This has to be provided for submit_one_bio(), as + * submit_one_bio() can submit a bio if it ends at stripe boundary. If + * no such parent_check is provided, the metadata can hit false alert at + * endio time. + */ + struct btrfs_tree_parent_check *parent_check; + + /* * Tell writepage not to lock the state bits for this range, it still * does the unlocking. */ @@ -133,13 +142,24 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl) btrfs_bio(bio)->file_offset = page_offset(bv->bv_page) + bv->bv_offset; - if (!is_data_inode(&inode->vfs_inode)) + if (!is_data_inode(&inode->vfs_inode)) { + if (btrfs_op(bio) != BTRFS_MAP_WRITE) { + /* + * For metadata read, we should have the parent_check, + * and copy it to bbio for metadata verification. + */ + ASSERT(bio_ctrl->parent_check); + memcpy(&btrfs_bio(bio)->parent_check, + bio_ctrl->parent_check, + sizeof(struct btrfs_tree_parent_check)); + } btrfs_submit_metadata_bio(inode, bio, mirror_num); - else if (btrfs_op(bio) == BTRFS_MAP_WRITE) + } else if (btrfs_op(bio) == BTRFS_MAP_WRITE) { btrfs_submit_data_write_bio(inode, bio, mirror_num); - else + } else { btrfs_submit_data_read_bio(inode, bio, mirror_num, bio_ctrl->compress_type); + } /* The bio is owned by the end_io handler now */ bio_ctrl->bio = NULL; @@ -3806,6 +3826,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, lockend = round_up(start + len, inode->root->fs_info->sectorsize); prev_extent_end = lockstart; + btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); @@ -3999,6 +4020,7 @@ check_eof_delalloc: out_unlock: unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); out: free_extent_state(delalloc_cached_state); btrfs_free_backref_share_ctx(backref_ctx); @@ -4829,6 +4851,7 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait, struct extent_state *cached_state = NULL; struct btrfs_bio_ctrl bio_ctrl = { .mirror_num = mirror_num, + .parent_check = check, }; int ret = 0; @@ -4878,7 +4901,6 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait, */ atomic_dec(&eb->io_pages); } - memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check)); submit_one_bio(&bio_ctrl); if (ret || wait != WAIT_COMPLETE) { free_extent_state(cached_state); @@ -4905,6 +4927,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, unsigned long num_reads = 0; struct btrfs_bio_ctrl bio_ctrl = { .mirror_num = mirror_num, + .parent_check = check, }; if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) @@ -4996,7 +5019,6 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, } } - memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check)); submit_one_bio(&bio_ctrl); if (ret || wait != WAIT_COMPLETE) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 91b00eb2440e..af046d22300e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3354,7 +3354,7 @@ bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end, bool search_io_tree = true; bool ret = false; - while (cur_offset < end) { + while (cur_offset <= end) { u64 delalloc_start; u64 delalloc_end; bool delalloc; @@ -3541,6 +3541,7 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence) struct extent_buffer *leaf = path->nodes[0]; struct btrfs_file_extent_item *extent; u64 extent_end; + u8 type; if (path->slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); @@ -3596,10 +3597,16 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence) extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); + type = btrfs_file_extent_type(leaf, extent); - if (btrfs_file_extent_disk_bytenr(leaf, extent) == 0 || - btrfs_file_extent_type(leaf, extent) == - BTRFS_FILE_EXTENT_PREALLOC) { + /* + * Can't access the extent's disk_bytenr field if this is an + * inline extent, since at that offset, it's where the extent + * data starts. + */ + if (type == BTRFS_FILE_EXTENT_PREALLOC || + (type == BTRFS_FILE_EXTENT_REG && + btrfs_file_extent_disk_bytenr(leaf, extent) == 0)) { /* * Explicit hole or prealloc extent, search for delalloc. * A prealloc extent is treated like a hole. diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index a749367e5ae2..37b86acfcbcf 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -119,6 +119,12 @@ enum { /* Indicate that we want to commit the transaction. */ BTRFS_FS_NEED_TRANS_COMMIT, + /* + * Indicate metadata over-commit is disabled. This is set when active + * zone tracking is needed. + */ + BTRFS_FS_NO_OVERCOMMIT, + #if BITS_PER_LONG == 32 /* Indicate if we have error/warn message printed on 32bit systems */ BTRFS_FS_32BIT_ERROR, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8bcad9940154..efee6d35af52 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5281,7 +5281,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) return ret; } -static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +static int btrfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -5291,7 +5291,7 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr if (btrfs_root_readonly(root)) return -EROFS; - err = setattr_prepare(mnt_userns, dentry, attr); + err = setattr_prepare(idmap, dentry, attr); if (err) return err; @@ -5302,12 +5302,12 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr } if (attr->ia_valid) { - setattr_copy(mnt_userns, inode, attr); + setattr_copy(idmap, inode, attr); inode_inc_iversion(inode); err = btrfs_dirty_inode(BTRFS_I(inode)); if (!err && attr->ia_valid & ATTR_MODE) - err = posix_acl_chmod(mnt_userns, dentry, inode->i_mode); + err = posix_acl_chmod(idmap, dentry, inode->i_mode); } return err; @@ -6724,7 +6724,7 @@ out_inode: return err; } -static int btrfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int btrfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; @@ -6732,13 +6732,13 @@ static int btrfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, inode = new_inode(dir->i_sb); if (!inode) return -ENOMEM; - inode_init_owner(mnt_userns, inode, dir, mode); + inode_init_owner(idmap, inode, dir, mode); inode->i_op = &btrfs_special_inode_operations; init_special_inode(inode, inode->i_mode, rdev); return btrfs_create_common(dir, dentry, inode); } -static int btrfs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int btrfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct inode *inode; @@ -6746,7 +6746,7 @@ static int btrfs_create(struct user_namespace *mnt_userns, struct inode *dir, inode = new_inode(dir->i_sb); if (!inode) return -ENOMEM; - inode_init_owner(mnt_userns, inode, dir, mode); + inode_init_owner(idmap, inode, dir, mode); inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; inode->i_mapping->a_ops = &btrfs_aops; @@ -6837,7 +6837,7 @@ fail: return err; } -static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int btrfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; @@ -6845,7 +6845,7 @@ static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, inode = new_inode(dir->i_sb); if (!inode) return -ENOMEM; - inode_init_owner(mnt_userns, inode, dir, S_IFDIR | mode); + inode_init_owner(idmap, inode, dir, S_IFDIR | mode); inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; return btrfs_create_common(dir, dentry, inode); @@ -7092,7 +7092,7 @@ next: * Other members are not utilized for inline extents. */ ASSERT(em->block_start == EXTENT_MAP_INLINE); - ASSERT(em->len = fs_info->sectorsize); + ASSERT(em->len == fs_info->sectorsize); ret = read_inline_extent(inode, path, page); if (ret < 0) @@ -8802,7 +8802,7 @@ out: return ret; } -struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns, +struct inode *btrfs_new_subvol_inode(struct mnt_idmap *idmap, struct inode *dir) { struct inode *inode; @@ -8813,7 +8813,7 @@ struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns, * Subvolumes don't inherit the sgid bit or the parent's gid if * the parent's sgid bit is set. This is probably a bug. */ - inode_init_owner(mnt_userns, inode, NULL, + inode_init_owner(idmap, inode, NULL, S_IFDIR | (~current_umask() & S_IRWXUGO)); inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; @@ -9004,7 +9004,7 @@ fail: return -ENOMEM; } -static int btrfs_getattr(struct user_namespace *mnt_userns, +static int btrfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { @@ -9034,7 +9034,7 @@ static int btrfs_getattr(struct user_namespace *mnt_userns, STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); - generic_fillattr(mnt_userns, inode, stat); + generic_fillattr(idmap, inode, stat); stat->dev = BTRFS_I(inode)->root->anon_dev; spin_lock(&BTRFS_I(inode)->lock); @@ -9289,14 +9289,14 @@ out_notrans: return ret; } -static struct inode *new_whiteout_inode(struct user_namespace *mnt_userns, +static struct inode *new_whiteout_inode(struct mnt_idmap *idmap, struct inode *dir) { struct inode *inode; inode = new_inode(dir->i_sb); if (inode) { - inode_init_owner(mnt_userns, inode, dir, + inode_init_owner(idmap, inode, dir, S_IFCHR | WHITEOUT_MODE); inode->i_op = &btrfs_special_inode_operations; init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); @@ -9304,7 +9304,7 @@ static struct inode *new_whiteout_inode(struct user_namespace *mnt_userns, return inode; } -static int btrfs_rename(struct user_namespace *mnt_userns, +static int btrfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) @@ -9376,9 +9376,11 @@ static int btrfs_rename(struct user_namespace *mnt_userns, filemap_flush(old_inode->i_mapping); if (flags & RENAME_WHITEOUT) { - whiteout_args.inode = new_whiteout_inode(mnt_userns, old_dir); - if (!whiteout_args.inode) - return -ENOMEM; + whiteout_args.inode = new_whiteout_inode(idmap, old_dir); + if (!whiteout_args.inode) { + ret = -ENOMEM; + goto out_fscrypt_names; + } ret = btrfs_new_inode_prepare(&whiteout_args, &trans_num_items); if (ret) goto out_whiteout_inode; @@ -9543,7 +9545,7 @@ out_fscrypt_names: return ret; } -static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_dir, +static int btrfs_rename2(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { @@ -9556,7 +9558,7 @@ static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_di ret = btrfs_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); else - ret = btrfs_rename(mnt_userns, old_dir, old_dentry, new_dir, + ret = btrfs_rename(idmap, old_dir, old_dentry, new_dir, new_dentry, flags); btrfs_btree_balance_dirty(BTRFS_I(new_dir)->root->fs_info); @@ -9756,7 +9758,7 @@ out: return ret; } -static int btrfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); @@ -9784,7 +9786,7 @@ static int btrfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, inode = new_inode(dir->i_sb); if (!inode) return -ENOMEM; - inode_init_owner(mnt_userns, inode, dir, S_IFLNK | S_IRWXUGO); + inode_init_owner(idmap, inode, dir, S_IFLNK | S_IRWXUGO); inode->i_op = &btrfs_symlink_inode_operations; inode_nohighmem(inode); inode->i_mapping->a_ops = &btrfs_aops; @@ -10073,7 +10075,7 @@ int btrfs_prealloc_file_range_trans(struct inode *inode, min_size, actual_len, alloc_hint, trans); } -static int btrfs_permission(struct user_namespace *mnt_userns, +static int btrfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -10086,10 +10088,10 @@ static int btrfs_permission(struct user_namespace *mnt_userns, if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) return -EACCES; } - return generic_permission(mnt_userns, inode, mask); + return generic_permission(idmap, inode, mask); } -static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, +static int btrfs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); @@ -10107,7 +10109,7 @@ static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, inode = new_inode(dir->i_sb); if (!inode) return -ENOMEM; - inode_init_owner(mnt_userns, inode, dir, mode); + inode_init_owner(idmap, inode, dir, mode); inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; inode->i_mapping->a_ops = &btrfs_aops; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7e348bd2ccde..5ba1ff31713b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -243,7 +243,7 @@ int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) return 0; } -int btrfs_fileattr_set(struct user_namespace *mnt_userns, +int btrfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); @@ -578,7 +578,7 @@ static unsigned int create_subvol_num_items(struct btrfs_qgroup_inherit *inherit return num_items; } -static noinline int create_subvol(struct user_namespace *mnt_userns, +static noinline int create_subvol(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, struct btrfs_qgroup_inherit *inherit) { @@ -623,7 +623,7 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, if (ret < 0) goto out_root_item; - new_inode_args.inode = btrfs_new_subvol_inode(mnt_userns, dir); + new_inode_args.inode = btrfs_new_subvol_inode(idmap, dir); if (!new_inode_args.inode) { ret = -ENOMEM; goto out_anon_dev; @@ -898,7 +898,7 @@ free_pending: * nfs_async_unlink(). */ -static int btrfs_may_delete(struct user_namespace *mnt_userns, +static int btrfs_may_delete(struct mnt_idmap *idmap, struct inode *dir, struct dentry *victim, int isdir) { int error; @@ -909,12 +909,12 @@ static int btrfs_may_delete(struct user_namespace *mnt_userns, BUG_ON(d_inode(victim->d_parent) != dir); audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); - error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC); + error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (IS_APPEND(dir)) return -EPERM; - if (check_sticky(mnt_userns, dir, d_inode(victim)) || + if (check_sticky(idmap, dir, d_inode(victim)) || IS_APPEND(d_inode(victim)) || IS_IMMUTABLE(d_inode(victim)) || IS_SWAPFILE(d_inode(victim))) return -EPERM; @@ -933,16 +933,16 @@ static int btrfs_may_delete(struct user_namespace *mnt_userns, } /* copy of may_create in fs/namei.c() */ -static inline int btrfs_may_create(struct user_namespace *mnt_userns, +static inline int btrfs_may_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *child) { if (d_really_is_positive(child)) return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; - if (!fsuidgid_has_mapping(dir->i_sb, mnt_userns)) + if (!fsuidgid_has_mapping(dir->i_sb, idmap)) return -EOVERFLOW; - return inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC); + return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); } /* @@ -951,7 +951,7 @@ static inline int btrfs_may_create(struct user_namespace *mnt_userns, * inside this filesystem so it's quite a bit simpler. */ static noinline int btrfs_mksubvol(const struct path *parent, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, const char *name, int namelen, struct btrfs_root *snap_src, bool readonly, @@ -967,12 +967,12 @@ static noinline int btrfs_mksubvol(const struct path *parent, if (error == -EINTR) return error; - dentry = lookup_one(mnt_userns, name, parent->dentry, namelen); + dentry = lookup_one(idmap, name, parent->dentry, namelen); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_unlock; - error = btrfs_may_create(mnt_userns, dir, dentry); + error = btrfs_may_create(idmap, dir, dentry); if (error) goto out_dput; @@ -993,7 +993,7 @@ static noinline int btrfs_mksubvol(const struct path *parent, if (snap_src) error = create_snapshot(snap_src, dir, dentry, readonly, inherit); else - error = create_subvol(mnt_userns, dir, dentry, inherit); + error = create_subvol(idmap, dir, dentry, inherit); if (!error) fsnotify_mkdir(dir, dentry); @@ -1007,7 +1007,7 @@ out_unlock: } static noinline int btrfs_mksnapshot(const struct path *parent, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, const char *name, int namelen, struct btrfs_root *root, bool readonly, @@ -1037,7 +1037,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent, btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); - ret = btrfs_mksubvol(parent, mnt_userns, name, namelen, + ret = btrfs_mksubvol(parent, idmap, name, namelen, root, readonly, inherit); out: if (snapshot_force_cow) @@ -1240,7 +1240,7 @@ out_drop: } static noinline int __btrfs_ioctl_snap_create(struct file *file, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, const char *name, unsigned long fd, int subvol, bool readonly, struct btrfs_qgroup_inherit *inherit) @@ -1268,7 +1268,7 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file, } if (subvol) { - ret = btrfs_mksubvol(&file->f_path, mnt_userns, name, + ret = btrfs_mksubvol(&file->f_path, idmap, name, namelen, NULL, readonly, inherit); } else { struct fd src = fdget(fd); @@ -1283,14 +1283,14 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file, btrfs_info(BTRFS_I(file_inode(file))->root->fs_info, "Snapshot src from another FS"); ret = -EXDEV; - } else if (!inode_owner_or_capable(mnt_userns, src_inode)) { + } else if (!inode_owner_or_capable(idmap, src_inode)) { /* * Subvolume creation is not restricted, but snapshots * are limited to own subvolumes only */ ret = -EPERM; } else { - ret = btrfs_mksnapshot(&file->f_path, mnt_userns, + ret = btrfs_mksnapshot(&file->f_path, idmap, name, namelen, BTRFS_I(src_inode)->root, readonly, inherit); @@ -1317,7 +1317,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, return PTR_ERR(vol_args); vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; - ret = __btrfs_ioctl_snap_create(file, file_mnt_user_ns(file), + ret = __btrfs_ioctl_snap_create(file, file_mnt_idmap(file), vol_args->name, vol_args->fd, subvol, false, NULL); @@ -1377,7 +1377,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, } } - ret = __btrfs_ioctl_snap_create(file, file_mnt_user_ns(file), + ret = __btrfs_ioctl_snap_create(file, file_mnt_idmap(file), vol_args->name, vol_args->fd, subvol, readonly, inherit); if (ret) @@ -1422,7 +1422,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, u64 flags; int ret = 0; - if (!inode_owner_or_capable(file_mnt_user_ns(file), inode)) + if (!inode_owner_or_capable(file_mnt_idmap(file), inode)) return -EPERM; ret = mnt_want_write_file(file); @@ -1870,7 +1870,7 @@ out: return ret; } -static int btrfs_search_path_in_tree_user(struct user_namespace *mnt_userns, +static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, struct inode *inode, struct btrfs_ioctl_ino_lookup_user_args *args) { @@ -1962,7 +1962,7 @@ static int btrfs_search_path_in_tree_user(struct user_namespace *mnt_userns, ret = PTR_ERR(temp_inode); goto out_put; } - ret = inode_permission(mnt_userns, temp_inode, + ret = inode_permission(idmap, temp_inode, MAY_READ | MAY_EXEC); iput(temp_inode); if (ret) { @@ -2101,7 +2101,7 @@ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp) return -EACCES; } - ret = btrfs_search_path_in_tree_user(file_mnt_user_ns(file), inode, args); + ret = btrfs_search_path_in_tree_user(file_mnt_idmap(file), inode, args); if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) ret = -EFAULT; @@ -2335,7 +2335,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, struct btrfs_root *dest = NULL; struct btrfs_ioctl_vol_args *vol_args = NULL; struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL; - struct user_namespace *mnt_userns = file_mnt_user_ns(file); + struct mnt_idmap *idmap = file_mnt_idmap(file); char *subvol_name, *subvol_name_ptr = NULL; int subvol_namelen; int err = 0; @@ -2428,7 +2428,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, * anywhere in the filesystem the user wouldn't be able * to delete without an idmapped mount. */ - if (old_dir != dir && mnt_userns != &init_user_ns) { + if (old_dir != dir && idmap != &nop_mnt_idmap) { err = -EOPNOTSUPP; goto free_parent; } @@ -2471,7 +2471,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); if (err == -EINTR) goto free_subvol_name; - dentry = lookup_one(mnt_userns, subvol_name, parent, subvol_namelen); + dentry = lookup_one(idmap, subvol_name, parent, subvol_namelen); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); goto out_unlock_dir; @@ -2513,13 +2513,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, if (root == dest) goto out_dput; - err = inode_permission(mnt_userns, inode, MAY_WRITE | MAY_EXEC); + err = inode_permission(idmap, inode, MAY_WRITE | MAY_EXEC); if (err) goto out_dput; } /* check if subvolume may be deleted by a user */ - err = btrfs_may_delete(mnt_userns, dir, dentry, 1); + err = btrfs_may_delete(idmap, dir, dentry, 1); if (err) goto out_dput; @@ -2582,7 +2582,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) * running and allows defrag on files open in read-only mode. */ if (!capable(CAP_SYS_ADMIN) && - inode_permission(&init_user_ns, inode, MAY_WRITE)) { + inode_permission(&nop_mnt_idmap, inode, MAY_WRITE)) { ret = -EPERM; goto out; } @@ -3907,7 +3907,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info, } static long _btrfs_ioctl_set_received_subvol(struct file *file, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct btrfs_ioctl_received_subvol_args *sa) { struct inode *inode = file_inode(file); @@ -3919,7 +3919,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, int ret = 0; int received_uuid_changed; - if (!inode_owner_or_capable(mnt_userns, inode)) + if (!inode_owner_or_capable(idmap, inode)) return -EPERM; ret = mnt_want_write_file(file); @@ -4024,7 +4024,7 @@ static long btrfs_ioctl_set_received_subvol_32(struct file *file, args64->rtime.nsec = args32->rtime.nsec; args64->flags = args32->flags; - ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_user_ns(file), args64); + ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_idmap(file), args64); if (ret) goto out; @@ -4058,7 +4058,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, if (IS_ERR(sa)) return PTR_ERR(sa); - ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_user_ns(file), sa); + ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_idmap(file), sa); if (ret) goto out; diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 8a855d5ac2fa..d51b9a2f2f6e 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -6,7 +6,7 @@ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); -int btrfs_fileattr_set(struct user_namespace *mnt_userns, +int btrfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); int btrfs_ioctl_get_supported_features(void __user *arg); void btrfs_sync_inode_flags_to_i_flags(struct inode *inode); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 5c636e00d77d..af97413abcf4 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2765,9 +2765,19 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) /* * Old roots should be searched when inserting qgroup - * extent record + * extent record. + * + * But for INCONSISTENT (NO_ACCOUNTING) -> rescan case, + * we may have some record inserted during + * NO_ACCOUNTING (thus no old_roots populated), but + * later we start rescan, which clears NO_ACCOUNTING, + * leaving some inserted records without old_roots + * populated. + * + * Those cases are rare and should not cause too much + * time spent during commit_transaction(). */ - if (WARN_ON(!record->old_roots)) { + if (!record->old_roots) { /* Search commit root to find old_roots */ ret = btrfs_find_all_roots(&ctx, false); if (ret < 0) @@ -2787,6 +2797,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) * current root. It's safe inside commit_transaction(). */ ctx.trans = trans; + ctx.time_seq = BTRFS_SEQ_LAST; ret = btrfs_find_all_roots(&ctx, false); if (ret < 0) goto cleanup; @@ -3356,6 +3367,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) int err = -ENOMEM; int ret = 0; bool stopped = false; + bool did_leaf_rescans = false; path = btrfs_alloc_path(); if (!path) @@ -3376,6 +3388,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) } err = qgroup_rescan_leaf(trans, path); + did_leaf_rescans = true; if (err > 0) btrfs_commit_transaction(trans); @@ -3396,16 +3409,23 @@ out: mutex_unlock(&fs_info->qgroup_rescan_lock); /* - * only update status, since the previous part has already updated the - * qgroup info. + * Only update status, since the previous part has already updated the + * qgroup info, and only if we did any actual work. This also prevents + * race with a concurrent quota disable, which has already set + * fs_info->quota_root to NULL and cleared BTRFS_FS_QUOTA_ENABLED at + * btrfs_quota_disable(). */ - trans = btrfs_start_transaction(fs_info->quota_root, 1); - if (IS_ERR(trans)) { - err = PTR_ERR(trans); + if (did_leaf_rescans) { + trans = btrfs_start_transaction(fs_info->quota_root, 1); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + trans = NULL; + btrfs_err(fs_info, + "fail to start transaction for status update: %d", + err); + } + } else { trans = NULL; - btrfs_err(fs_info, - "fail to start transaction for status update: %d", - err); } mutex_lock(&fs_info->qgroup_rescan_lock); diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 2d90a6b5eb00..ff4b1d583788 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1426,12 +1426,20 @@ static void rbio_update_error_bitmap(struct btrfs_raid_bio *rbio, struct bio *bi u32 bio_size = 0; struct bio_vec *bvec; struct bvec_iter_all iter_all; + int i; bio_for_each_segment_all(bvec, bio, iter_all) bio_size += bvec->bv_len; - bitmap_set(rbio->error_bitmap, total_sector_nr, - bio_size >> rbio->bioc->fs_info->sectorsize_bits); + /* + * Since we can have multiple bios touching the error_bitmap, we cannot + * call bitmap_set() without protection. + * + * Instead use set_bit() for each bit, as set_bit() itself is atomic. + */ + for (i = total_sector_nr; i < total_sector_nr + + (bio_size >> rbio->bioc->fs_info->sectorsize_bits); i++) + set_bit(i, rbio->error_bitmap); } /* Verify the data sectors at read time. */ @@ -1886,7 +1894,7 @@ pstripe: sector->uptodate = 1; } if (failb >= 0) { - ret = verify_one_sector(rbio, faila, sector_nr); + ret = verify_one_sector(rbio, failb, sector_nr); if (ret < 0) goto cleanup; @@ -2646,7 +2654,7 @@ static int recover_scrub_rbio(struct btrfs_raid_bio *rbio) void **pointers = NULL; void **unmap_array = NULL; int sector_nr; - int ret; + int ret = 0; /* * @pointers array stores the pointer for each sector. diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index e65e6b6600a7..d50182b6deec 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -8073,10 +8073,10 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) /* * Check that we don't overflow at later allocations, we request * clone_sources_count + 1 items, and compare to unsigned long inside - * access_ok. + * access_ok. Also set an upper limit for allocation size so this can't + * easily exhaust memory. Max number of clone sources is about 200K. */ - if (arg->clone_sources_count > - ULONG_MAX / sizeof(struct clone_root) - 1) { + if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) { ret = -EINVAL; goto out; } diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index d28ee4e36f3d..69c09508afb5 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -407,7 +407,8 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, return 0; used = btrfs_space_info_used(space_info, true); - if (btrfs_is_zoned(fs_info) && (space_info->flags & BTRFS_BLOCK_GROUP_METADATA)) + if (test_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags) && + (space_info->flags & BTRFS_BLOCK_GROUP_METADATA)) avail = 0; else avail = calc_available_free_space(fs_info, space_info, flush); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 93f52ee85f6f..433ce221dc5c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1705,7 +1705,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore; - ret = btrfs_check_features(fs_info, sb); + ret = btrfs_check_features(fs_info, !(*flags & SB_RDONLY)); if (ret < 0) goto restore; @@ -2514,6 +2514,7 @@ static __always_inline void btrfs_exit_btrfs_fs(void) static void __exit exit_btrfs_fs(void) { btrfs_exit_btrfs_fs(); + btrfs_cleanup_fs_uuids(); } static int __init init_btrfs_fs(void) diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 181469fc0bb3..ca09cf9afce8 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -64,7 +64,7 @@ struct inode *btrfs_new_test_inode(void) BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; BTRFS_I(inode)->location.offset = 0; - inode_init_owner(&init_user_ns, inode, NULL, S_IFREG); + inode_init_owner(&nop_mnt_idmap, inode, NULL, S_IFREG); return inode; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a3c43f0b1c95..58599189bd18 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2980,7 +2980,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = 0; if (ret) { blk_finish_plug(&plug); - btrfs_abort_transaction(trans, ret); btrfs_set_log_full_commit(trans); mutex_unlock(&root->log_mutex); goto out; @@ -3045,15 +3044,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, blk_finish_plug(&plug); btrfs_set_log_full_commit(trans); - - if (ret != -ENOSPC) { - btrfs_abort_transaction(trans, ret); - mutex_unlock(&log_root_tree->log_mutex); - goto out; - } + if (ret != -ENOSPC) + btrfs_err(fs_info, + "failed to update log for root %llu ret %d", + root->root_key.objectid, ret); btrfs_wait_tree_log_extents(log, mark); mutex_unlock(&log_root_tree->log_mutex); - ret = BTRFS_LOG_FORCE_COMMIT; goto out; } @@ -3112,7 +3108,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, goto out_wake_log_root; } else if (ret) { btrfs_set_log_full_commit(trans); - btrfs_abort_transaction(trans, ret); mutex_unlock(&log_root_tree->log_mutex); goto out_wake_log_root; } @@ -3581,17 +3576,19 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, } static int flush_dir_items_batch(struct btrfs_trans_handle *trans, - struct btrfs_root *log, + struct btrfs_inode *inode, struct extent_buffer *src, struct btrfs_path *dst_path, int start_slot, int count) { + struct btrfs_root *log = inode->root->log_root; char *ins_data = NULL; struct btrfs_item_batch batch; struct extent_buffer *dst; unsigned long src_offset; unsigned long dst_offset; + u64 last_index; struct btrfs_key key; u32 item_size; int ret; @@ -3649,6 +3646,19 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans, src_offset = btrfs_item_ptr_offset(src, start_slot + count - 1); copy_extent_buffer(dst, src, dst_offset, src_offset, batch.total_data_size); btrfs_release_path(dst_path); + + last_index = batch.keys[count - 1].offset; + ASSERT(last_index > inode->last_dir_index_offset); + + /* + * If for some unexpected reason the last item's index is not greater + * than the last index we logged, warn and return an error to fallback + * to a transaction commit. + */ + if (WARN_ON(last_index <= inode->last_dir_index_offset)) + ret = -EUCLEAN; + else + inode->last_dir_index_offset = last_index; out: kfree(ins_data); @@ -3698,7 +3708,6 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, } di = btrfs_item_ptr(src, i, struct btrfs_dir_item); - ctx->last_dir_item_offset = key.offset; /* * Skip ranges of items that consist only of dir item keys created @@ -3761,7 +3770,7 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, if (batch_size > 0) { int ret; - ret = flush_dir_items_batch(trans, log, src, dst_path, + ret = flush_dir_items_batch(trans, inode, src, dst_path, batch_start, batch_size); if (ret < 0) return ret; @@ -3826,7 +3835,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, path->slots[0]); if (tmp.type == BTRFS_DIR_INDEX_KEY) last_old_dentry_offset = tmp.offset; + } else if (ret < 0) { + err = ret; } + goto done; } @@ -3846,19 +3858,34 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, */ if (tmp.type == BTRFS_DIR_INDEX_KEY) last_old_dentry_offset = tmp.offset; + } else if (ret < 0) { + err = ret; + goto done; } + btrfs_release_path(path); /* - * Find the first key from this transaction again. See the note for - * log_new_dir_dentries, if we're logging a directory recursively we - * won't be holding its i_mutex, which means we can modify the directory - * while we're logging it. If we remove an entry between our first - * search and this search we'll not find the key again and can just - * bail. + * Find the first key from this transaction again or the one we were at + * in the loop below in case we had to reschedule. We may be logging the + * directory without holding its VFS lock, which happen when logging new + * dentries (through log_new_dir_dentries()) or in some cases when we + * need to log the parent directory of an inode. This means a dir index + * key might be deleted from the inode's root, and therefore we may not + * find it anymore. If we can't find it, just move to the next key. We + * can not bail out and ignore, because if we do that we will simply + * not log dir index keys that come after the one that was just deleted + * and we can end up logging a dir index range that ends at (u64)-1 + * (@last_offset is initialized to that), resulting in removing dir + * entries we should not remove at log replay time. */ search: ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); + if (ret > 0) + ret = btrfs_next_item(root, path); + if (ret < 0) + err = ret; + /* If ret is 1, there are no more keys in the inode's root. */ if (ret != 0) goto done; @@ -4031,7 +4058,6 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, min_key = BTRFS_DIR_START_INDEX; max_key = 0; - ctx->last_dir_item_offset = inode->last_dir_index_offset; while (1) { ret = log_dir_items(trans, inode, path, dst_path, @@ -4043,8 +4069,6 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, min_key = max_key + 1; } - inode->last_dir_index_offset = ctx->last_dir_item_offset; - return 0; } @@ -5580,8 +5604,10 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, * LOG_INODE_EXISTS mode) and slow down other fsyncs or transaction * commits. */ - if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) + if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) { + btrfs_set_log_full_commit(trans); return BTRFS_LOG_FORCE_COMMIT; + } inode = btrfs_iget(root->fs_info->sb, ino, root); /* @@ -7459,8 +7485,11 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, * not fail, but if it does, it's not serious, just bail out and * mark the log for a full commit. */ - if (WARN_ON_ONCE(ret < 0)) + if (WARN_ON_ONCE(ret < 0)) { + fscrypt_free_filename(&fname); goto out; + } + log_pinned = true; path = btrfs_alloc_path(); diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 85b43075ac58..85cd24cb0540 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -24,8 +24,6 @@ struct btrfs_log_ctx { bool logging_new_delayed_dentries; /* Indicate if the inode being logged was logged before. */ bool logged_before; - /* Tracks the last logged dir item/index key offset. */ - u64 last_dir_item_offset; struct inode *inode; struct list_head list; /* Only used for fast fsyncs. */ diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index aa25fa335d3e..df43093b7a46 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -403,6 +403,7 @@ void btrfs_free_device(struct btrfs_device *device) static void free_fs_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device; + WARN_ON(fs_devices->opened); while (!list_empty(&fs_devices->devices)) { device = list_entry(fs_devices->devices.next, @@ -768,8 +769,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, BTRFS_SUPER_FLAG_CHANGING_FSID_V2); error = lookup_bdev(path, &path_devt); - if (error) + if (error) { + btrfs_err(NULL, "failed to lookup block device for path %s: %d", + path, error); return ERR_PTR(error); + } if (fsid_change_in_progress) { if (!has_metadata_uuid) @@ -836,6 +840,9 @@ static noinline struct btrfs_device *device_list_add(const char *path, unsigned int nofs_flag; if (fs_devices->opened) { + btrfs_err(NULL, + "device %s belongs to fsid %pU, and the fs is already mounted", + path, fs_devices->fsid); mutex_unlock(&fs_devices->device_list_mutex); return ERR_PTR(-EBUSY); } @@ -905,6 +912,9 @@ static noinline struct btrfs_device *device_list_add(const char *path, * generation are equal. */ mutex_unlock(&fs_devices->device_list_mutex); + btrfs_err(NULL, +"device %s already registered with a higher generation, found %llu expect %llu", + path, found_transid, device->generation); return ERR_PTR(-EEXIST); } @@ -1172,9 +1182,22 @@ void btrfs_close_devices(struct btrfs_fs_devices *fs_devices) mutex_lock(&uuid_mutex); close_fs_devices(fs_devices); - if (!fs_devices->opened) + if (!fs_devices->opened) { list_splice_init(&fs_devices->seed_list, &list); + /* + * If the struct btrfs_fs_devices is not assembled with any + * other device, it can be re-initialized during the next mount + * without the needing device-scan step. Therefore, it can be + * fully freed. + */ + if (fs_devices->num_devices == 1) { + list_del(&fs_devices->fs_list); + free_fs_devices(fs_devices); + } + } + + list_for_each_entry_safe(fs_devices, tmp, &list, seed_list) { close_fs_devices(fs_devices); list_del(&fs_devices->seed_list); @@ -1591,7 +1614,7 @@ again: if (ret < 0) goto out; - while (1) { + while (search_start < search_end) { l = path->nodes[0]; slot = path->slots[0]; if (slot >= btrfs_header_nritems(l)) { @@ -1614,6 +1637,9 @@ again: if (key.type != BTRFS_DEV_EXTENT_KEY) goto next; + if (key.offset > search_end) + break; + if (key.offset > search_start) { hole_size = key.offset - search_start; dev_extent_hole_check(device, &search_start, &hole_size, @@ -1674,6 +1700,7 @@ next: else ret = 0; + ASSERT(max_hole_start + max_hole_size <= search_end); out: btrfs_free_path(path); *start = max_hole_start; @@ -2005,42 +2032,42 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info) return num_devices; } +static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info, + struct block_device *bdev, int copy_num) +{ + struct btrfs_super_block *disk_super; + const size_t len = sizeof(disk_super->magic); + const u64 bytenr = btrfs_sb_offset(copy_num); + int ret; + + disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr); + if (IS_ERR(disk_super)) + return; + + memset(&disk_super->magic, 0, len); + folio_mark_dirty(virt_to_folio(disk_super)); + btrfs_release_disk_super(disk_super); + + ret = sync_blockdev_range(bdev, bytenr, bytenr + len - 1); + if (ret) + btrfs_warn(fs_info, "error clearing superblock number %d (%d)", + copy_num, ret); +} + void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct block_device *bdev, const char *device_path) { - struct btrfs_super_block *disk_super; int copy_num; if (!bdev) return; for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; copy_num++) { - struct page *page; - int ret; - - disk_super = btrfs_read_dev_one_super(bdev, copy_num, false); - if (IS_ERR(disk_super)) - continue; - - if (bdev_is_zoned(bdev)) { + if (bdev_is_zoned(bdev)) btrfs_reset_sb_log_zones(bdev, copy_num); - continue; - } - - memset(&disk_super->magic, 0, sizeof(disk_super->magic)); - - page = virt_to_page(disk_super); - set_page_dirty(page); - lock_page(page); - /* write_on_page() unlocks the page */ - ret = write_one_page(page); - if (ret) - btrfs_warn(fs_info, - "error clearing superblock number %d (%d)", - copy_num, ret); - btrfs_release_disk_super(disk_super); - + else + btrfs_scratch_superblock(fs_info, bdev, copy_num); } /* Notify udev that device has changed */ diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 0ed4b119a7ca..0ebeaf4e81f9 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -370,7 +370,7 @@ static int btrfs_xattr_handler_get(const struct xattr_handler *handler, } static int btrfs_xattr_handler_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) @@ -383,7 +383,7 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler, } static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 01a13de11832..da7bb9187b68 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -63,7 +63,7 @@ struct list_head *zlib_alloc_workspace(unsigned int level) workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), zlib_inflate_workspacesize()); - workspace->strm.workspace = kvmalloc(workspacesize, GFP_KERNEL); + workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL); workspace->level = level; workspace->buf = NULL; /* diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index a759668477bb..1f503e8e42d4 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -539,6 +539,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) } atomic_set(&zone_info->active_zones_left, max_active_zones - nactive); + /* Overcommit does not work well with active zone tacking. */ + set_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags); } /* Validate superblock log */ diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index a69073a1d3f0..40052bdb3365 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -138,7 +138,7 @@ static int cachefiles_adjust_size(struct cachefiles_object *object) newattrs.ia_size = oi_size & PAGE_MASK; ret = cachefiles_inject_remove_error(); if (ret == 0) - ret = notify_change(&init_user_ns, file->f_path.dentry, + ret = notify_change(&nop_mnt_idmap, file->f_path.dentry, &newattrs, NULL); if (ret < 0) goto truncate_failed; @@ -148,7 +148,7 @@ static int cachefiles_adjust_size(struct cachefiles_object *object) newattrs.ia_size = ni_size; ret = cachefiles_inject_write_error(); if (ret == 0) - ret = notify_change(&init_user_ns, file->f_path.dentry, + ret = notify_change(&nop_mnt_idmap, file->f_path.dentry, &newattrs, NULL); truncate_failed: diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 03ca8f2f657a..82219a8f6084 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -130,7 +130,7 @@ retry: goto mkdir_error; ret = cachefiles_inject_write_error(); if (ret == 0) - ret = vfs_mkdir(&init_user_ns, d_inode(dir), subdir, 0700); + ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); if (ret < 0) { trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, cachefiles_trace_mkdir_error); @@ -245,7 +245,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache, ret = cachefiles_inject_remove_error(); if (ret == 0) { - ret = vfs_unlink(&init_user_ns, d_backing_inode(dir), dentry, NULL); + ret = vfs_unlink(&nop_mnt_idmap, d_backing_inode(dir), dentry, NULL); if (ret == -EIO) cachefiles_io_error(cache, "Unlink failed"); } @@ -382,10 +382,10 @@ try_again: cachefiles_io_error(cache, "Rename security error %d", ret); } else { struct renamedata rd = { - .old_mnt_userns = &init_user_ns, + .old_mnt_idmap = &nop_mnt_idmap, .old_dir = d_inode(dir), .old_dentry = rep, - .new_mnt_userns = &init_user_ns, + .new_mnt_idmap = &nop_mnt_idmap, .new_dir = d_inode(cache->graveyard), .new_dentry = grave, }; @@ -451,7 +451,7 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object) ret = cachefiles_inject_write_error(); if (ret == 0) { - file = vfs_tmpfile_open(&init_user_ns, &parentpath, S_IFREG, + file = vfs_tmpfile_open(&nop_mnt_idmap, &parentpath, S_IFREG, O_RDWR | O_LARGEFILE | O_DIRECT, cache->cache_cred); ret = PTR_ERR_OR_ZERO(file); @@ -714,7 +714,7 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache, ret = cachefiles_inject_read_error(); if (ret == 0) - ret = vfs_link(object->file->f_path.dentry, &init_user_ns, + ret = vfs_link(object->file->f_path.dentry, &nop_mnt_idmap, d_inode(fan), dentry, NULL); if (ret < 0) { trace_cachefiles_vfs_error(object, d_inode(fan), ret, diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index 00b087c14995..bcb6173943ee 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -65,7 +65,7 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object) ret = cachefiles_inject_write_error(); if (ret == 0) - ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache, + ret = vfs_setxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, sizeof(struct cachefiles_xattr) + len, 0); if (ret < 0) { trace_cachefiles_vfs_error(object, file_inode(file), ret, @@ -108,7 +108,7 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file xlen = cachefiles_inject_read_error(); if (xlen == 0) - xlen = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache, buf, tlen); + xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, tlen); if (xlen != tlen) { if (xlen < 0) trace_cachefiles_vfs_error(object, file_inode(file), xlen, @@ -150,7 +150,7 @@ int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, ret = cachefiles_inject_remove_error(); if (ret == 0) - ret = vfs_removexattr(&init_user_ns, dentry, cachefiles_xattr_cache); + ret = vfs_removexattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache); if (ret < 0) { trace_cachefiles_vfs_error(object, d_inode(dentry), ret, cachefiles_trace_remxattr_error); @@ -207,7 +207,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume) ret = cachefiles_inject_write_error(); if (ret == 0) - ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache, + ret = vfs_setxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, len, 0); if (ret < 0) { trace_cachefiles_vfs_error(NULL, d_inode(dentry), ret, @@ -249,7 +249,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume) xlen = cachefiles_inject_read_error(); if (xlen == 0) - xlen = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache, buf, len); + xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, len); if (xlen != len) { if (xlen < 0) { trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen, diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index c7e8dd5b58d4..6945a938d396 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -85,7 +85,7 @@ retry: return acl; } -int ceph_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int ret = 0, size = 0; @@ -105,7 +105,7 @@ int ceph_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; if (acl) { - ret = posix_acl_update_mode(&init_user_ns, inode, + ret = posix_acl_update_mode(&nop_mnt_idmap, inode, &new_mode, &acl); if (ret) goto out; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8c74871e37c9..cac4083e387a 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -305,7 +305,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_osd_request *req; + struct ceph_osd_request *req = NULL; struct ceph_vino vino = ceph_vino(inode); struct iov_iter iter; struct page **pages; @@ -313,6 +313,11 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) int err = 0; u64 len = subreq->len; + if (ceph_inode_is_shutdown(inode)) { + err = -EIO; + goto out; + } + if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq)) return; @@ -563,6 +568,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) dout("writepage %p idx %lu\n", page, page->index); + if (ceph_inode_is_shutdown(inode)) + return -EIO; + /* verify this is a writeable snap context */ snapc = page_snap_context(page); if (!snapc) { @@ -1643,7 +1651,7 @@ int ceph_uninline_data(struct file *file) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_osd_request *req = NULL; - struct ceph_cap_flush *prealloc_cf; + struct ceph_cap_flush *prealloc_cf = NULL; struct folio *folio = NULL; u64 inline_version = CEPH_INLINE_NONE; struct page *pages[1]; @@ -1657,6 +1665,11 @@ int ceph_uninline_data(struct file *file) dout("uninline_data %p %llx.%llx inline_version %llu\n", inode, ceph_vinop(inode), inline_version); + if (ceph_inode_is_shutdown(inode)) { + err = -EIO; + goto out; + } + if (inline_version == CEPH_INLINE_NONE) return 0; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 4b159f97fe7b..7cc20772eac9 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -9,6 +9,7 @@ #include <linux/wait.h> #include <linux/writeback.h> #include <linux/iversion.h> +#include <linux/filelock.h> #include "super.h" #include "mds_client.h" @@ -2913,7 +2914,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got while (true) { flags &= CEPH_FILE_MODE_MASK; - if (atomic_read(&fi->num_locks)) + if (vfs_inode_has_locks(inode)) flags |= CHECK_FILELOCK; _got = 0; ret = try_get_cap_refs(inode, need, want, endoff, @@ -4078,6 +4079,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, void *p, *end; struct cap_extra_info extra_info = {}; bool queue_trunc; + bool close_sessions = false; dout("handle_caps from mds%d\n", session->s_mds); @@ -4215,9 +4217,13 @@ void ceph_handle_caps(struct ceph_mds_session *session, realm = NULL; if (snaptrace_len) { down_write(&mdsc->snap_rwsem); - ceph_update_snap_trace(mdsc, snaptrace, - snaptrace + snaptrace_len, - false, &realm); + if (ceph_update_snap_trace(mdsc, snaptrace, + snaptrace + snaptrace_len, + false, &realm)) { + up_write(&mdsc->snap_rwsem); + close_sessions = true; + goto done; + } downgrade_write(&mdsc->snap_rwsem); } else { down_read(&mdsc->snap_rwsem); @@ -4277,6 +4283,11 @@ done_unlocked: iput(inode); out: ceph_put_string(extra_info.pool_ns); + + /* Defer closing the sessions after s_mutex lock being released */ + if (close_sessions) + ceph_mdsc_close_sessions(mdsc); + return; flush_cap_releases: diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 6c7026cc8988..0ced8b570e42 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -845,7 +845,7 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) return PTR_ERR(result); } -static int ceph_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); @@ -905,13 +905,13 @@ out: return err; } -static int ceph_create(struct user_namespace *mnt_userns, struct inode *dir, +static int ceph_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - return ceph_mknod(mnt_userns, dir, dentry, mode, 0); + return ceph_mknod(idmap, dir, dentry, mode, 0); } -static int ceph_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *dest) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); @@ -970,7 +970,7 @@ out: return err; } -static int ceph_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); @@ -1269,7 +1269,7 @@ out: return err; } -static int ceph_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 764598e1efd9..b5cff85925a1 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -2011,6 +2011,9 @@ static int ceph_zero_partial_object(struct inode *inode, loff_t zero = 0; int op; + if (ceph_inode_is_shutdown(inode)) + return -EIO; + if (!length) { op = offset ? CEPH_OSD_OP_DELETE : CEPH_OSD_OP_TRUNCATE; length = &zero; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 23d05ec87fcc..8e5f41d45283 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2227,7 +2227,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) /* * setattr */ -int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -2240,7 +2240,7 @@ int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (ceph_inode_is_shutdown(inode)) return -ESTALE; - err = setattr_prepare(&init_user_ns, dentry, attr); + err = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (err != 0) return err; @@ -2255,7 +2255,7 @@ int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, err = __ceph_setattr(inode, attr); if (err >= 0 && (attr->ia_valid & ATTR_MODE)) - err = posix_acl_chmod(&init_user_ns, dentry, attr->ia_mode); + err = posix_acl_chmod(&nop_mnt_idmap, dentry, attr->ia_mode); return err; } @@ -2397,7 +2397,7 @@ out: * Check inode permissions. We verify we have a valid value for * the AUTH cap, then call the generic handler. */ -int ceph_permission(struct user_namespace *mnt_userns, struct inode *inode, +int ceph_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { int err; @@ -2408,7 +2408,7 @@ int ceph_permission(struct user_namespace *mnt_userns, struct inode *inode, err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED, false); if (!err) - err = generic_permission(&init_user_ns, inode, mask); + err = generic_permission(&nop_mnt_idmap, inode, mask); return err; } @@ -2417,10 +2417,10 @@ static int statx_to_caps(u32 want, umode_t mode) { int mask = 0; - if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME)) + if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME|STATX_CHANGE_COOKIE)) mask |= CEPH_CAP_AUTH_SHARED; - if (want & (STATX_NLINK|STATX_CTIME)) { + if (want & (STATX_NLINK|STATX_CTIME|STATX_CHANGE_COOKIE)) { /* * The link count for directories depends on inode->i_subdirs, * and that is only updated when Fs caps are held. @@ -2431,11 +2431,10 @@ static int statx_to_caps(u32 want, umode_t mode) mask |= CEPH_CAP_LINK_SHARED; } - if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE| - STATX_BLOCKS)) + if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|STATX_BLOCKS|STATX_CHANGE_COOKIE)) mask |= CEPH_CAP_FILE_SHARED; - if (want & (STATX_CTIME)) + if (want & (STATX_CTIME|STATX_CHANGE_COOKIE)) mask |= CEPH_CAP_XATTR_SHARED; return mask; @@ -2445,7 +2444,7 @@ static int statx_to_caps(u32 want, umode_t mode) * Get all the attributes. If we have sufficient caps for the requested attrs, * then we can avoid talking to the MDS at all. */ -int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path, +int ceph_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct inode *inode = d_inode(path->dentry); @@ -2466,7 +2465,7 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path, return err; } - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); stat->ino = ceph_present_inode(inode); /* @@ -2478,6 +2477,11 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path, valid_mask |= STATX_BTIME; } + if (request_mask & STATX_CHANGE_COOKIE) { + stat->change_cookie = inode_peek_iversion_raw(inode); + valid_mask |= STATX_CHANGE_COOKIE; + } + if (ceph_snap(inode) == CEPH_NOSNAP) stat->dev = sb->s_dev; else @@ -2519,6 +2523,8 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path, stat->nlink = 1 + 1 + ci->i_subdirs; } + stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC; + stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC; stat->result_mask = request_mask & valid_mask; return err; } diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index f3b461c708a8..cb51c7e9c8e2 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -7,6 +7,7 @@ #include "super.h" #include "mds_client.h" +#include <linux/filelock.h> #include <linux/ceph/pagelist.h> static u64 lock_secret; @@ -32,24 +33,36 @@ void __init ceph_flock_init(void) static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) { - struct ceph_file_info *fi = dst->fl_file->private_data; struct inode *inode = file_inode(dst->fl_file); atomic_inc(&ceph_inode(inode)->i_filelock_ref); - atomic_inc(&fi->num_locks); + dst->fl_u.ceph.inode = igrab(inode); } +/* + * Do not use the 'fl->fl_file' in release function, which + * is possibly already released by another thread. + */ static void ceph_fl_release_lock(struct file_lock *fl) { - struct ceph_file_info *fi = fl->fl_file->private_data; - struct inode *inode = file_inode(fl->fl_file); - struct ceph_inode_info *ci = ceph_inode(inode); - atomic_dec(&fi->num_locks); + struct inode *inode = fl->fl_u.ceph.inode; + struct ceph_inode_info *ci; + + /* + * If inode is NULL it should be a request file_lock, + * nothing we can do. + */ + if (!inode) + return; + + ci = ceph_inode(inode); if (atomic_dec_and_test(&ci->i_filelock_ref)) { /* clear error when all locks are released */ spin_lock(&ci->i_ceph_lock); ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK; spin_unlock(&ci->i_ceph_lock); } + fl->fl_u.ceph.inode = NULL; + iput(inode); } static const struct file_lock_operations ceph_fl_lock_ops = { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 26a0a8b9975e..27a245d959c0 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -806,6 +806,9 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, { struct ceph_mds_session *s; + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) + return ERR_PTR(-EIO); + if (mds >= mdsc->mdsmap->possible_max_rank) return ERR_PTR(-EINVAL); @@ -1478,6 +1481,9 @@ static int __open_session(struct ceph_mds_client *mdsc, int mstate; int mds = session->s_mds; + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) + return -EIO; + /* wait for mds to go active? */ mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); dout("open_session to mds%d (%s)\n", mds, @@ -2860,6 +2866,11 @@ static void __do_request(struct ceph_mds_client *mdsc, return; } + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) { + dout("do_request metadata corrupted\n"); + err = -EIO; + goto finish; + } if (req->r_timeout && time_after_eq(jiffies, req->r_started + req->r_timeout)) { dout("do_request timed out\n"); @@ -3245,6 +3256,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) u64 tid; int err, result; int mds = session->s_mds; + bool close_sessions = false; if (msg->front.iov_len < sizeof(*head)) { pr_err("mdsc_handle_reply got corrupt (short) reply\n"); @@ -3351,10 +3363,17 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) realm = NULL; if (rinfo->snapblob_len) { down_write(&mdsc->snap_rwsem); - ceph_update_snap_trace(mdsc, rinfo->snapblob, + err = ceph_update_snap_trace(mdsc, rinfo->snapblob, rinfo->snapblob + rinfo->snapblob_len, le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP, &realm); + if (err) { + up_write(&mdsc->snap_rwsem); + close_sessions = true; + if (err == -EIO) + ceph_msg_dump(msg); + goto out_err; + } downgrade_write(&mdsc->snap_rwsem); } else { down_read(&mdsc->snap_rwsem); @@ -3412,6 +3431,10 @@ out_err: req->r_end_latency, err); out: ceph_mdsc_put_request(req); + + /* Defer closing the sessions after s_mutex lock being released */ + if (close_sessions) + ceph_mdsc_close_sessions(mdsc); return; } @@ -3662,6 +3685,12 @@ static void handle_session(struct ceph_mds_session *session, break; case CEPH_SESSION_FLUSHMSG: + /* flush cap releases */ + spin_lock(&session->s_cap_lock); + if (session->s_num_cap_releases) + ceph_flush_cap_releases(mdsc, session); + spin_unlock(&session->s_cap_lock); + send_flushmsg_ack(mdsc, session, seq); break; @@ -5011,7 +5040,7 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped) } /* - * called after sb is ro. + * called after sb is ro or when metadata corrupted. */ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) { @@ -5301,7 +5330,8 @@ static void mds_peer_reset(struct ceph_connection *con) struct ceph_mds_client *mdsc = s->s_mdsc; pr_warn("mds%d closed our session\n", s->s_mds); - send_mds_reconnect(mdsc, s); + if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO) + send_mds_reconnect(mdsc, s); } static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index e4151852184e..87007203f130 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> +#include <linux/fs.h> #include <linux/sort.h> #include <linux/slab.h> #include <linux/iversion.h> @@ -766,8 +767,10 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm; struct ceph_snap_realm *first_realm = NULL; struct ceph_snap_realm *realm_to_rebuild = NULL; + struct ceph_client *client = mdsc->fsc->client; int rebuild_snapcs; int err = -ENOMEM; + int ret; LIST_HEAD(dirty_realms); lockdep_assert_held_write(&mdsc->snap_rwsem); @@ -884,6 +887,27 @@ fail: if (first_realm) ceph_put_snap_realm(mdsc, first_realm); pr_err("%s error %d\n", __func__, err); + + /* + * When receiving a corrupted snap trace we don't know what + * exactly has happened in MDS side. And we shouldn't continue + * writing to OSD, which may corrupt the snapshot contents. + * + * Just try to blocklist this kclient and then this kclient + * must be remounted to continue after the corrupted metadata + * fixed in the MDS side. + */ + WRITE_ONCE(mdsc->fsc->mount_state, CEPH_MOUNT_FENCE_IO); + ret = ceph_monc_blocklist_add(&client->monc, &client->msgr.inst.addr); + if (ret) + pr_err("%s failed to blocklist %s: %d\n", __func__, + ceph_pr_addr(&client->msgr.inst.addr), ret); + + WARN(1, "%s: %s%sdo remount to continue%s", + __func__, ret ? "" : ceph_pr_addr(&client->msgr.inst.addr), + ret ? "" : " was blocklisted, ", + err == -EIO ? " after corrupted snaptrace is fixed" : ""); + return err; } @@ -984,6 +1008,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, __le64 *split_inos = NULL, *split_realms = NULL; int i; int locked_rwsem = 0; + bool close_sessions = false; /* decode */ if (msg->front.iov_len < sizeof(*h)) @@ -1092,8 +1117,12 @@ skip_inode: * update using the provided snap trace. if we are deleting a * snap, we can avoid queueing cap_snaps. */ - ceph_update_snap_trace(mdsc, p, e, - op == CEPH_SNAP_OP_DESTROY, NULL); + if (ceph_update_snap_trace(mdsc, p, e, + op == CEPH_SNAP_OP_DESTROY, + NULL)) { + close_sessions = true; + goto bad; + } if (op == CEPH_SNAP_OP_SPLIT) /* we took a reference when we created the realm, above */ @@ -1112,6 +1141,9 @@ bad: out: if (locked_rwsem) up_write(&mdsc->snap_rwsem); + + if (close_sessions) + ceph_mdsc_close_sessions(mdsc); return; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 30bdb391a0dc..6ecca2c6d137 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -100,6 +100,17 @@ struct ceph_mount_options { char *mon_addr; }; +/* mount state */ +enum { + CEPH_MOUNT_MOUNTING, + CEPH_MOUNT_MOUNTED, + CEPH_MOUNT_UNMOUNTING, + CEPH_MOUNT_UNMOUNTED, + CEPH_MOUNT_SHUTDOWN, + CEPH_MOUNT_RECOVER, + CEPH_MOUNT_FENCE_IO, +}; + #define CEPH_ASYNC_CREATE_CONFLICT_BITS 8 struct ceph_fs_client { @@ -790,7 +801,6 @@ struct ceph_file_info { struct list_head rw_contexts; u32 filp_gen; - atomic_t num_locks; }; struct ceph_dir_file_info { @@ -1040,12 +1050,12 @@ static inline int ceph_do_getattr(struct inode *inode, int mask, bool force) { return __ceph_do_getattr(inode, NULL, mask, force); } -extern int ceph_permission(struct user_namespace *mnt_userns, +extern int ceph_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); extern int __ceph_setattr(struct inode *inode, struct iattr *attr); -extern int ceph_setattr(struct user_namespace *mnt_userns, +extern int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); -extern int ceph_getattr(struct user_namespace *mnt_userns, +extern int ceph_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); void ceph_inode_shutdown(struct inode *inode); @@ -1118,7 +1128,7 @@ void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx); #ifdef CONFIG_CEPH_FS_POSIX_ACL struct posix_acl *ceph_get_acl(struct inode *, int, bool); -int ceph_set_acl(struct user_namespace *mnt_userns, +int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); int ceph_pre_init_acls(struct inode *dir, umode_t *mode, struct ceph_acl_sec_ctx *as_ctx); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index f31350cda960..f65b07cc33a2 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -1285,7 +1285,7 @@ static int ceph_get_xattr_handler(const struct xattr_handler *handler, } static int ceph_set_xattr_handler(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index bbf58c2439da..9a2d390bd06f 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1674,7 +1674,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, return rc; } -struct posix_acl *cifs_get_acl(struct user_namespace *mnt_userns, +struct posix_acl *cifs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type) { #if defined(CONFIG_CIFS_ALLOW_INSECURE_LEGACY) && defined(CONFIG_CIFS_POSIX) @@ -1738,7 +1738,7 @@ out: #endif } -int cifs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int cifs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { #if defined(CONFIG_CIFS_ALLOW_INSECURE_LEGACY) && defined(CONFIG_CIFS_POSIX) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 5db73c0f792a..cbc18b4a9cb2 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -278,6 +278,7 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) * ( for NTLMSSP_AV_NB_DOMAIN_NAME followed by NTLMSSP_AV_EOL ) + * unicode length of a netbios domain name */ + kfree_sensitive(ses->auth_key.response); ses->auth_key.len = size + 2 * dlen; ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL); if (!ses->auth_key.response) { diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 10e00c624922..cb7c5460a80b 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/mount.h> #include <linux/slab.h> #include <linux/init.h> @@ -345,7 +346,7 @@ static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len) return -EOPNOTSUPP; } -static int cifs_permission(struct user_namespace *mnt_userns, +static int cifs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct cifs_sb_info *cifs_sb; @@ -361,7 +362,7 @@ static int cifs_permission(struct user_namespace *mnt_userns, on the client (above and beyond ACL on servers) for servers which do not support setting and viewing mode bits, so allowing client to check permissions is useful */ - return generic_permission(&init_user_ns, inode, mask); + return generic_permission(&nop_mnt_idmap, inode, mask); } static struct kmem_cache *cifs_inode_cachep; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 63a0ac2b9355..b58cd737b21e 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -49,7 +49,7 @@ extern void cifs_sb_deactive(struct super_block *sb); /* Functions related to inodes */ extern const struct inode_operations cifs_dir_inode_ops; extern struct inode *cifs_root_iget(struct super_block *); -extern int cifs_create(struct user_namespace *, struct inode *, +extern int cifs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool excl); extern int cifs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t); @@ -57,12 +57,12 @@ extern struct dentry *cifs_lookup(struct inode *, struct dentry *, unsigned int); extern int cifs_unlink(struct inode *dir, struct dentry *dentry); extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); -extern int cifs_mknod(struct user_namespace *, struct inode *, struct dentry *, +extern int cifs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); -extern int cifs_mkdir(struct user_namespace *, struct inode *, struct dentry *, +extern int cifs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t); extern int cifs_rmdir(struct inode *, struct dentry *); -extern int cifs_rename2(struct user_namespace *, struct inode *, +extern int cifs_rename2(struct mnt_idmap *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); extern int cifs_revalidate_file_attr(struct file *filp); @@ -72,9 +72,9 @@ extern int cifs_revalidate_dentry(struct dentry *); extern int cifs_invalidate_mapping(struct inode *inode); extern int cifs_revalidate_mapping(struct inode *inode); extern int cifs_zap_mapping(struct inode *inode); -extern int cifs_getattr(struct user_namespace *, const struct path *, +extern int cifs_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); -extern int cifs_setattr(struct user_namespace *, struct dentry *, +extern int cifs_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern int cifs_fiemap(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); @@ -124,7 +124,7 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path); /* Functions related to symlinks */ extern const char *cifs_get_link(struct dentry *, struct inode *, struct delayed_call *); -extern int cifs_symlink(struct user_namespace *mnt_userns, struct inode *inode, +extern int cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, struct dentry *direntry, const char *symname); #ifdef CONFIG_CIFS_XATTR diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index cfdd5bf701a1..cd8171a1c9a0 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -26,6 +26,7 @@ #include <uapi/linux/cifs/cifs_mount.h> #include "../smbfs_common/smb2pdu.h" #include "smb2pdu.h" +#include <linux/filelock.h> #define SMB_PATH_MAX 260 #define CIFS_PORT 445 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 1207b39686fb..b8a47704a6ef 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -225,9 +225,9 @@ extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, const char *, u32 *, u32); extern struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *, const struct cifs_fid *, u32 *, u32); -extern struct posix_acl *cifs_get_acl(struct user_namespace *mnt_userns, +extern struct posix_acl *cifs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type); -extern int cifs_set_acl(struct user_namespace *mnt_userns, +extern int cifs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, const char *, int); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 23f10e0d6e7e..60dd4e37030a 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -15,6 +15,7 @@ /* want to reuse a stale file handle and only the caller knows the file info */ #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/kernel.h> #include <linux/vfs.h> #include <linux/slab.h> diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d371259d6808..b2a04b4e89a5 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2606,11 +2606,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) INIT_LIST_HEAD(&tcon->pending_opens); tcon->status = TID_GOOD; - /* schedule query interfaces poll */ INIT_DELAYED_WORK(&tcon->query_interfaces, smb2_query_server_interfaces); - queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); + if (ses->server->dialect >= SMB30_PROT_ID && + (ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { + /* schedule query interfaces poll */ + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); + } spin_lock(&cifs_tcp_ses_lock); list_add(&tcon->tcon_list, &ses->tcon_list); diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c index b541e68378f6..b64d20374b9c 100644 --- a/fs/cifs/dfs.c +++ b/fs/cifs/dfs.c @@ -327,8 +327,8 @@ static int update_server_fullpath(struct TCP_Server_Info *server, struct cifs_sb return rc; } -static int target_share_matches_server(struct TCP_Server_Info *server, const char *tcp_host, - size_t tcp_host_len, char *share, bool *target_match) +static int target_share_matches_server(struct TCP_Server_Info *server, char *share, + bool *target_match) { int rc = 0; const char *dfs_host; @@ -338,13 +338,16 @@ static int target_share_matches_server(struct TCP_Server_Info *server, const cha extract_unc_hostname(share, &dfs_host, &dfs_host_len); /* Check if hostnames or addresses match */ - if (dfs_host_len != tcp_host_len || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) { - cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len, - dfs_host, (int)tcp_host_len, tcp_host); + cifs_server_lock(server); + if (dfs_host_len != strlen(server->hostname) || + strncasecmp(dfs_host, server->hostname, dfs_host_len)) { + cifs_dbg(FYI, "%s: %.*s doesn't match %s\n", __func__, + (int)dfs_host_len, dfs_host, server->hostname); rc = match_target_ip(server, dfs_host, dfs_host_len, target_match); if (rc) cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc); } + cifs_server_unlock(server); return rc; } @@ -358,13 +361,9 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t struct cifs_ses *root_ses = CIFS_DFS_ROOT_SES(tcon->ses); struct cifs_tcon *ipc = root_ses->tcon_ipc; char *share = NULL, *prefix = NULL; - const char *tcp_host; - size_t tcp_host_len; struct dfs_cache_tgt_iterator *tit; bool target_match; - extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len); - tit = dfs_cache_get_tgt_iterator(tl); if (!tit) { rc = -ENOENT; @@ -387,8 +386,7 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t break; } - rc = target_share_matches_server(server, tcp_host, tcp_host_len, share, - &target_match); + rc = target_share_matches_server(server, share, &target_match); if (rc) break; if (!target_match) { @@ -401,8 +399,7 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t if (ipc->need_reconnect) { scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); rc = ops->tree_connect(xid, ipc->ses, tree, ipc, cifs_sb->local_nls); - if (rc) - break; + cifs_dbg(FYI, "%s: reconnect ipc: %d\n", __func__, rc); } scnprintf(tree, MAX_TREE_SIZE, "\\%s", share); @@ -498,7 +495,9 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru } if (tcon->ipc) { + cifs_server_lock(server); scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); + cifs_server_unlock(server); rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc); goto out; } diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 43ad1176dcb9..ac86bd0ebd63 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -269,7 +269,7 @@ static int dfscache_proc_show(struct seq_file *m, void *v) list_for_each_entry(t, &ce->tlist, list) { seq_printf(m, " %s%s\n", t->name, - ce->tgthint == t ? " (target hint)" : ""); + READ_ONCE(ce->tgthint) == t ? " (target hint)" : ""); } } } @@ -321,7 +321,7 @@ static inline void dump_tgts(const struct cache_entry *ce) cifs_dbg(FYI, "target list:\n"); list_for_each_entry(t, &ce->tlist, list) { cifs_dbg(FYI, " %s%s\n", t->name, - ce->tgthint == t ? " (target hint)" : ""); + READ_ONCE(ce->tgthint) == t ? " (target hint)" : ""); } } @@ -427,7 +427,7 @@ static int cache_entry_hash(const void *data, int size, unsigned int *hash) /* Return target hint of a DFS cache entry */ static inline char *get_tgt_name(const struct cache_entry *ce) { - struct cache_dfs_tgt *t = ce->tgthint; + struct cache_dfs_tgt *t = READ_ONCE(ce->tgthint); return t ? t->name : ERR_PTR(-ENOENT); } @@ -470,6 +470,7 @@ static struct cache_dfs_tgt *alloc_target(const char *name, int path_consumed) static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs, struct cache_entry *ce, const char *tgthint) { + struct cache_dfs_tgt *target; int i; ce->ttl = max_t(int, refs[0].ttl, CACHE_MIN_TTL); @@ -496,8 +497,9 @@ static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs, ce->numtgts++; } - ce->tgthint = list_first_entry_or_null(&ce->tlist, - struct cache_dfs_tgt, list); + target = list_first_entry_or_null(&ce->tlist, struct cache_dfs_tgt, + list); + WRITE_ONCE(ce->tgthint, target); return 0; } @@ -558,7 +560,8 @@ static void remove_oldest_entry_locked(void) } /* Add a new DFS cache entry */ -static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs) +static struct cache_entry *add_cache_entry_locked(struct dfs_info3_param *refs, + int numrefs) { int rc; struct cache_entry *ce; @@ -573,11 +576,11 @@ static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs) rc = cache_entry_hash(refs[0].path_name, strlen(refs[0].path_name), &hash); if (rc) - return rc; + return ERR_PTR(rc); ce = alloc_cache_entry(refs, numrefs); if (IS_ERR(ce)) - return PTR_ERR(ce); + return ce; spin_lock(&cache_ttl_lock); if (!cache_ttl) { @@ -594,7 +597,7 @@ static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs) atomic_inc(&cache_count); - return 0; + return ce; } /* Check if two DFS paths are equal. @s1 and @s2 are expected to be in @cache_cp's charset */ @@ -641,7 +644,9 @@ static struct cache_entry *__lookup_cache_entry(const char *path, unsigned int h * * Use whole path components in the match. Must be called with htable_rw_lock held. * + * Return cached entry if successful. * Return ERR_PTR(-ENOENT) if the entry is not found. + * Return error ptr otherwise. */ static struct cache_entry *lookup_cache_entry(const char *path) { @@ -711,14 +716,15 @@ void dfs_cache_destroy(void) static int update_cache_entry_locked(struct cache_entry *ce, const struct dfs_info3_param *refs, int numrefs) { + struct cache_dfs_tgt *target; + char *th = NULL; int rc; - char *s, *th = NULL; WARN_ON(!rwsem_is_locked(&htable_rw_lock)); - if (ce->tgthint) { - s = ce->tgthint->name; - th = kstrdup(s, GFP_ATOMIC); + target = READ_ONCE(ce->tgthint); + if (target) { + th = kstrdup(target->name, GFP_ATOMIC); if (!th) return -ENOMEM; } @@ -767,51 +773,75 @@ static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses, const * * For interlinks, cifs_mount() and expand_dfs_referral() are supposed to * handle them properly. + * + * On success, return entry with acquired lock for reading, otherwise error ptr. */ -static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses, const char *path) +static struct cache_entry *cache_refresh_path(const unsigned int xid, + struct cifs_ses *ses, + const char *path, + bool force_refresh) { - int rc; - struct cache_entry *ce; struct dfs_info3_param *refs = NULL; + struct cache_entry *ce; int numrefs = 0; - bool newent = false; + int rc; cifs_dbg(FYI, "%s: search path: %s\n", __func__, path); - down_write(&htable_rw_lock); + down_read(&htable_rw_lock); ce = lookup_cache_entry(path); if (!IS_ERR(ce)) { - if (!cache_entry_expired(ce)) { - dump_ce(ce); - up_write(&htable_rw_lock); - return 0; - } - } else { - newent = true; + if (!force_refresh && !cache_entry_expired(ce)) + return ce; + } else if (PTR_ERR(ce) != -ENOENT) { + up_read(&htable_rw_lock); + return ce; } /* - * Either the entry was not found, or it is expired. + * Unlock shared access as we don't want to hold any locks while getting + * a new referral. The @ses used for performing the I/O could be + * reconnecting and it acquires @htable_rw_lock to look up the dfs cache + * in order to failover -- if necessary. + */ + up_read(&htable_rw_lock); + + /* + * Either the entry was not found, or it is expired, or it is a forced + * refresh. * Request a new DFS referral in order to create or update a cache entry. */ rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); - if (rc) - goto out_unlock; + if (rc) { + ce = ERR_PTR(rc); + goto out; + } dump_refs(refs, numrefs); - if (!newent) { - rc = update_cache_entry_locked(ce, refs, numrefs); - goto out_unlock; + down_write(&htable_rw_lock); + /* Re-check as another task might have it added or refreshed already */ + ce = lookup_cache_entry(path); + if (!IS_ERR(ce)) { + if (force_refresh || cache_entry_expired(ce)) { + rc = update_cache_entry_locked(ce, refs, numrefs); + if (rc) + ce = ERR_PTR(rc); + } + } else if (PTR_ERR(ce) == -ENOENT) { + ce = add_cache_entry_locked(refs, numrefs); } - rc = add_cache_entry_locked(refs, numrefs); + if (IS_ERR(ce)) { + up_write(&htable_rw_lock); + goto out; + } -out_unlock: - up_write(&htable_rw_lock); + downgrade_write(&htable_rw_lock); +out: free_dfs_info_array(refs, numrefs); - return rc; + return ce; } /* @@ -878,7 +908,7 @@ static int get_targets(struct cache_entry *ce, struct dfs_cache_tgt_list *tl) } it->it_path_consumed = t->path_consumed; - if (ce->tgthint == t) + if (READ_ONCE(ce->tgthint) == t) list_add(&it->it_list, head); else list_add_tail(&it->it_list, head); @@ -931,15 +961,8 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses, const struct nl if (IS_ERR(npath)) return PTR_ERR(npath); - rc = cache_refresh_path(xid, ses, npath); - if (rc) - goto out_free_path; - - down_read(&htable_rw_lock); - - ce = lookup_cache_entry(npath); + ce = cache_refresh_path(xid, ses, npath, false); if (IS_ERR(ce)) { - up_read(&htable_rw_lock); rc = PTR_ERR(ce); goto out_free_path; } @@ -1003,72 +1026,6 @@ out_unlock: } /** - * dfs_cache_update_tgthint - update target hint of a DFS cache entry - * - * If it doesn't find the cache entry, then it will get a DFS referral for @path - * and create a new entry. - * - * In case the cache entry exists but expired, it will get a DFS referral - * for @path and then update the respective cache entry. - * - * @xid: syscall id - * @ses: smb session - * @cp: codepage - * @remap: type of character remapping for paths - * @path: path to lookup in DFS referral cache - * @it: DFS target iterator - * - * Return zero if the target hint was updated successfully, otherwise non-zero. - */ -int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses, - const struct nls_table *cp, int remap, const char *path, - const struct dfs_cache_tgt_iterator *it) -{ - int rc; - const char *npath; - struct cache_entry *ce; - struct cache_dfs_tgt *t; - - npath = dfs_cache_canonical_path(path, cp, remap); - if (IS_ERR(npath)) - return PTR_ERR(npath); - - cifs_dbg(FYI, "%s: update target hint - path: %s\n", __func__, npath); - - rc = cache_refresh_path(xid, ses, npath); - if (rc) - goto out_free_path; - - down_write(&htable_rw_lock); - - ce = lookup_cache_entry(npath); - if (IS_ERR(ce)) { - rc = PTR_ERR(ce); - goto out_unlock; - } - - t = ce->tgthint; - - if (likely(!strcasecmp(it->it_name, t->name))) - goto out_unlock; - - list_for_each_entry(t, &ce->tlist, list) { - if (!strcasecmp(t->name, it->it_name)) { - ce->tgthint = t; - cifs_dbg(FYI, "%s: new target hint: %s\n", __func__, - it->it_name); - break; - } - } - -out_unlock: - up_write(&htable_rw_lock); -out_free_path: - kfree(npath); - return rc; -} - -/** * dfs_cache_noreq_update_tgthint - update target hint of a DFS cache entry * without sending any requests to the currently connected server. * @@ -1092,21 +1049,20 @@ void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt cifs_dbg(FYI, "%s: path: %s\n", __func__, path); - if (!down_write_trylock(&htable_rw_lock)) - return; + down_read(&htable_rw_lock); ce = lookup_cache_entry(path); if (IS_ERR(ce)) goto out_unlock; - t = ce->tgthint; + t = READ_ONCE(ce->tgthint); if (unlikely(!strcasecmp(it->it_name, t->name))) goto out_unlock; list_for_each_entry(t, &ce->tlist, list) { if (!strcasecmp(t->name, it->it_name)) { - ce->tgthint = t; + WRITE_ONCE(ce->tgthint, t); cifs_dbg(FYI, "%s: new target hint: %s\n", __func__, it->it_name); break; @@ -1114,7 +1070,7 @@ void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt } out_unlock: - up_write(&htable_rw_lock); + up_read(&htable_rw_lock); } /** @@ -1299,7 +1255,6 @@ static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, c * Resolve share's hostname and check if server address matches. Otherwise just ignore it * as we could not have upcall to resolve hostname or failed to convert ip address. */ - match = true; extract_unc_hostname(s1, &host, &hostlen); scnprintf(unc, sizeof(unc), "\\\\%.*s", (int)hostlen, host); @@ -1321,35 +1276,37 @@ static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, c * Mark dfs tcon for reconnecting when the currently connected tcon does not match any of the new * target shares in @refs. */ -static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cache_tgt_list *tl, - const struct dfs_info3_param *refs, int numrefs) +static void mark_for_reconnect_if_needed(struct TCP_Server_Info *server, + struct dfs_cache_tgt_list *old_tl, + struct dfs_cache_tgt_list *new_tl) { - struct dfs_cache_tgt_iterator *it; - int i; - - for (it = dfs_cache_get_tgt_iterator(tl); it; it = dfs_cache_get_next_tgt(tl, it)) { - for (i = 0; i < numrefs; i++) { - if (target_share_equal(tcon->ses->server, dfs_cache_get_tgt_name(it), - refs[i].node_name)) + struct dfs_cache_tgt_iterator *oit, *nit; + + for (oit = dfs_cache_get_tgt_iterator(old_tl); oit; + oit = dfs_cache_get_next_tgt(old_tl, oit)) { + for (nit = dfs_cache_get_tgt_iterator(new_tl); nit; + nit = dfs_cache_get_next_tgt(new_tl, nit)) { + if (target_share_equal(server, + dfs_cache_get_tgt_name(oit), + dfs_cache_get_tgt_name(nit))) return; } } cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__); - cifs_signal_cifsd_for_reconnect(tcon->ses->server, true); + cifs_signal_cifsd_for_reconnect(server, true); } /* Refresh dfs referral of tcon and mark it for reconnect if needed */ static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_refresh) { - struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); + struct dfs_cache_tgt_list old_tl = DFS_CACHE_TGT_LIST_INIT(old_tl); + struct dfs_cache_tgt_list new_tl = DFS_CACHE_TGT_LIST_INIT(new_tl); struct cifs_ses *ses = CIFS_DFS_ROOT_SES(tcon->ses); struct cifs_tcon *ipc = ses->tcon_ipc; - struct dfs_info3_param *refs = NULL; bool needs_refresh = false; struct cache_entry *ce; unsigned int xid; - int numrefs = 0; int rc = 0; xid = get_xid(); @@ -1358,9 +1315,8 @@ static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_r ce = lookup_cache_entry(path); needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce); if (!IS_ERR(ce)) { - rc = get_targets(ce, &tl); - if (rc) - cifs_dbg(FYI, "%s: could not get dfs targets: %d\n", __func__, rc); + rc = get_targets(ce, &old_tl); + cifs_dbg(FYI, "%s: get_targets: %d\n", __func__, rc); } up_read(&htable_rw_lock); @@ -1377,26 +1333,18 @@ static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_r } spin_unlock(&ipc->tc_lock); - rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); - if (!rc) { - /* Create or update a cache entry with the new referral */ - dump_refs(refs, numrefs); - - down_write(&htable_rw_lock); - ce = lookup_cache_entry(path); - if (IS_ERR(ce)) - add_cache_entry_locked(refs, numrefs); - else if (force_refresh || cache_entry_expired(ce)) - update_cache_entry_locked(ce, refs, numrefs); - up_write(&htable_rw_lock); - - mark_for_reconnect_if_needed(tcon, &tl, refs, numrefs); + ce = cache_refresh_path(xid, ses, path, true); + if (!IS_ERR(ce)) { + rc = get_targets(ce, &new_tl); + up_read(&htable_rw_lock); + cifs_dbg(FYI, "%s: get_targets: %d\n", __func__, rc); + mark_for_reconnect_if_needed(tcon->ses->server, &old_tl, &new_tl); } out: free_xid(xid); - dfs_cache_free_tgts(&tl); - free_dfs_info_array(refs, numrefs); + dfs_cache_free_tgts(&old_tl); + dfs_cache_free_tgts(&new_tl); return rc; } diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h index f7cff0be9327..be3b5a44cf82 100644 --- a/fs/cifs/dfs_cache.h +++ b/fs/cifs/dfs_cache.h @@ -35,9 +35,6 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses, const struct nl struct dfs_cache_tgt_list *tgt_list); int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref, struct dfs_cache_tgt_list *tgt_list); -int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses, - const struct nls_table *cp, int remap, const char *path, - const struct dfs_cache_tgt_iterator *it); void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_iterator *it); int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iterator *it, struct dfs_info3_param *ref); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index ad4208bf1e32..2b6076324ffc 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -529,7 +529,7 @@ out_free_xid: return rc; } -int cifs_create(struct user_namespace *mnt_userns, struct inode *inode, +int cifs_create(struct mnt_idmap *idmap, struct inode *inode, struct dentry *direntry, umode_t mode, bool excl) { int rc; @@ -579,7 +579,7 @@ out_free_xid: return rc; } -int cifs_mknod(struct user_namespace *mnt_userns, struct inode *inode, +int cifs_mknod(struct mnt_idmap *idmap, struct inode *inode, struct dentry *direntry, umode_t mode, dev_t device_number) { int rc = -EPERM; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 22dfc1f8b4f1..2870e3b6ffe8 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -9,6 +9,7 @@ * */ #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/backing-dev.h> #include <linux/stat.h> #include <linux/fcntl.h> @@ -3889,7 +3890,7 @@ uncached_fill_pages(struct TCP_Server_Info *server, rdata->got_bytes += result; } - return rdata->got_bytes > 0 && result != -ECONNABORTED ? + return result != -ECONNABORTED && rdata->got_bytes > 0 ? rdata->got_bytes : result; } @@ -4665,7 +4666,7 @@ readpages_fill_pages(struct TCP_Server_Info *server, rdata->got_bytes += result; } - return rdata->got_bytes > 0 && result != -ECONNABORTED ? + return result != -ECONNABORTED && rdata->got_bytes > 0 ? rdata->got_bytes : result; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index f145a59af89b..11cdc7cfe0ba 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1910,7 +1910,7 @@ posix_mkdir_get_info: } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ -int cifs_mkdir(struct user_namespace *mnt_userns, struct inode *inode, +int cifs_mkdir(struct mnt_idmap *idmap, struct inode *inode, struct dentry *direntry, umode_t mode) { int rc = 0; @@ -2138,7 +2138,7 @@ do_rename_exit: } int -cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir, +cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, struct dentry *source_dentry, struct inode *target_dir, struct dentry *target_dentry, unsigned int flags) { @@ -2496,7 +2496,7 @@ int cifs_revalidate_dentry(struct dentry *dentry) return cifs_revalidate_mapping(inode); } -int cifs_getattr(struct user_namespace *mnt_userns, const struct path *path, +int cifs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; @@ -2537,7 +2537,7 @@ int cifs_getattr(struct user_namespace *mnt_userns, const struct path *path, return rc; } - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); stat->blksize = cifs_sb->ctx->bsize; stat->ino = CIFS_I(inode)->uniqueid; @@ -2752,7 +2752,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) attrs->ia_valid |= ATTR_FORCE; - rc = setattr_prepare(&init_user_ns, direntry, attrs); + rc = setattr_prepare(&nop_mnt_idmap, direntry, attrs); if (rc < 0) goto out; @@ -2859,7 +2859,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size); } - setattr_copy(&init_user_ns, inode, attrs); + setattr_copy(&nop_mnt_idmap, inode, attrs); mark_inode_dirty(inode); /* force revalidate when any of these times are set since some @@ -2903,7 +2903,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) attrs->ia_valid |= ATTR_FORCE; - rc = setattr_prepare(&init_user_ns, direntry, attrs); + rc = setattr_prepare(&nop_mnt_idmap, direntry, attrs); if (rc < 0) goto cifs_setattr_exit; @@ -3058,7 +3058,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size); } - setattr_copy(&init_user_ns, inode, attrs); + setattr_copy(&nop_mnt_idmap, inode, attrs); mark_inode_dirty(inode); cifs_setattr_exit: @@ -3068,7 +3068,7 @@ cifs_setattr_exit: } int -cifs_setattr(struct user_namespace *mnt_userns, struct dentry *direntry, +cifs_setattr(struct mnt_idmap *idmap, struct dentry *direntry, struct iattr *attrs) { struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); diff --git a/fs/cifs/link.c b/fs/cifs/link.c index bd374feeccaa..4510dea77be3 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -428,6 +428,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.disposition = FILE_CREATE; oparms.fid = &fid; oparms.reconnect = false; + oparms.mode = 0644; rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL, NULL); @@ -568,7 +569,7 @@ cifs_hl_exit: } int -cifs_symlink(struct user_namespace *mnt_userns, struct inode *inode, +cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, struct dentry *direntry, const char *symname) { int rc = -EOPNOTSUPP; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 4d3c586785a5..2a19c7987c5b 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1277,7 +1277,9 @@ int match_target_ip(struct TCP_Server_Info *server, if (rc < 0) return rc; + spin_lock(&server->srv_lock); *result = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, (struct sockaddr *)&ss); + spin_unlock(&server->srv_lock); cifs_dbg(FYI, "%s: ip addresses match: %u\n", __func__, *result); return 0; } diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 9e7d9f0baa18..c47b254f0d1e 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -292,9 +292,10 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) continue; } kref_get(&iface->refcount); + break; } - if (!list_entry_is_head(iface, &ses->iface_list, iface_head)) { + if (list_entry_is_head(iface, &ses->iface_list, iface_head)) { rc = 1; iface = NULL; cifs_dbg(FYI, "unable to find a suitable iface\n"); @@ -814,6 +815,7 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, return -EINVAL; } if (tilen) { + kfree_sensitive(ses->auth_key.response); ses->auth_key.response = kmemdup(bcc_ptr + tioffset, tilen, GFP_KERNEL); if (!ses->auth_key.response) { @@ -1427,6 +1429,7 @@ sess_auth_kerberos(struct sess_data *sess_data) goto out_put_spnego_key; } + kfree_sensitive(ses->auth_key.response); ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, GFP_KERNEL); if (!ses->auth_key.response) { diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 50480751e521..4cb364454e13 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -562,17 +562,20 @@ static int cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { rc = SMBQueryInformation(xid, tcon, full_path, &fi, cifs_sb->local_nls, cifs_remap(cifs_sb)); - if (!rc) - move_cifs_info_to_smb2(&data->fi, &fi); *adjustTZ = true; } - if (!rc && (le32_to_cpu(fi.Attributes) & ATTR_REPARSE)) { + if (!rc) { int tmprc; int oplock = 0; struct cifs_fid fid; struct cifs_open_parms oparms; + move_cifs_info_to_smb2(&data->fi, &fi); + + if (!(le32_to_cpu(fi.Attributes) & ATTR_REPARSE)) + return 0; + oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = FILE_READ_ATTRIBUTES; @@ -716,17 +719,25 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path, static int cifs_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, void *buf) { - FILE_ALL_INFO *fi = buf; + struct cifs_open_info_data *data = buf; + FILE_ALL_INFO fi = {}; + int rc; if (!(oparms->tcon->ses->capabilities & CAP_NT_SMBS)) - return SMBLegacyOpen(xid, oparms->tcon, oparms->path, - oparms->disposition, - oparms->desired_access, - oparms->create_options, - &oparms->fid->netfid, oplock, fi, - oparms->cifs_sb->local_nls, - cifs_remap(oparms->cifs_sb)); - return CIFS_open(xid, oparms, oplock, fi); + rc = SMBLegacyOpen(xid, oparms->tcon, oparms->path, + oparms->disposition, + oparms->desired_access, + oparms->create_options, + &oparms->fid->netfid, oplock, &fi, + oparms->cifs_sb->local_nls, + cifs_remap(oparms->cifs_sb)); + else + rc = CIFS_open(xid, oparms, oplock, &fi); + + if (!rc && data) + move_cifs_info_to_smb2(&data->fi, &fi); + + return rc; } static void @@ -1050,7 +1061,7 @@ cifs_make_node(unsigned int xid, struct inode *inode, struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct inode *newinode = NULL; int rc = -EPERM; - FILE_ALL_INFO *buf = NULL; + struct cifs_open_info_data buf = {}; struct cifs_io_parms io_parms; __u32 oplock = 0; struct cifs_fid fid; @@ -1082,14 +1093,14 @@ cifs_make_node(unsigned int xid, struct inode *inode, cifs_sb->local_nls, cifs_remap(cifs_sb)); if (rc) - goto out; + return rc; rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); if (rc == 0) d_instantiate(dentry, newinode); - goto out; + return rc; } /* @@ -1097,19 +1108,13 @@ cifs_make_node(unsigned int xid, struct inode *inode, * support block and char device (no socket & fifo) */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) - goto out; + return rc; if (!S_ISCHR(mode) && !S_ISBLK(mode)) - goto out; + return rc; cifs_dbg(FYI, "sfu compat create special file\n"); - buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); - if (buf == NULL) { - rc = -ENOMEM; - goto out; - } - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; @@ -1124,21 +1129,21 @@ cifs_make_node(unsigned int xid, struct inode *inode, oplock = REQ_OPLOCK; else oplock = 0; - rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, buf); + rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf); if (rc) - goto out; + return rc; /* * BB Do not bother to decode buf since no local inode yet to put * timestamps in, but we can reuse it safely. */ - pdev = (struct win_dev *)buf; + pdev = (struct win_dev *)&buf.fi; io_parms.pid = current->tgid; io_parms.tcon = tcon; io_parms.offset = 0; io_parms.length = sizeof(struct win_dev); - iov[1].iov_base = buf; + iov[1].iov_base = &buf.fi; iov[1].iov_len = sizeof(struct win_dev); if (S_ISCHR(mode)) { memcpy(pdev->type, "IntxCHR", 8); @@ -1157,8 +1162,8 @@ cifs_make_node(unsigned int xid, struct inode *inode, d_drop(dentry); /* FIXME: add code here to set EAs */ -out: - kfree(buf); + + cifs_free_open_info(&buf); return rc; } diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index ba6cc50af390..9f1dd04b555a 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -7,6 +7,7 @@ * */ #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/stat.h> #include <linux/slab.h> #include <linux/pagemap.h> diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index dc160de7a6de..e6bcd2baf446 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -530,7 +530,6 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, p = buf; spin_lock(&ses->iface_lock); - ses->iface_count = 0; /* * Go through iface_list and do kref_put to remove * any unused ifaces. ifaces in use will be removed @@ -540,6 +539,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, iface_head) { iface->is_active = 0; kref_put(&iface->refcount, release_iface); + ses->iface_count--; } spin_unlock(&ses->iface_lock); @@ -618,6 +618,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, /* just get a ref so that it doesn't get picked/freed */ iface->is_active = 1; kref_get(&iface->refcount); + ses->iface_count++; spin_unlock(&ses->iface_lock); goto next_iface; } else if (ret < 0) { @@ -4488,17 +4489,12 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, /* copy pages form the old */ for (j = 0; j < npages; j++) { - char *dst, *src; unsigned int offset, len; rqst_page_get_length(new, j, &len, &offset); - dst = kmap_local_page(new->rq_pages[j]) + offset; - src = kmap_local_page(old->rq_pages[j]) + offset; - - memcpy(dst, src, len); - kunmap(new->rq_pages[j]); - kunmap(old->rq_pages[j]); + memcpy_page(new->rq_pages[j], offset, + old->rq_pages[j], offset, len); } } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a5695748a89b..2c9ffa921e6f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -541,9 +541,10 @@ static void assemble_neg_contexts(struct smb2_negotiate_req *req, struct TCP_Server_Info *server, unsigned int *total_len) { - char *pneg_ctxt; - char *hostname = NULL; unsigned int ctxt_len, neg_context_count; + struct TCP_Server_Info *pserver; + char *pneg_ctxt; + char *hostname; if (*total_len > 200) { /* In case length corrupted don't want to overrun smb buffer */ @@ -574,8 +575,9 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, * secondary channels don't have the hostname field populated * use the hostname field in the primary channel instead */ - hostname = CIFS_SERVER_IS_CHAN(server) ? - server->primary_server->hostname : server->hostname; + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + cifs_server_lock(pserver); + hostname = pserver->hostname; if (hostname && (hostname[0] != 0)) { ctxt_len = build_netname_ctxt((struct smb2_netname_neg_context *)pneg_ctxt, hostname); @@ -584,6 +586,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, neg_context_count = 3; } else neg_context_count = 2; + cifs_server_unlock(pserver); build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt); *total_len += sizeof(struct smb2_posix_neg_context); @@ -1450,6 +1453,7 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) /* keep session key if binding */ if (!is_binding) { + kfree_sensitive(ses->auth_key.response); ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, GFP_KERNEL); if (!ses->auth_key.response) { @@ -1479,8 +1483,11 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) out_put_spnego_key: key_invalidate(spnego_key); key_put(spnego_key); - if (rc) + if (rc) { kfree_sensitive(ses->auth_key.response); + ses->auth_key.response = NULL; + ses->auth_key.len = 0; + } out: sess_data->result = rc; sess_data->func = NULL; @@ -4156,12 +4163,15 @@ smb2_readv_callback(struct mid_q_entry *mid) (struct smb2_hdr *)rdata->iov[0].iov_base; struct cifs_credits credits = { .value = 0, .instance = 0 }; struct smb_rqst rqst = { .rq_iov = &rdata->iov[1], - .rq_nvec = 1, - .rq_pages = rdata->pages, - .rq_offset = rdata->page_offset, - .rq_npages = rdata->nr_pages, - .rq_pagesz = rdata->pagesz, - .rq_tailsz = rdata->tailsz }; + .rq_nvec = 1, }; + + if (rdata->got_bytes) { + rqst.rq_pages = rdata->pages; + rqst.rq_offset = rdata->page_offset; + rqst.rq_npages = rdata->nr_pages; + rqst.rq_pagesz = rdata->pagesz; + rqst.rq_tailsz = rdata->tailsz; + } WARN_ONCE(rdata->server != mid->server, "rdata server %p != mid server %p", diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 90789aaa6567..8c816b25ce7c 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1405,6 +1405,7 @@ void smbd_destroy(struct TCP_Server_Info *server) destroy_workqueue(info->workqueue); log_rdma_event(INFO, "rdma session destroyed\n"); kfree(info); + server->smbd_conn = NULL; } /* diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 5f2fb2fd2e37..50e762fa1a14 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -89,7 +89,7 @@ static int cifs_creation_time_set(unsigned int xid, struct cifs_tcon *pTcon, } static int cifs_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index 9be281bbcc06..dd6277d87afb 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -46,12 +46,12 @@ extern const struct file_operations coda_ioctl_operations; /* operations shared over more than one file */ int coda_open(struct inode *i, struct file *f); int coda_release(struct inode *i, struct file *f); -int coda_permission(struct user_namespace *mnt_userns, struct inode *inode, +int coda_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); int coda_revalidate_inode(struct inode *); -int coda_getattr(struct user_namespace *, const struct path *, struct kstat *, +int coda_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); -int coda_setattr(struct user_namespace *, struct dentry *, struct iattr *); +int coda_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); /* this file: helpers */ char *coda_f2s(struct CodaFid *f); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 328d7a684b63..8450b1bd354b 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -73,7 +73,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsig } -int coda_permission(struct user_namespace *mnt_userns, struct inode *inode, +int coda_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { int error; @@ -133,7 +133,7 @@ static inline void coda_dir_drop_nlink(struct inode *dir) } /* creation routines: create, mknod, mkdir, link, symlink */ -static int coda_create(struct user_namespace *mnt_userns, struct inode *dir, +static int coda_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *de, umode_t mode, bool excl) { int error; @@ -166,7 +166,7 @@ err_out: return error; } -static int coda_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int coda_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *de, umode_t mode) { struct inode *inode; @@ -228,7 +228,7 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode, } -static int coda_symlink(struct user_namespace *mnt_userns, +static int coda_symlink(struct mnt_idmap *idmap, struct inode *dir_inode, struct dentry *de, const char *symname) { @@ -295,7 +295,7 @@ static int coda_rmdir(struct inode *dir, struct dentry *de) } /* rename */ -static int coda_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +static int coda_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 2185328b65c7..d661e6cf17ac 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -251,16 +251,16 @@ static void coda_evict_inode(struct inode *inode) coda_cache_clear_inode(inode); } -int coda_getattr(struct user_namespace *mnt_userns, const struct path *path, +int coda_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { int err = coda_revalidate_inode(d_inode(path->dentry)); if (!err) - generic_fillattr(&init_user_ns, d_inode(path->dentry), stat); + generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat); return err; } -int coda_setattr(struct user_namespace *mnt_userns, struct dentry *de, +int coda_setattr(struct mnt_idmap *idmap, struct dentry *de, struct iattr *iattr) { struct inode *inode = d_inode(de); diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index cb9fd59a688c..36e35c15561a 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -24,7 +24,7 @@ #include "coda_linux.h" /* pioctl ops */ -static int coda_ioctl_permission(struct user_namespace *mnt_userns, +static int coda_ioctl_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); static long coda_pioctl(struct file *filp, unsigned int cmd, unsigned long user_data); @@ -41,7 +41,7 @@ const struct file_operations coda_ioctl_operations = { }; /* the coda pioctl inode ops */ -static int coda_ioctl_permission(struct user_namespace *mnt_userns, +static int coda_ioctl_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { return (mask & MAY_EXEC) ? -EACCES : 0; diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index c0395363eab9..e710a1782382 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -77,7 +77,7 @@ extern void configfs_hash_and_remove(struct dentry * dir, const char * name); extern const unsigned char * configfs_get_name(struct configfs_dirent *sd); extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent); -extern int configfs_setattr(struct user_namespace *mnt_userns, +extern int configfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr); extern struct dentry *configfs_pin_fs(void); @@ -91,7 +91,7 @@ extern const struct inode_operations configfs_root_inode_operations; extern const struct inode_operations configfs_symlink_inode_operations; extern const struct dentry_operations configfs_dentry_ops; -extern int configfs_symlink(struct user_namespace *mnt_userns, +extern int configfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname); extern int configfs_unlink(struct inode *dir, struct dentry *dentry); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index ec6519e1ca3b..4afcbbe63e68 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1251,7 +1251,7 @@ out_root_unlock: } EXPORT_SYMBOL(configfs_depend_item_unlocked); -static int configfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int configfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { int ret = 0; diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index b601610e9907..1c15edbe70ff 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -32,7 +32,7 @@ static const struct inode_operations configfs_inode_operations ={ .setattr = configfs_setattr, }; -int configfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int configfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode * inode = d_inode(dentry); @@ -60,7 +60,7 @@ int configfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, } /* attributes were changed atleast once in past */ - error = simple_setattr(mnt_userns, dentry, iattr); + error = simple_setattr(idmap, dentry, iattr); if (error) return error; diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 0623c3edcfb9..69133ec1fac2 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -137,7 +137,7 @@ static int get_target(const char *symname, struct path *path, } -int configfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +int configfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { int ret; @@ -196,7 +196,7 @@ int configfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, if (dentry->d_inode || d_unhashed(dentry)) ret = -EEXIST; else - ret = inode_permission(&init_user_ns, dir, + ret = inode_permission(&nop_mnt_idmap, dir, MAY_WRITE | MAY_EXEC); if (!ret) ret = type->ct_item_ops->allow_link(parent_item, target_item); diff --git a/fs/coredump.c b/fs/coredump.c index de78bde2991b..68619329ec65 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -644,7 +644,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) goto close_fail; } } else { - struct user_namespace *mnt_userns; + struct mnt_idmap *idmap; struct inode *inode; int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW | O_LARGEFILE | O_EXCL; @@ -722,8 +722,8 @@ void do_coredump(const kernel_siginfo_t *siginfo) * a process dumps core while its cwd is e.g. on a vfat * filesystem. */ - mnt_userns = file_mnt_user_ns(cprm.file); - if (!vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode), + idmap = file_mnt_idmap(cprm.file); + if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) { pr_info_ratelimited("Core dump to %s aborted: cannot preserve file owner\n", cn.corename); @@ -736,7 +736,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) } if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) goto close_fail; - if (do_truncate(mnt_userns, cprm.file->f_path.dentry, + if (do_truncate(idmap, cprm.file->f_path.dentry, 0, 0, cprm.file)) goto close_fail; } @@ -838,6 +838,30 @@ static int __dump_skip(struct coredump_params *cprm, size_t nr) } } +int dump_emit(struct coredump_params *cprm, const void *addr, int nr) +{ + if (cprm->to_skip) { + if (!__dump_skip(cprm, cprm->to_skip)) + return 0; + cprm->to_skip = 0; + } + return __dump_emit(cprm, addr, nr); +} +EXPORT_SYMBOL(dump_emit); + +void dump_skip_to(struct coredump_params *cprm, unsigned long pos) +{ + cprm->to_skip = pos - cprm->pos; +} +EXPORT_SYMBOL(dump_skip_to); + +void dump_skip(struct coredump_params *cprm, size_t nr) +{ + cprm->to_skip += nr; +} +EXPORT_SYMBOL(dump_skip); + +#ifdef CONFIG_ELF_CORE static int dump_emit_page(struct coredump_params *cprm, struct page *page) { struct bio_vec bvec = { @@ -871,30 +895,6 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page) return 1; } -int dump_emit(struct coredump_params *cprm, const void *addr, int nr) -{ - if (cprm->to_skip) { - if (!__dump_skip(cprm, cprm->to_skip)) - return 0; - cprm->to_skip = 0; - } - return __dump_emit(cprm, addr, nr); -} -EXPORT_SYMBOL(dump_emit); - -void dump_skip_to(struct coredump_params *cprm, unsigned long pos) -{ - cprm->to_skip = pos - cprm->pos; -} -EXPORT_SYMBOL(dump_skip_to); - -void dump_skip(struct coredump_params *cprm, size_t nr) -{ - cprm->to_skip += nr; -} -EXPORT_SYMBOL(dump_skip); - -#ifdef CONFIG_ELF_CORE int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len) { diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 893661b52376..fa09c925bda8 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -506,7 +506,7 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg) return -EFAULT; policy.version = version; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) return -EACCES; ret = mnt_want_write_file(filp); @@ -1271,8 +1271,9 @@ static s64 dax_unshare_iter(struct iomap_iter *iter) if (ret < 0) goto out_unlock; - ret = copy_mc_to_kernel(daddr, saddr, length); - if (ret) + if (copy_mc_to_kernel(daddr, saddr, length) == 0) + ret = length; + else ret = -EIO; out_unlock: diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 2e8e112b1993..bf397f6a6a33 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -42,7 +42,7 @@ static unsigned int debugfs_allow __ro_after_init = DEFAULT_DEBUGFS_ALLOW_BITS; * so that we can use the file mode as part of a heuristic to determine whether * to lock down individual files. */ -static int debugfs_setattr(struct user_namespace *mnt_userns, +static int debugfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *ia) { int ret; @@ -52,7 +52,7 @@ static int debugfs_setattr(struct user_namespace *mnt_userns, if (ret) return ret; } - return simple_setattr(&init_user_ns, dentry, ia); + return simple_setattr(&nop_mnt_idmap, dentry, ia); } static const struct inode_operations debugfs_file_inode_operations = { @@ -837,7 +837,7 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, take_dentry_name_snapshot(&old_name, old_dentry); - error = simple_rename(&init_user_ns, d_inode(old_dir), old_dentry, + error = simple_rename(&nop_mnt_idmap, d_inode(old_dir), old_dentry, d_inode(new_dir), dentry, 0); if (error) { release_dentry_name_snapshot(&old_name); diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 737f185aad8d..ed4357e62f35 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -4,6 +4,7 @@ */ #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/miscdevice.h> #include <linux/poll.h> #include <linux/dlm.h> diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index e3f5d7f3c8a0..bd3f3c755b24 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1105,7 +1105,7 @@ ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry, } inode_lock(lower_inode); - rc = __vfs_setxattr(&init_user_ns, lower_dentry, lower_inode, + rc = __vfs_setxattr(&nop_mnt_idmap, lower_dentry, lower_inode, ECRYPTFS_XATTR_NAME, page_virt, size, 0); if (!rc && ecryptfs_inode) fsstack_copy_attr_all(ecryptfs_inode, lower_inode); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index f3cd00fac9c3..144ace9e0dd9 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -139,7 +139,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, if (d_unhashed(lower_dentry)) rc = -EINVAL; else - rc = vfs_unlink(&init_user_ns, lower_dir, lower_dentry, + rc = vfs_unlink(&nop_mnt_idmap, lower_dir, lower_dentry, NULL); } if (rc) { @@ -180,7 +180,7 @@ ecryptfs_do_create(struct inode *directory_inode, rc = lock_parent(ecryptfs_dentry, &lower_dentry, &lower_dir); if (!rc) - rc = vfs_create(&init_user_ns, lower_dir, + rc = vfs_create(&nop_mnt_idmap, lower_dir, lower_dentry, mode, true); if (rc) { printk(KERN_ERR "%s: Failure to create dentry in lower fs; " @@ -191,7 +191,7 @@ ecryptfs_do_create(struct inode *directory_inode, inode = __ecryptfs_get_inode(d_inode(lower_dentry), directory_inode->i_sb); if (IS_ERR(inode)) { - vfs_unlink(&init_user_ns, lower_dir, lower_dentry, NULL); + vfs_unlink(&nop_mnt_idmap, lower_dir, lower_dentry, NULL); goto out_lock; } fsstack_copy_attr_times(directory_inode, lower_dir); @@ -253,7 +253,7 @@ out: * Returns zero on success; non-zero on error condition */ static int -ecryptfs_create(struct user_namespace *mnt_userns, +ecryptfs_create(struct mnt_idmap *idmap, struct inode *directory_inode, struct dentry *ecryptfs_dentry, umode_t mode, bool excl) { @@ -434,7 +434,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); rc = lock_parent(new_dentry, &lower_new_dentry, &lower_dir); if (!rc) - rc = vfs_link(lower_old_dentry, &init_user_ns, lower_dir, + rc = vfs_link(lower_old_dentry, &nop_mnt_idmap, lower_dir, lower_new_dentry, NULL); if (rc || d_really_is_negative(lower_new_dentry)) goto out_lock; @@ -456,7 +456,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) return ecryptfs_do_unlink(dir, dentry, d_inode(dentry)); } -static int ecryptfs_symlink(struct user_namespace *mnt_userns, +static int ecryptfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { @@ -478,7 +478,7 @@ static int ecryptfs_symlink(struct user_namespace *mnt_userns, strlen(symname)); if (rc) goto out_lock; - rc = vfs_symlink(&init_user_ns, lower_dir, lower_dentry, + rc = vfs_symlink(&nop_mnt_idmap, lower_dir, lower_dentry, encoded_symname); kfree(encoded_symname); if (rc || d_really_is_negative(lower_dentry)) @@ -495,7 +495,7 @@ out_lock: return rc; } -static int ecryptfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { int rc; @@ -504,7 +504,7 @@ static int ecryptfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, rc = lock_parent(dentry, &lower_dentry, &lower_dir); if (!rc) - rc = vfs_mkdir(&init_user_ns, lower_dir, + rc = vfs_mkdir(&nop_mnt_idmap, lower_dir, lower_dentry, mode); if (rc || d_really_is_negative(lower_dentry)) goto out; @@ -533,7 +533,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) if (d_unhashed(lower_dentry)) rc = -EINVAL; else - rc = vfs_rmdir(&init_user_ns, lower_dir, lower_dentry); + rc = vfs_rmdir(&nop_mnt_idmap, lower_dir, lower_dentry); } if (!rc) { clear_nlink(d_inode(dentry)); @@ -548,7 +548,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) } static int -ecryptfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +ecryptfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { int rc; @@ -557,7 +557,7 @@ ecryptfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, rc = lock_parent(dentry, &lower_dentry, &lower_dir); if (!rc) - rc = vfs_mknod(&init_user_ns, lower_dir, + rc = vfs_mknod(&nop_mnt_idmap, lower_dir, lower_dentry, mode, dev); if (rc || d_really_is_negative(lower_dentry)) goto out; @@ -574,7 +574,7 @@ out: } static int -ecryptfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { @@ -616,10 +616,10 @@ ecryptfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, goto out_lock; } - rd.old_mnt_userns = &init_user_ns; + rd.old_mnt_idmap = &nop_mnt_idmap; rd.old_dir = d_inode(lower_old_dir_dentry); rd.old_dentry = lower_old_dentry; - rd.new_mnt_userns = &init_user_ns; + rd.new_mnt_idmap = &nop_mnt_idmap; rd.new_dir = d_inode(lower_new_dir_dentry); rd.new_dentry = lower_new_dentry; rc = vfs_rename(&rd); @@ -856,7 +856,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); inode_lock(d_inode(lower_dentry)); - rc = notify_change(&init_user_ns, lower_dentry, + rc = notify_change(&nop_mnt_idmap, lower_dentry, &lower_ia, NULL); inode_unlock(d_inode(lower_dentry)); } @@ -864,16 +864,16 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) } static int -ecryptfs_permission(struct user_namespace *mnt_userns, struct inode *inode, +ecryptfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { - return inode_permission(&init_user_ns, + return inode_permission(&nop_mnt_idmap, ecryptfs_inode_to_lower(inode), mask); } /** * ecryptfs_setattr - * @mnt_userns: user namespace of the target mount + * @idmap: idmap of the target mount * @dentry: dentry handle to the inode to modify * @ia: Structure with flags of what to change and values * @@ -884,7 +884,7 @@ ecryptfs_permission(struct user_namespace *mnt_userns, struct inode *inode, * All other metadata changes will be passed right to the lower filesystem, * and we will just update our inode to look like the lower. */ -static int ecryptfs_setattr(struct user_namespace *mnt_userns, +static int ecryptfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *ia) { int rc = 0; @@ -939,7 +939,7 @@ static int ecryptfs_setattr(struct user_namespace *mnt_userns, } mutex_unlock(&crypt_stat->cs_mutex); - rc = setattr_prepare(&init_user_ns, dentry, ia); + rc = setattr_prepare(&nop_mnt_idmap, dentry, ia); if (rc) goto out; if (ia->ia_valid & ATTR_SIZE) { @@ -965,14 +965,14 @@ static int ecryptfs_setattr(struct user_namespace *mnt_userns, lower_ia.ia_valid &= ~ATTR_MODE; inode_lock(d_inode(lower_dentry)); - rc = notify_change(&init_user_ns, lower_dentry, &lower_ia, NULL); + rc = notify_change(&nop_mnt_idmap, lower_dentry, &lower_ia, NULL); inode_unlock(d_inode(lower_dentry)); out: fsstack_copy_attr_all(inode, lower_inode); return rc; } -static int ecryptfs_getattr_link(struct user_namespace *mnt_userns, +static int ecryptfs_getattr_link(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { @@ -982,7 +982,7 @@ static int ecryptfs_getattr_link(struct user_namespace *mnt_userns, mount_crypt_stat = &ecryptfs_superblock_to_private( dentry->d_sb)->mount_crypt_stat; - generic_fillattr(&init_user_ns, d_inode(dentry), stat); + generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat); if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) { char *target; size_t targetsiz; @@ -998,7 +998,7 @@ static int ecryptfs_getattr_link(struct user_namespace *mnt_userns, return rc; } -static int ecryptfs_getattr(struct user_namespace *mnt_userns, +static int ecryptfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { @@ -1011,7 +1011,7 @@ static int ecryptfs_getattr(struct user_namespace *mnt_userns, if (!rc) { fsstack_copy_attr_all(d_inode(dentry), ecryptfs_inode_to_lower(d_inode(dentry))); - generic_fillattr(&init_user_ns, d_inode(dentry), stat); + generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat); stat->blocks = lower_stat.blocks; } return rc; @@ -1033,7 +1033,7 @@ ecryptfs_setxattr(struct dentry *dentry, struct inode *inode, goto out; } inode_lock(lower_inode); - rc = __vfs_setxattr_locked(&init_user_ns, lower_dentry, name, value, size, flags, NULL); + rc = __vfs_setxattr_locked(&nop_mnt_idmap, lower_dentry, name, value, size, flags, NULL); inode_unlock(lower_inode); if (!rc && inode) fsstack_copy_attr_all(inode, lower_inode); @@ -1099,7 +1099,7 @@ static int ecryptfs_removexattr(struct dentry *dentry, struct inode *inode, goto out; } inode_lock(lower_inode); - rc = __vfs_removexattr(&init_user_ns, lower_dentry, name); + rc = __vfs_removexattr(&nop_mnt_idmap, lower_dentry, name); inode_unlock(lower_inode); out: return rc; @@ -1110,26 +1110,26 @@ static int ecryptfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) return vfs_fileattr_get(ecryptfs_dentry_to_lower(dentry), fa); } -static int ecryptfs_fileattr_set(struct user_namespace *mnt_userns, +static int ecryptfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); int rc; - rc = vfs_fileattr_set(&init_user_ns, lower_dentry, fa); + rc = vfs_fileattr_set(&nop_mnt_idmap, lower_dentry, fa); fsstack_copy_attr_all(d_inode(dentry), d_inode(lower_dentry)); return rc; } -static struct posix_acl *ecryptfs_get_acl(struct user_namespace *mnt_userns, +static struct posix_acl *ecryptfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type) { - return vfs_get_acl(mnt_userns, ecryptfs_dentry_to_lower(dentry), + return vfs_get_acl(idmap, ecryptfs_dentry_to_lower(dentry), posix_acl_xattr_name(type)); } -static int ecryptfs_set_acl(struct user_namespace *mnt_userns, +static int ecryptfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { @@ -1137,7 +1137,7 @@ static int ecryptfs_set_acl(struct user_namespace *mnt_userns, struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); struct inode *lower_inode = d_inode(lower_dentry); - rc = vfs_set_acl(&init_user_ns, lower_dentry, + rc = vfs_set_acl(&nop_mnt_idmap, lower_dentry, posix_acl_xattr_name(type), acl); if (!rc) fsstack_copy_attr_all(d_inode(dentry), lower_inode); @@ -1190,7 +1190,7 @@ static int ecryptfs_xattr_get(const struct xattr_handler *handler, } static int ecryptfs_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 19af229eb7ca..373c3e5747e6 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -428,7 +428,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode) if (size < 0) size = 8; put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt); - rc = __vfs_setxattr(&init_user_ns, lower_dentry, lower_inode, + rc = __vfs_setxattr(&nop_mnt_idmap, lower_dentry, lower_inode, ECRYPTFS_XATTR_NAME, xattr_virt, size, 0); inode_unlock(lower_inode); if (rc) diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 617f3ad2485e..b973a2c03dde 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -70,7 +70,7 @@ bool efivarfs_valid_name(const char *str, int len) return uuid_is_valid(s); } -static int efivarfs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int efivarfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct inode *inode = NULL; @@ -163,7 +163,7 @@ efivarfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) } static int -efivarfs_fileattr_set(struct user_namespace *mnt_userns, +efivarfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { unsigned int i_flags = 0; diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index d3b8736fa124..a194e8ee5861 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -353,7 +353,7 @@ struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid) return inode; } -int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path, +int erofs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { @@ -366,7 +366,7 @@ int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path, stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_IMMUTABLE); - generic_fillattr(mnt_userns, inode, stat); + generic_fillattr(idmap, inode, stat); return 0; } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index bb8501c0ff5b..e05ae61069e8 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -494,7 +494,7 @@ extern const struct inode_operations erofs_symlink_iops; extern const struct inode_operations erofs_fast_symlink_iops; struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid); -int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path, +int erofs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags); diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 481788c24a68..626a615dafc2 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -577,26 +577,25 @@ static int erofs_fc_parse_param(struct fs_context *fc, } ++ctx->devs->extra_devices; break; - case Opt_fsid: #ifdef CONFIG_EROFS_FS_ONDEMAND + case Opt_fsid: kfree(ctx->fsid); ctx->fsid = kstrdup(param->string, GFP_KERNEL); if (!ctx->fsid) return -ENOMEM; -#else - errorfc(fc, "fsid option not supported"); -#endif break; case Opt_domain_id: -#ifdef CONFIG_EROFS_FS_ONDEMAND kfree(ctx->domain_id); ctx->domain_id = kstrdup(param->string, GFP_KERNEL); if (!ctx->domain_id) return -ENOMEM; + break; #else - errorfc(fc, "domain_id option not supported"); -#endif + case Opt_fsid: + case Opt_domain_id: + errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); break; +#endif default: return -ENOPARAM; } diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index ccf7c55d477f..5200bb86e264 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1032,12 +1032,12 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, if (!be->decompressed_pages) be->decompressed_pages = - kvcalloc(be->nr_pages, sizeof(struct page *), - GFP_KERNEL | __GFP_NOFAIL); + kcalloc(be->nr_pages, sizeof(struct page *), + GFP_KERNEL | __GFP_NOFAIL); if (!be->compressed_pages) be->compressed_pages = - kvcalloc(pclusterpages, sizeof(struct page *), - GFP_KERNEL | __GFP_NOFAIL); + kcalloc(pclusterpages, sizeof(struct page *), + GFP_KERNEL | __GFP_NOFAIL); z_erofs_parse_out_bvecs(be); err2 = z_erofs_parse_in_bvecs(be, &overlapped); @@ -1085,7 +1085,7 @@ out: } if (be->compressed_pages < be->onstack_pages || be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES) - kvfree(be->compressed_pages); + kfree(be->compressed_pages); z_erofs_fill_other_copies(be, err); for (i = 0; i < be->nr_pages; ++i) { @@ -1104,7 +1104,7 @@ out: } if (be->decompressed_pages != be->onstack_pages) - kvfree(be->decompressed_pages); + kfree(be->decompressed_pages); pcl->length = 0; pcl->partial = true; diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 0150570c33aa..98fb90b9af71 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -793,12 +793,16 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, iomap->type = IOMAP_HOLE; iomap->addr = IOMAP_NULL_ADDR; /* - * No strict rule how to describe extents for post EOF, yet - * we need do like below. Otherwise, iomap itself will get + * No strict rule on how to describe extents for post EOF, yet + * we need to do like below. Otherwise, iomap itself will get * into an endless loop on post EOF. + * + * Calculate the effective offset by subtracting extent start + * (map.m_la) from the requested offset, and add it to length. + * (NB: offset >= map.m_la always) */ if (iomap->offset >= inode->i_size) - iomap->length = length + map.m_la - offset; + iomap->length = length + offset - map.m_la; } iomap->flags = 0; return 0; diff --git a/fs/exec.c b/fs/exec.c index ab913243a367..3d2b80d8d58e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1414,15 +1414,15 @@ EXPORT_SYMBOL(begin_new_exec); void would_dump(struct linux_binprm *bprm, struct file *file) { struct inode *inode = file_inode(file); - struct user_namespace *mnt_userns = file_mnt_user_ns(file); - if (inode_permission(mnt_userns, inode, MAY_READ) < 0) { + struct mnt_idmap *idmap = file_mnt_idmap(file); + if (inode_permission(idmap, inode, MAY_READ) < 0) { struct user_namespace *old, *user_ns; bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; /* Ensure mm->user_ns contains the executable */ user_ns = old = bprm->mm->user_ns; while ((user_ns != &init_user_ns) && - !privileged_wrt_inode_uidgid(user_ns, mnt_userns, inode)) + !privileged_wrt_inode_uidgid(user_ns, idmap, inode)) user_ns = user_ns->parent; if (old != user_ns) { @@ -1596,7 +1596,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm) static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) { /* Handle suid and sgid on files */ - struct user_namespace *mnt_userns; + struct mnt_idmap *idmap; struct inode *inode = file_inode(file); unsigned int mode; vfsuid_t vfsuid; @@ -1612,15 +1612,15 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) if (!(mode & (S_ISUID|S_ISGID))) return; - mnt_userns = file_mnt_user_ns(file); + idmap = file_mnt_idmap(file); /* Be careful if suid/sgid is set */ inode_lock(inode); /* reload atomically mode/uid/gid now that lock held */ mode = inode->i_mode; - vfsuid = i_uid_into_vfsuid(mnt_userns, inode); - vfsgid = i_gid_into_vfsgid(mnt_userns, inode); + vfsuid = i_uid_into_vfsuid(idmap, inode); + vfsgid = i_gid_into_vfsgid(idmap, inode); inode_unlock(inode); /* We ignore suid/sgid if there are no mappings for them in the ns */ diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index bc6d21d7c5ad..1bf16abe3c84 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -450,9 +450,9 @@ int exfat_trim_fs(struct inode *inode, struct fstrim_range *range); extern const struct file_operations exfat_file_operations; int __exfat_truncate(struct inode *inode); void exfat_truncate(struct inode *inode); -int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); -int exfat_getattr(struct user_namespace *mnt_userns, const struct path *path, +int exfat_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, unsigned int request_mask, unsigned int query_flags); int exfat_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); diff --git a/fs/exfat/file.c b/fs/exfat/file.c index f5b29072775d..1fdb0a64b91d 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -226,14 +226,14 @@ write_size: mutex_unlock(&sbi->s_lock); } -int exfat_getattr(struct user_namespace *mnt_uerns, const struct path *path, +int exfat_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, unsigned int request_mask, unsigned int query_flags) { struct inode *inode = d_backing_inode(path->dentry); struct exfat_inode_info *ei = EXFAT_I(inode); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); exfat_truncate_atime(&stat->atime); stat->result_mask |= STATX_BTIME; stat->btime.tv_sec = ei->i_crtime.tv_sec; @@ -242,7 +242,7 @@ int exfat_getattr(struct user_namespace *mnt_uerns, const struct path *path, return 0; } -int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct exfat_sb_info *sbi = EXFAT_SB(dentry->d_sb); @@ -266,7 +266,7 @@ int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, ATTR_TIMES_SET); } - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); attr->ia_valid = ia_valid; if (error) goto out; @@ -293,7 +293,7 @@ int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (attr->ia_valid & ATTR_SIZE) inode->i_mtime = inode->i_ctime = current_time(inode); - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); exfat_truncate_atime(&inode->i_atime); if (attr->ia_valid & ATTR_SIZE) { diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 5f995eba5dbb..02aab4c3a5f7 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -551,7 +551,7 @@ out: return ret; } -static int exfat_create(struct user_namespace *mnt_userns, struct inode *dir, +static int exfat_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct super_block *sb = dir->i_sb; @@ -834,7 +834,7 @@ unlock: return err; } -static int exfat_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int exfat_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct super_block *sb = dir->i_sb; @@ -1285,7 +1285,7 @@ out: return ret; } -static int exfat_rename(struct user_namespace *mnt_userns, +static int exfat_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 3204bd33e4e8..ab88d33d106c 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -145,7 +145,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, if (err) goto out_err; dprintk("%s: found name: %s\n", __func__, nbuf); - tmp = lookup_one_unlocked(mnt_user_ns(mnt), nbuf, parent, strlen(nbuf)); + tmp = lookup_one_unlocked(mnt_idmap(mnt), nbuf, parent, strlen(nbuf)); if (IS_ERR(tmp)) { dprintk("lookup failed: %ld\n", PTR_ERR(tmp)); err = PTR_ERR(tmp); @@ -524,7 +524,7 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, } inode_lock(target_dir->d_inode); - nresult = lookup_one(mnt_user_ns(mnt), nbuf, + nresult = lookup_one(mnt_idmap(mnt), nbuf, target_dir, strlen(nbuf)); if (!IS_ERR(nresult)) { if (unlikely(nresult->d_inode != result->d_inode)) { diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 440d5f1e9d47..82b17d7fc93f 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -219,7 +219,7 @@ __ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) * inode->i_mutex: down */ int -ext2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +ext2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int error; @@ -228,7 +228,7 @@ ext2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, umode_t mode = inode->i_mode; if (type == ACL_TYPE_ACCESS && acl) { - error = posix_acl_update_mode(&init_user_ns, inode, &mode, + error = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode, &acl); if (error) return error; diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 3841becb94ff..4a8443a2b8ec 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -56,7 +56,7 @@ static inline int ext2_acl_count(size_t size) /* acl.c */ extern struct posix_acl *ext2_get_acl(struct inode *inode, int type, bool rcu); -extern int ext2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +extern int ext2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); extern int ext2_init_acl (struct inode *, struct inode *); diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 28de11a22e5f..5b52306e2e95 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -753,8 +753,8 @@ extern struct inode *ext2_iget (struct super_block *, unsigned long); extern int ext2_write_inode (struct inode *, struct writeback_control *); extern void ext2_evict_inode(struct inode *); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); -extern int ext2_setattr (struct user_namespace *, struct dentry *, struct iattr *); -extern int ext2_getattr (struct user_namespace *, const struct path *, +extern int ext2_setattr (struct mnt_idmap *, struct dentry *, struct iattr *); +extern int ext2_getattr (struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); extern void ext2_set_inode_flags(struct inode *inode); extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, @@ -762,7 +762,7 @@ extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, /* ioctl.c */ extern int ext2_fileattr_get(struct dentry *dentry, struct fileattr *fa); -extern int ext2_fileattr_set(struct user_namespace *mnt_userns, +extern int ext2_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); extern long ext2_ioctl(struct file *, unsigned int, unsigned long); extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 78b8686d9a4a..a4e1d7a9c544 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -545,7 +545,7 @@ got: inode->i_uid = current_fsuid(); inode->i_gid = dir->i_gid; } else - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = ino; inode->i_blocks = 0; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 69aed9e2359e..26f135e7ffce 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1592,7 +1592,7 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc) return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); } -int ext2_getattr(struct user_namespace *mnt_userns, const struct path *path, +int ext2_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); @@ -1614,28 +1614,28 @@ int ext2_getattr(struct user_namespace *mnt_userns, const struct path *path, STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); return 0; } -int ext2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int ext2_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); int error; - error = setattr_prepare(&init_user_ns, dentry, iattr); + error = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (error) return error; - if (is_quota_modification(mnt_userns, inode, iattr)) { + if (is_quota_modification(&nop_mnt_idmap, inode, iattr)) { error = dquot_initialize(inode); if (error) return error; } - if (i_uid_needs_update(mnt_userns, iattr, inode) || - i_gid_needs_update(mnt_userns, iattr, inode)) { - error = dquot_transfer(mnt_userns, inode, iattr); + if (i_uid_needs_update(&nop_mnt_idmap, iattr, inode) || + i_gid_needs_update(&nop_mnt_idmap, iattr, inode)) { + error = dquot_transfer(&nop_mnt_idmap, inode, iattr); if (error) return error; } @@ -1644,9 +1644,9 @@ int ext2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (error) return error; } - setattr_copy(&init_user_ns, inode, iattr); + setattr_copy(&nop_mnt_idmap, inode, iattr); if (iattr->ia_valid & ATTR_MODE) - error = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode); + error = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode); mark_inode_dirty(inode); return error; diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index e8340bf09b10..cc87d413eb43 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -27,7 +27,7 @@ int ext2_fileattr_get(struct dentry *dentry, struct fileattr *fa) return 0; } -int ext2_fileattr_set(struct user_namespace *mnt_userns, +int ext2_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); @@ -66,7 +66,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case EXT2_IOC_SETVERSION: { __u32 generation; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) return -EPERM; ret = mnt_want_write_file(filp); if (ret) @@ -99,7 +99,7 @@ setversion_out: if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) return -ENOTTY; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) return -EACCES; if (get_user(rsv_window_size, (int __user *)arg)) diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index c056957221a2..81808e3d11c1 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -99,7 +99,7 @@ struct dentry *ext2_get_parent(struct dentry *child) * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int ext2_create (struct user_namespace * mnt_userns, +static int ext2_create (struct mnt_idmap * idmap, struct inode * dir, struct dentry * dentry, umode_t mode, bool excl) { @@ -119,7 +119,7 @@ static int ext2_create (struct user_namespace * mnt_userns, return ext2_add_nondir(dentry, inode); } -static int ext2_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, +static int ext2_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { struct inode *inode = ext2_new_inode(dir, mode, NULL); @@ -133,7 +133,7 @@ static int ext2_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, return finish_open_simple(file, 0); } -static int ext2_mknod (struct user_namespace * mnt_userns, struct inode * dir, +static int ext2_mknod (struct mnt_idmap * idmap, struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode * inode; @@ -154,7 +154,7 @@ static int ext2_mknod (struct user_namespace * mnt_userns, struct inode * dir, return err; } -static int ext2_symlink (struct user_namespace * mnt_userns, struct inode * dir, +static int ext2_symlink (struct mnt_idmap * idmap, struct inode * dir, struct dentry * dentry, const char * symname) { struct super_block * sb = dir->i_sb; @@ -225,7 +225,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, return err; } -static int ext2_mkdir(struct user_namespace * mnt_userns, +static int ext2_mkdir(struct mnt_idmap * idmap, struct inode * dir, struct dentry * dentry, umode_t mode) { struct inode * inode; @@ -315,7 +315,7 @@ static int ext2_rmdir (struct inode * dir, struct dentry *dentry) return err; } -static int ext2_rename (struct user_namespace * mnt_userns, +static int ext2_rename (struct mnt_idmap * idmap, struct inode * old_dir, struct dentry * old_dentry, struct inode * new_dir, struct dentry * new_dentry, unsigned int flags) diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index ebade1f52451..db47b8ab153e 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -19,7 +19,7 @@ ext2_xattr_security_get(const struct xattr_handler *handler, static int ext2_xattr_security_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 18a87d5dd1ab..995f931228ce 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -26,7 +26,7 @@ ext2_xattr_trusted_get(const struct xattr_handler *handler, static int ext2_xattr_trusted_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index 58092449f8ff..dd1507231081 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -30,7 +30,7 @@ ext2_xattr_user_get(const struct xattr_handler *handler, static int ext2_xattr_user_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index a9f89539aeee..27fcbddfb148 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -225,7 +225,7 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type, } int -ext4_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +ext4_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { handle_t *handle; @@ -249,7 +249,7 @@ retry: return PTR_ERR(handle); if ((type == ACL_TYPE_ACCESS) && acl) { - error = posix_acl_update_mode(mnt_userns, inode, &mode, &acl); + error = posix_acl_update_mode(idmap, inode, &mode, &acl); if (error) goto out_stop; if (mode != inode->i_mode) diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 09c4a8a3b716..0c5a79c3b5d4 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -56,7 +56,7 @@ static inline int ext4_acl_count(size_t size) /* acl.c */ struct posix_acl *ext4_get_acl(struct inode *inode, int type, bool rcu); -int ext4_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int ext4_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 140e1eb300d1..43e26e6f6e42 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2845,7 +2845,7 @@ extern int ext4fs_dirhash(const struct inode *dir, const char *name, int len, /* ialloc.c */ extern int ext4_mark_inode_used(struct super_block *sb, int ino); -extern struct inode *__ext4_new_inode(struct user_namespace *, handle_t *, +extern struct inode *__ext4_new_inode(struct mnt_idmap *, handle_t *, struct inode *, umode_t, const struct qstr *qstr, __u32 goal, uid_t *owner, __u32 i_flags, @@ -2853,11 +2853,11 @@ extern struct inode *__ext4_new_inode(struct user_namespace *, handle_t *, int nblocks); #define ext4_new_inode(handle, dir, mode, qstr, goal, owner, i_flags) \ - __ext4_new_inode(&init_user_ns, (handle), (dir), (mode), (qstr), \ + __ext4_new_inode(&nop_mnt_idmap, (handle), (dir), (mode), (qstr), \ (goal), (owner), i_flags, 0, 0, 0) -#define ext4_new_inode_start_handle(mnt_userns, dir, mode, qstr, goal, owner, \ +#define ext4_new_inode_start_handle(idmap, dir, mode, qstr, goal, owner, \ type, nblocks) \ - __ext4_new_inode((mnt_userns), NULL, (dir), (mode), (qstr), (goal), (owner), \ + __ext4_new_inode((idmap), NULL, (dir), (mode), (qstr), (goal), (owner), \ 0, (type), __LINE__, (nblocks)) @@ -2976,14 +2976,14 @@ extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, __ext4_iget((sb), (ino), (flags), __func__, __LINE__) extern int ext4_write_inode(struct inode *, struct writeback_control *); -extern int ext4_setattr(struct user_namespace *, struct dentry *, +extern int ext4_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern u32 ext4_dio_alignment(struct inode *inode); -extern int ext4_getattr(struct user_namespace *, const struct path *, +extern int ext4_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); extern void ext4_evict_inode(struct inode *); extern void ext4_clear_inode(struct inode *); -extern int ext4_file_getattr(struct user_namespace *, const struct path *, +extern int ext4_file_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); extern int ext4_sync_inode(handle_t *, struct inode *); extern void ext4_dirty_inode(struct inode *, int); @@ -3024,7 +3024,7 @@ extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode, /* ioctl.c */ extern long ext4_ioctl(struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); -int ext4_fileattr_set(struct user_namespace *mnt_userns, +int ext4_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); int ext4_fileattr_get(struct dentry *dentry, struct fileattr *fa); extern void ext4_reset_inode_seed(struct inode *inode); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 63f9bb6e8851..157663031f8c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -921,7 +921,7 @@ static int ext4_xattr_credits_for_new_inode(struct inode *dir, mode_t mode, * For other inodes, search forward from the parent directory's block * group to find a free inode. */ -struct inode *__ext4_new_inode(struct user_namespace *mnt_userns, +struct inode *__ext4_new_inode(struct mnt_idmap *idmap, handle_t *handle, struct inode *dir, umode_t mode, const struct qstr *qstr, __u32 goal, uid_t *owner, __u32 i_flags, @@ -972,10 +972,10 @@ struct inode *__ext4_new_inode(struct user_namespace *mnt_userns, i_gid_write(inode, owner[1]); } else if (test_opt(sb, GRPID)) { inode->i_mode = mode; - inode_fsuid_set(inode, mnt_userns); + inode_fsuid_set(inode, idmap); inode->i_gid = dir->i_gid; } else - inode_init_owner(mnt_userns, inode, dir, mode); + inode_init_owner(idmap, inode, dir, mode); if (ext4_has_feature_project(sb) && ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9d9f414f99fe..be664dc9b991 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5434,7 +5434,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) * * Called with inode->i_rwsem down. */ -int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -5454,7 +5454,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, ATTR_GID | ATTR_TIMES_SET)))) return -EPERM; - error = setattr_prepare(mnt_userns, dentry, attr); + error = setattr_prepare(idmap, dentry, attr); if (error) return error; @@ -5466,14 +5466,14 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (error) return error; - if (is_quota_modification(mnt_userns, inode, attr)) { + if (is_quota_modification(idmap, inode, attr)) { error = dquot_initialize(inode); if (error) return error; } - if (i_uid_needs_update(mnt_userns, attr, inode) || - i_gid_needs_update(mnt_userns, attr, inode)) { + if (i_uid_needs_update(idmap, attr, inode) || + i_gid_needs_update(idmap, attr, inode)) { handle_t *handle; /* (user+group)*(old+new) structure, inode write (sb, @@ -5490,7 +5490,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, * counts xattr inode references. */ down_read(&EXT4_I(inode)->xattr_sem); - error = dquot_transfer(mnt_userns, inode, attr); + error = dquot_transfer(idmap, inode, attr); up_read(&EXT4_I(inode)->xattr_sem); if (error) { @@ -5499,8 +5499,8 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, } /* Update corresponding info in inode so that everything is in * one transaction */ - i_uid_update(mnt_userns, attr, inode); - i_gid_update(mnt_userns, attr, inode); + i_uid_update(idmap, attr, inode); + i_gid_update(idmap, attr, inode); error = ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); if (unlikely(error)) { @@ -5630,7 +5630,7 @@ out_mmap_sem: if (!error) { if (inc_ivers) inode_inc_iversion(inode); - setattr_copy(mnt_userns, inode, attr); + setattr_copy(idmap, inode, attr); mark_inode_dirty(inode); } @@ -5642,7 +5642,7 @@ out_mmap_sem: ext4_orphan_del(NULL, inode); if (!error && (ia_valid & ATTR_MODE)) - rc = posix_acl_chmod(mnt_userns, dentry, inode->i_mode); + rc = posix_acl_chmod(idmap, dentry, inode->i_mode); err_out: if (error) @@ -5668,7 +5668,7 @@ u32 ext4_dio_alignment(struct inode *inode) return 1; /* use the iomap defaults */ } -int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path, +int ext4_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); @@ -5725,18 +5725,18 @@ int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path, STATX_ATTR_NODUMP | STATX_ATTR_VERITY); - generic_fillattr(mnt_userns, inode, stat); + generic_fillattr(idmap, inode, stat); return 0; } -int ext4_file_getattr(struct user_namespace *mnt_userns, +int ext4_file_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); u64 delalloc_blocks; - ext4_getattr(mnt_userns, path, stat, request_mask, query_flags); + ext4_getattr(idmap, path, stat, request_mask, query_flags); /* * If there is inline data in the inode, the inode will normally not diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 8067ccda34e4..b0dc7212694e 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -358,12 +358,12 @@ void ext4_reset_inode_seed(struct inode *inode) * important fields of the inodes. * * @sb: the super block of the filesystem - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @inode: the inode to swap with EXT4_BOOT_LOADER_INO * */ static long swap_inode_boot_loader(struct super_block *sb, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct inode *inode) { handle_t *handle; @@ -393,7 +393,7 @@ static long swap_inode_boot_loader(struct super_block *sb, } if (IS_RDONLY(inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) || - !inode_owner_or_capable(mnt_userns, inode) || + !inode_owner_or_capable(idmap, inode) || !capable(CAP_SYS_ADMIN)) { err = -EPERM; goto journal_err_out; @@ -979,7 +979,7 @@ int ext4_fileattr_get(struct dentry *dentry, struct fileattr *fa) return 0; } -int ext4_fileattr_set(struct user_namespace *mnt_userns, +int ext4_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); @@ -1217,7 +1217,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; - struct user_namespace *mnt_userns = file_mnt_user_ns(filp); + struct mnt_idmap *idmap = file_mnt_idmap(filp); ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); @@ -1234,7 +1234,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) __u32 generation; int err; - if (!inode_owner_or_capable(mnt_userns, inode)) + if (!inode_owner_or_capable(idmap, inode)) return -EPERM; if (ext4_has_metadata_csum(inode->i_sb)) { @@ -1376,7 +1376,7 @@ mext_out: case EXT4_IOC_MIGRATE: { int err; - if (!inode_owner_or_capable(mnt_userns, inode)) + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; err = mnt_want_write_file(filp); @@ -1398,7 +1398,7 @@ mext_out: case EXT4_IOC_ALLOC_DA_BLKS: { int err; - if (!inode_owner_or_capable(mnt_userns, inode)) + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; err = mnt_want_write_file(filp); @@ -1417,7 +1417,7 @@ mext_out: err = mnt_want_write_file(filp); if (err) return err; - err = swap_inode_boot_loader(sb, mnt_userns, inode); + err = swap_inode_boot_loader(sb, idmap, inode); mnt_drop_write_file(filp); return err; } @@ -1542,7 +1542,7 @@ resizefs_out: case EXT4_IOC_CLEAR_ES_CACHE: { - if (!inode_owner_or_capable(mnt_userns, inode)) + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; ext4_clear_inode_es(inode); return 0; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index dd28453d6ea3..d10a508d95cd 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2792,7 +2792,7 @@ static int ext4_add_nondir(handle_t *handle, * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int ext4_create(struct user_namespace *mnt_userns, struct inode *dir, +static int ext4_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { handle_t *handle; @@ -2806,7 +2806,7 @@ static int ext4_create(struct user_namespace *mnt_userns, struct inode *dir, credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); retry: - inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name, + inode = ext4_new_inode_start_handle(idmap, dir, mode, &dentry->d_name, 0, NULL, EXT4_HT_DIR, credits); handle = ext4_journal_current_handle(); err = PTR_ERR(inode); @@ -2827,7 +2827,7 @@ retry: return err; } -static int ext4_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int ext4_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { handle_t *handle; @@ -2841,7 +2841,7 @@ static int ext4_mknod(struct user_namespace *mnt_userns, struct inode *dir, credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); retry: - inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name, + inode = ext4_new_inode_start_handle(idmap, dir, mode, &dentry->d_name, 0, NULL, EXT4_HT_DIR, credits); handle = ext4_journal_current_handle(); err = PTR_ERR(inode); @@ -2861,7 +2861,7 @@ retry: return err; } -static int ext4_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, +static int ext4_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { handle_t *handle; @@ -2873,7 +2873,7 @@ static int ext4_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, return err; retry: - inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, + inode = ext4_new_inode_start_handle(idmap, dir, mode, NULL, 0, NULL, EXT4_HT_DIR, EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + @@ -2972,7 +2972,7 @@ out: return err; } -static int ext4_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int ext4_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { handle_t *handle; @@ -2989,7 +2989,7 @@ static int ext4_mkdir(struct user_namespace *mnt_userns, struct inode *dir, credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); retry: - inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFDIR | mode, + inode = ext4_new_inode_start_handle(idmap, dir, S_IFDIR | mode, &dentry->d_name, 0, NULL, EXT4_HT_DIR, credits); handle = ext4_journal_current_handle(); @@ -3339,7 +3339,7 @@ out: return err; } -static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { handle_t *handle; @@ -3370,7 +3370,7 @@ static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir, credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3; retry: - inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFLNK|S_IRWXUGO, + inode = ext4_new_inode_start_handle(idmap, dir, S_IFLNK|S_IRWXUGO, &dentry->d_name, 0, NULL, EXT4_HT_DIR, credits); handle = ext4_journal_current_handle(); @@ -3720,7 +3720,7 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent) } } -static struct inode *ext4_whiteout_for_rename(struct user_namespace *mnt_userns, +static struct inode *ext4_whiteout_for_rename(struct mnt_idmap *idmap, struct ext4_renament *ent, int credits, handle_t **h) { @@ -3735,7 +3735,7 @@ static struct inode *ext4_whiteout_for_rename(struct user_namespace *mnt_userns, credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) + EXT4_XATTR_TRANS_BLOCKS + 4); retry: - wh = ext4_new_inode_start_handle(mnt_userns, ent->dir, + wh = ext4_new_inode_start_handle(idmap, ent->dir, S_IFCHR | WHITEOUT_MODE, &ent->dentry->d_name, 0, NULL, EXT4_HT_DIR, credits); @@ -3763,7 +3763,7 @@ retry: * while new_{dentry,inode) refers to the destination dentry/inode * This comes from rename(const char *oldpath, const char *newpath) */ -static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { @@ -3851,7 +3851,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, goto release_bh; } } else { - whiteout = ext4_whiteout_for_rename(mnt_userns, &old, credits, &handle); + whiteout = ext4_whiteout_for_rename(idmap, &old, credits, &handle); if (IS_ERR(whiteout)) { retval = PTR_ERR(whiteout); goto release_bh; @@ -4158,7 +4158,7 @@ end_rename: return retval; } -static int ext4_rename2(struct user_namespace *mnt_userns, +static int ext4_rename2(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) @@ -4181,7 +4181,7 @@ static int ext4_rename2(struct user_namespace *mnt_userns, new_dir, new_dentry); } - return ext4_rename(mnt_userns, old_dir, old_dentry, new_dir, new_dentry, flags); + return ext4_rename(idmap, old_dir, old_dentry, new_dir, new_dentry, flags); } /* diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 3d3ed3c38f56..75bf1f88843c 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -55,12 +55,12 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry, return paddr; } -static int ext4_encrypted_symlink_getattr(struct user_namespace *mnt_userns, +static int ext4_encrypted_symlink_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { - ext4_getattr(mnt_userns, path, stat, request_mask, query_flags); + ext4_getattr(idmap, path, stat, request_mask, query_flags); return fscrypt_symlink_getattr(path, stat); } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 7decaaf27e82..a2f04a3808db 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -81,6 +81,8 @@ ext4_xattr_block_cache_find(struct inode *, struct ext4_xattr_header *, struct mb_cache_entry **); static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value, size_t value_count); +static __le32 ext4_xattr_hash_entry_signed(char *name, size_t name_len, __le32 *value, + size_t value_count); static void ext4_xattr_rehash(struct ext4_xattr_header *); static const struct xattr_handler * const ext4_xattr_handler_map[] = { @@ -470,8 +472,22 @@ ext4_xattr_inode_verify_hashes(struct inode *ea_inode, tmp_data = cpu_to_le32(hash); e_hash = ext4_xattr_hash_entry(entry->e_name, entry->e_name_len, &tmp_data, 1); + /* All good? */ + if (e_hash == entry->e_hash) + return 0; + + /* + * Not good. Maybe the entry hash was calculated + * using the buggy signed char version? + */ + e_hash = ext4_xattr_hash_entry_signed(entry->e_name, entry->e_name_len, + &tmp_data, 1); + /* Still no match - bad */ if (e_hash != entry->e_hash) return -EFSCORRUPTED; + + /* Let people know about old hash */ + pr_warn_once("ext4: filesystem with signed xattr name hash"); } return 0; } @@ -3081,7 +3097,29 @@ static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value, while (name_len--) { hash = (hash << NAME_HASH_SHIFT) ^ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ - *name++; + (unsigned char)*name++; + } + while (value_count--) { + hash = (hash << VALUE_HASH_SHIFT) ^ + (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ + le32_to_cpu(*value++); + } + return cpu_to_le32(hash); +} + +/* + * ext4_xattr_hash_entry_signed() + * + * Compute the hash of an extended attribute incorrectly. + */ +static __le32 ext4_xattr_hash_entry_signed(char *name, size_t name_len, __le32 *value, size_t value_count) +{ + __u32 hash = 0; + + while (name_len--) { + hash = (hash << NAME_HASH_SHIFT) ^ + (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ + (signed char)*name++; } while (value_count--) { hash = (hash << VALUE_HASH_SHIFT) ^ diff --git a/fs/ext4/xattr_hurd.c b/fs/ext4/xattr_hurd.c index c78df5790377..8a5842e4cd95 100644 --- a/fs/ext4/xattr_hurd.c +++ b/fs/ext4/xattr_hurd.c @@ -32,7 +32,7 @@ ext4_xattr_hurd_get(const struct xattr_handler *handler, static int ext4_xattr_hurd_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 8213f66f7b2d..776cf11d24ca 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -23,7 +23,7 @@ ext4_xattr_security_get(const struct xattr_handler *handler, static int ext4_xattr_security_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index 7c21ffb26d25..9811eb0ab276 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -30,7 +30,7 @@ ext4_xattr_trusted_get(const struct xattr_handler *handler, static int ext4_xattr_trusted_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index 2fe7ff0a479c..4b70bf4e7626 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -31,7 +31,7 @@ ext4_xattr_user_get(const struct xattr_handler *handler, static int ext4_xattr_user_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index c1c74aa658ae..ec2aeccb69a3 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -204,7 +204,7 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type, bool rcu) return __f2fs_get_acl(inode, type, NULL); } -static int f2fs_acl_update_mode(struct user_namespace *mnt_userns, +static int f2fs_acl_update_mode(struct mnt_idmap *idmap, struct inode *inode, umode_t *mode_p, struct posix_acl **acl) { @@ -219,14 +219,14 @@ static int f2fs_acl_update_mode(struct user_namespace *mnt_userns, return error; if (error == 0) *acl = NULL; - if (!vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode)) && - !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID)) + if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) && + !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) mode &= ~S_ISGID; *mode_p = mode; return 0; } -static int __f2fs_set_acl(struct user_namespace *mnt_userns, +static int __f2fs_set_acl(struct mnt_idmap *idmap, struct inode *inode, int type, struct posix_acl *acl, struct page *ipage) { @@ -240,7 +240,7 @@ static int __f2fs_set_acl(struct user_namespace *mnt_userns, case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl && !ipage) { - error = f2fs_acl_update_mode(mnt_userns, inode, + error = f2fs_acl_update_mode(idmap, inode, &mode, &acl); if (error) return error; @@ -276,7 +276,7 @@ static int __f2fs_set_acl(struct user_namespace *mnt_userns, return error; } -int f2fs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int f2fs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { struct inode *inode = d_inode(dentry); @@ -284,7 +284,7 @@ int f2fs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; - return __f2fs_set_acl(mnt_userns, inode, type, acl, NULL); + return __f2fs_set_acl(idmap, inode, type, acl, NULL); } /* diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index ea2bbb3f264b..94ebfbfbdc6f 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -34,7 +34,7 @@ struct f2fs_acl_header { #ifdef CONFIG_F2FS_FS_POSIX_ACL extern struct posix_acl *f2fs_get_acl(struct inode *, int, bool); -extern int f2fs_set_acl(struct user_namespace *, struct dentry *, +extern int f2fs_set_acl(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, struct page *); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6e43e19c7d1c..97e816590cd9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2183,7 +2183,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, sector_t last_block_in_file; const unsigned blocksize = blks_to_bytes(inode, 1); struct decompress_io_ctx *dic = NULL; - struct extent_info ei = {0, }; + struct extent_info ei = {}; bool from_dnode = true; int i; int ret = 0; diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 1bd38a78ebba..342af24b2f8c 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -546,7 +546,8 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, struct extent_node *en; bool ret = false; - f2fs_bug_on(sbi, !et); + if (!et) + return false; trace_f2fs_lookup_extent_tree_start(inode, pgofs, type); @@ -881,12 +882,14 @@ static unsigned long long __calculate_block_age(unsigned long long new, } /* This returns a new age and allocated blocks in ei */ -static int __get_new_block_age(struct inode *inode, struct extent_info *ei) +static int __get_new_block_age(struct inode *inode, struct extent_info *ei, + block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t f_size = i_size_read(inode); unsigned long long cur_blocks = atomic64_read(&sbi->allocated_data_blocks); + struct extent_info tei = *ei; /* only fofs and len are valid */ /* * When I/O is not aligned to a PAGE_SIZE, update will happen to the last @@ -894,20 +897,20 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei) * block here. */ if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) && - ei->blk == NEW_ADDR) + blkaddr == NEW_ADDR) return -EINVAL; - if (__lookup_extent_tree(inode, ei->fofs, ei, EX_BLOCK_AGE)) { + if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) { unsigned long long cur_age; - if (cur_blocks >= ei->last_blocks) - cur_age = cur_blocks - ei->last_blocks; + if (cur_blocks >= tei.last_blocks) + cur_age = cur_blocks - tei.last_blocks; else /* allocated_data_blocks overflow */ - cur_age = ULLONG_MAX - ei->last_blocks + cur_blocks; + cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks; - if (ei->age) - ei->age = __calculate_block_age(cur_age, ei->age); + if (tei.age) + ei->age = __calculate_block_age(cur_age, tei.age); else ei->age = cur_age; ei->last_blocks = cur_blocks; @@ -915,14 +918,14 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei) return 0; } - f2fs_bug_on(sbi, ei->blk == NULL_ADDR); + f2fs_bug_on(sbi, blkaddr == NULL_ADDR); /* the data block was allocated for the first time */ - if (ei->blk == NEW_ADDR) + if (blkaddr == NEW_ADDR) goto out; - if (__is_valid_data_blkaddr(ei->blk) && - !f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE)) { + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { f2fs_bug_on(sbi, 1); return -EINVAL; } @@ -938,7 +941,7 @@ out: static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type) { - struct extent_info ei; + struct extent_info ei = {}; if (!__may_extent_tree(dn->inode, type)) return; @@ -953,8 +956,7 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ else ei.blk = dn->data_blkaddr; } else if (type == EX_BLOCK_AGE) { - ei.blk = dn->data_blkaddr; - if (__get_new_block_age(dn->inode, &ei)) + if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr)) return; } __update_extent_tree_range(dn->inode, &ei, type); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e8953c3dc81a..9a3ffa39ad30 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3469,15 +3469,15 @@ void f2fs_truncate_data_blocks(struct dnode_of_data *dn); int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate(struct inode *inode); -int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path, +int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); -int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count); int f2fs_precache_extents(struct inode *inode); int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa); -int f2fs_fileattr_set(struct user_namespace *mnt_userns, +int f2fs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); @@ -3505,7 +3505,7 @@ void f2fs_handle_failed_inode(struct inode *inode); int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); -int f2fs_get_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, +int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct inode **new_inode); /* diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a6c401279886..b90617639743 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -837,7 +837,7 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw) return false; } -int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path, +int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); @@ -892,7 +892,7 @@ int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path, STATX_ATTR_NODUMP | STATX_ATTR_VERITY); - generic_fillattr(mnt_userns, inode, stat); + generic_fillattr(idmap, inode, stat); /* we need to show initial sectors used for inline_data/dentries */ if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) || @@ -903,13 +903,13 @@ int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path, } #ifdef CONFIG_F2FS_FS_POSIX_ACL -static void __setattr_copy(struct user_namespace *mnt_userns, +static void __setattr_copy(struct mnt_idmap *idmap, struct inode *inode, const struct iattr *attr) { unsigned int ia_valid = attr->ia_valid; - i_uid_update(mnt_userns, attr, inode); - i_gid_update(mnt_userns, attr, inode); + i_uid_update(idmap, attr, inode); + i_gid_update(idmap, attr, inode); if (ia_valid & ATTR_ATIME) inode->i_atime = attr->ia_atime; if (ia_valid & ATTR_MTIME) @@ -918,10 +918,10 @@ static void __setattr_copy(struct user_namespace *mnt_userns, inode->i_ctime = attr->ia_ctime; if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; - vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode); + vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); if (!vfsgid_in_group_p(vfsgid) && - !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID)) + !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) mode &= ~S_ISGID; set_acl_inode(inode, mode); } @@ -930,7 +930,7 @@ static void __setattr_copy(struct user_namespace *mnt_userns, #define __setattr_copy setattr_copy #endif -int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -951,7 +951,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, !f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; - err = setattr_prepare(mnt_userns, dentry, attr); + err = setattr_prepare(idmap, dentry, attr); if (err) return err; @@ -963,15 +963,15 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (err) return err; - if (is_quota_modification(mnt_userns, inode, attr)) { + if (is_quota_modification(idmap, inode, attr)) { err = f2fs_dquot_initialize(inode); if (err) return err; } - if (i_uid_needs_update(mnt_userns, attr, inode) || - i_gid_needs_update(mnt_userns, attr, inode)) { + if (i_uid_needs_update(idmap, attr, inode) || + i_gid_needs_update(idmap, attr, inode)) { f2fs_lock_op(F2FS_I_SB(inode)); - err = dquot_transfer(mnt_userns, inode, attr); + err = dquot_transfer(idmap, inode, attr); if (err) { set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR); @@ -982,8 +982,8 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, * update uid/gid under lock_op(), so that dquot and inode can * be updated atomically. */ - i_uid_update(mnt_userns, attr, inode); - i_gid_update(mnt_userns, attr, inode); + i_uid_update(idmap, attr, inode); + i_gid_update(idmap, attr, inode); f2fs_mark_inode_dirty_sync(inode, true); f2fs_unlock_op(F2FS_I_SB(inode)); } @@ -1023,10 +1023,10 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, spin_unlock(&F2FS_I(inode)->i_size_lock); } - __setattr_copy(mnt_userns, inode, attr); + __setattr_copy(idmap, inode, attr); if (attr->ia_valid & ATTR_MODE) { - err = posix_acl_chmod(mnt_userns, dentry, f2fs_get_inode_mode(inode)); + err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode)); if (is_inode_flag_set(inode, FI_ACL_MODE)) { if (!err) @@ -2038,14 +2038,14 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) { struct inode *inode = file_inode(filp); - struct user_namespace *mnt_userns = file_mnt_user_ns(filp); + struct mnt_idmap *idmap = file_mnt_idmap(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct inode *pinode; loff_t isize; int ret; - if (!inode_owner_or_capable(mnt_userns, inode)) + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; if (!S_ISREG(inode->i_mode)) @@ -2095,7 +2095,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) goto out; } - ret = f2fs_get_tmpfile(mnt_userns, pinode, &fi->cow_inode); + ret = f2fs_get_tmpfile(idmap, pinode, &fi->cow_inode); iput(pinode); if (ret) { f2fs_up_write(&fi->i_gc_rwsem[WRITE]); @@ -2135,10 +2135,10 @@ out: static int f2fs_ioc_commit_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); - struct user_namespace *mnt_userns = file_mnt_user_ns(filp); + struct mnt_idmap *idmap = file_mnt_idmap(filp); int ret; - if (!inode_owner_or_capable(mnt_userns, inode)) + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; ret = mnt_want_write_file(filp); @@ -2167,10 +2167,10 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) static int f2fs_ioc_abort_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); - struct user_namespace *mnt_userns = file_mnt_user_ns(filp); + struct mnt_idmap *idmap = file_mnt_idmap(filp); int ret; - if (!inode_owner_or_capable(mnt_userns, inode)) + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; ret = mnt_want_write_file(filp); @@ -2559,7 +2559,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, struct f2fs_map_blocks map = { .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, .m_may_create = false }; - struct extent_info ei = {0, }; + struct extent_info ei = {}; pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; @@ -3090,7 +3090,7 @@ int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa) return 0; } -int f2fs_fileattr_set(struct user_namespace *mnt_userns, +int f2fs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6032589099ce..d8e01bbbf27f 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -202,7 +202,7 @@ static void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, file_set_hot(inode); } -static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, +static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, struct inode *dir, umode_t mode, const char *name) { @@ -225,7 +225,7 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, nid_free = true; - inode_init_owner(mnt_userns, inode, dir, mode); + inode_init_owner(idmap, inode, dir, mode); inode->i_ino = ino; inode->i_blocks = 0; @@ -246,7 +246,7 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; else - F2FS_I(inode)->i_projid = make_kprojid(mnt_userns, + F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns, F2FS_DEF_PROJID); err = fscrypt_prepare_new_inode(dir, inode, &encrypt); @@ -333,7 +333,7 @@ fail_drop: return ERR_PTR(err); } -static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int f2fs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); @@ -350,7 +350,7 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, mode, dentry->d_name.name); + inode = f2fs_new_inode(idmap, dir, mode, dentry->d_name.name); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -659,7 +659,7 @@ static const char *f2fs_get_link(struct dentry *dentry, return link; } -static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int f2fs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); @@ -682,7 +682,7 @@ static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, S_IFLNK | S_IRWXUGO, NULL); + inode = f2fs_new_inode(idmap, dir, S_IFLNK | S_IRWXUGO, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -739,7 +739,7 @@ out_free_encrypted_link: return err; } -static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int f2fs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); @@ -753,7 +753,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, S_IFDIR | mode, NULL); + inode = f2fs_new_inode(idmap, dir, S_IFDIR | mode, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -794,7 +794,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) return -ENOTEMPTY; } -static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int f2fs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); @@ -810,7 +810,7 @@ static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, mode, NULL); + inode = f2fs_new_inode(idmap, dir, mode, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -837,7 +837,7 @@ out: return err; } -static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, +static int __f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode, bool is_whiteout, struct inode **new_inode) { @@ -849,7 +849,7 @@ static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, mode, NULL); + inode = f2fs_new_inode(idmap, dir, mode, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -907,7 +907,7 @@ out: return err; } -static int f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, +static int f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); @@ -918,28 +918,28 @@ static int f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; - err = __f2fs_tmpfile(mnt_userns, dir, file, mode, false, NULL); + err = __f2fs_tmpfile(idmap, dir, file, mode, false, NULL); return finish_open_simple(file, err); } -static int f2fs_create_whiteout(struct user_namespace *mnt_userns, +static int f2fs_create_whiteout(struct mnt_idmap *idmap, struct inode *dir, struct inode **whiteout) { if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) return -EIO; - return __f2fs_tmpfile(mnt_userns, dir, NULL, + return __f2fs_tmpfile(idmap, dir, NULL, S_IFCHR | WHITEOUT_MODE, true, whiteout); } -int f2fs_get_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, +int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct inode **new_inode) { - return __f2fs_tmpfile(mnt_userns, dir, NULL, S_IFREG, false, new_inode); + return __f2fs_tmpfile(idmap, dir, NULL, S_IFREG, false, new_inode); } -static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { @@ -979,7 +979,7 @@ static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, } if (flags & RENAME_WHITEOUT) { - err = f2fs_create_whiteout(mnt_userns, old_dir, &whiteout); + err = f2fs_create_whiteout(idmap, old_dir, &whiteout); if (err) return err; } @@ -1295,7 +1295,7 @@ out: return err; } -static int f2fs_rename2(struct user_namespace *mnt_userns, +static int f2fs_rename2(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) @@ -1318,7 +1318,7 @@ static int f2fs_rename2(struct user_namespace *mnt_userns, * VFS has already handled the new dentry existence case, * here, we just deal with "RENAME_NOREPLACE" as regular rename. */ - return f2fs_rename(mnt_userns, old_dir, old_dentry, + return f2fs_rename(idmap, old_dir, old_dentry, new_dir, new_dentry, flags); } @@ -1342,12 +1342,12 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, return target; } -static int f2fs_encrypted_symlink_getattr(struct user_namespace *mnt_userns, +static int f2fs_encrypted_symlink_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { - f2fs_getattr(mnt_userns, path, stat, request_mask, query_flags); + f2fs_getattr(idmap, path, stat, request_mask, query_flags); return fscrypt_symlink_getattr(path, stat); } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 77fd453949b1..dfd41908b12d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -258,15 +258,15 @@ static int recover_quota_data(struct inode *inode, struct page *page) attr.ia_vfsuid = VFSUIDT_INIT(make_kuid(inode->i_sb->s_user_ns, i_uid)); attr.ia_vfsgid = VFSGIDT_INIT(make_kgid(inode->i_sb->s_user_ns, i_gid)); - if (!vfsuid_eq(attr.ia_vfsuid, i_uid_into_vfsuid(&init_user_ns, inode))) + if (!vfsuid_eq(attr.ia_vfsuid, i_uid_into_vfsuid(&nop_mnt_idmap, inode))) attr.ia_valid |= ATTR_UID; - if (!vfsgid_eq(attr.ia_vfsgid, i_gid_into_vfsgid(&init_user_ns, inode))) + if (!vfsgid_eq(attr.ia_vfsgid, i_gid_into_vfsgid(&nop_mnt_idmap, inode))) attr.ia_valid |= ATTR_GID; if (!attr.ia_valid) return 0; - err = dquot_transfer(&init_user_ns, inode, &attr); + err = dquot_transfer(&nop_mnt_idmap, inode, &attr); if (err) set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR); return err; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 25ddea478fc1..ae3c4e5474ef 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -663,8 +663,7 @@ init_thread: if (IS_ERR(fcc->f2fs_issue_flush)) { int err = PTR_ERR(fcc->f2fs_issue_flush); - kfree(fcc); - SM_I(sbi)->fcc_info = NULL; + fcc->f2fs_issue_flush = NULL; return err; } @@ -3161,7 +3160,7 @@ static int __get_segment_type_4(struct f2fs_io_info *fio) static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_info ei; + struct extent_info ei = {}; if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) { if (!ei.age) @@ -5138,11 +5137,9 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) init_f2fs_rwsem(&sm_info->curseg_lock); - if (!f2fs_readonly(sbi->sb)) { - err = f2fs_create_flush_cmd_control(sbi); - if (err) - return err; - } + err = f2fs_create_flush_cmd_control(sbi); + if (err) + return err; err = create_discard_cmd_control(sbi); if (err) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index dc2e8637189e..d92edbbdc30e 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -65,7 +65,7 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler, } static int f2fs_xattr_generic_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -109,7 +109,7 @@ static int f2fs_xattr_advise_get(const struct xattr_handler *handler, } static int f2fs_xattr_advise_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -117,7 +117,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler, unsigned char old_advise = F2FS_I(inode)->i_advise; unsigned char new_advise; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) return -EPERM; if (value == NULL) return -EINVAL; diff --git a/fs/fat/fat.h b/fs/fat/fat.h index a415c02ede39..e3b690b48e3e 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -398,10 +398,10 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); extern const struct file_operations fat_file_operations; extern const struct inode_operations fat_file_inode_operations; -extern int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +extern int fat_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); extern void fat_truncate_blocks(struct inode *inode, loff_t offset); -extern int fat_getattr(struct user_namespace *mnt_userns, +extern int fat_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); extern int fat_file_fsync(struct file *file, loff_t start, loff_t end, diff --git a/fs/fat/file.c b/fs/fat/file.c index 8a6b493b5b5f..795a4fad5c40 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -90,13 +90,13 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) * out the RO attribute for checking by the security * module, just because it maps to a file mode. */ - err = security_inode_setattr(file_mnt_user_ns(file), + err = security_inode_setattr(file_mnt_idmap(file), file->f_path.dentry, &ia); if (err) goto out_unlock_inode; /* This MUST be done before doing anything irreversible... */ - err = fat_setattr(file_mnt_user_ns(file), file->f_path.dentry, &ia); + err = fat_setattr(file_mnt_idmap(file), file->f_path.dentry, &ia); if (err) goto out_unlock_inode; @@ -395,13 +395,13 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset) fat_flush_inodes(inode->i_sb, inode, NULL); } -int fat_getattr(struct user_namespace *mnt_userns, const struct path *path, +int fat_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct inode *inode = d_inode(path->dentry); struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); - generic_fillattr(mnt_userns, inode, stat); + generic_fillattr(idmap, inode, stat); stat->blksize = sbi->cluster_size; if (sbi->options.nfs == FAT_NFS_NOSTALE_RO) { @@ -456,14 +456,14 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi, return 0; } -static int fat_allow_set_time(struct user_namespace *mnt_userns, +static int fat_allow_set_time(struct mnt_idmap *idmap, struct msdos_sb_info *sbi, struct inode *inode) { umode_t allow_utime = sbi->options.allow_utime; - if (!vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode), + if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) { - if (vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode))) + if (vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode))) allow_utime >>= 3; if (allow_utime & MAY_WRITE) return 1; @@ -477,7 +477,7 @@ static int fat_allow_set_time(struct user_namespace *mnt_userns, /* valid file mode bits */ #define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) -int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int fat_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); @@ -488,11 +488,11 @@ int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, /* Check for setting the inode time. */ ia_valid = attr->ia_valid; if (ia_valid & TIMES_SET_FLAGS) { - if (fat_allow_set_time(mnt_userns, sbi, inode)) + if (fat_allow_set_time(idmap, sbi, inode)) attr->ia_valid &= ~TIMES_SET_FLAGS; } - error = setattr_prepare(mnt_userns, dentry, attr); + error = setattr_prepare(idmap, dentry, attr); attr->ia_valid = ia_valid; if (error) { if (sbi->options.quiet) @@ -518,10 +518,10 @@ int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, } if (((attr->ia_valid & ATTR_UID) && - (!uid_eq(from_vfsuid(mnt_userns, i_user_ns(inode), attr->ia_vfsuid), + (!uid_eq(from_vfsuid(idmap, i_user_ns(inode), attr->ia_vfsuid), sbi->options.fs_uid))) || ((attr->ia_valid & ATTR_GID) && - (!gid_eq(from_vfsgid(mnt_userns, i_user_ns(inode), attr->ia_vfsgid), + (!gid_eq(from_vfsgid(idmap, i_user_ns(inode), attr->ia_vfsgid), sbi->options.fs_gid))) || ((attr->ia_valid & ATTR_MODE) && (attr->ia_mode & ~FAT_VALID_MODE))) @@ -564,7 +564,7 @@ int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, fat_truncate_time(inode, &attr->ia_mtime, S_MTIME); attr->ia_valid &= ~(ATTR_ATIME|ATTR_CTIME|ATTR_MTIME); - setattr_copy(mnt_userns, inode, attr); + setattr_copy(idmap, inode, attr); mark_inode_dirty(inode); out: return error; diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index efba301d68ae..2116c486843b 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -261,7 +261,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, } /***** Create a file */ -static int msdos_create(struct user_namespace *mnt_userns, struct inode *dir, +static int msdos_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct super_block *sb = dir->i_sb; @@ -339,7 +339,7 @@ out: } /***** Make a directory */ -static int msdos_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int msdos_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct super_block *sb = dir->i_sb; @@ -594,7 +594,7 @@ error_inode: } /***** Rename, a wrapper for rename_same_dir & rename_diff_dir */ -static int msdos_rename(struct user_namespace *mnt_userns, +static int msdos_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 21620054e1c4..fceda1de4805 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -756,7 +756,7 @@ error: return ERR_PTR(err); } -static int vfat_create(struct user_namespace *mnt_userns, struct inode *dir, +static int vfat_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct super_block *sb = dir->i_sb; @@ -844,7 +844,7 @@ out: return err; } -static int vfat_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int vfat_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct super_block *sb = dir->i_sb; @@ -1158,7 +1158,7 @@ error_exchange: goto out; } -static int vfat_rename2(struct user_namespace *mnt_userns, struct inode *old_dir, +static int vfat_rename2(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { diff --git a/fs/fcntl.c b/fs/fcntl.c index 146c9ab0cd4b..b622be119706 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -10,6 +10,7 @@ #include <linux/mm.h> #include <linux/sched/task.h> #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/capability.h> @@ -47,7 +48,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg) /* O_NOATIME can only be set by the owner or superuser */ if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) - if (!inode_owner_or_capable(file_mnt_user_ns(filp), inode)) + if (!inode_owner_or_capable(file_mnt_idmap(filp), inode)) return -EPERM; /* required for strict SunOS emulation */ diff --git a/fs/file_table.c b/fs/file_table.c index dd88701e54a9..372653b92617 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/security.h> #include <linux/cred.h> #include <linux/eventpoll.h> diff --git a/fs/freevxfs/Kconfig b/fs/freevxfs/Kconfig index c05c71d57291..0e2fc08f7de4 100644 --- a/fs/freevxfs/Kconfig +++ b/fs/freevxfs/Kconfig @@ -8,7 +8,7 @@ config VXFS_FS of SCO UnixWare (and possibly others) and optionally available for Sunsoft Solaris, HP-UX and many other operating systems. However these particular OS implementations of vxfs may differ in on-disk - data endianess and/or superblock offset. The vxfs module has been + data endianness and/or superblock offset. The vxfs module has been tested with SCO UnixWare and HP-UX B.10.20 (pa-risc 1.1 arch.) Currently only readonly access is supported and VxFX versions 2, 3 and 4. Tests were performed with HP-UX VxFS version 3. diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c index ab8ceddf9efa..cdf991bdd9de 100644 --- a/fs/fscache/volume.c +++ b/fs/fscache/volume.c @@ -141,13 +141,14 @@ static bool fscache_is_acquire_pending(struct fscache_volume *volume) static void fscache_wait_on_volume_collision(struct fscache_volume *candidate, unsigned int collidee_debug_id) { - wait_var_event_timeout(&candidate->flags, - !fscache_is_acquire_pending(candidate), 20 * HZ); + wait_on_bit_timeout(&candidate->flags, FSCACHE_VOLUME_ACQUIRE_PENDING, + TASK_UNINTERRUPTIBLE, 20 * HZ); if (fscache_is_acquire_pending(candidate)) { pr_notice("Potential volume collision new=%08x old=%08x", candidate->debug_id, collidee_debug_id); fscache_stat(&fscache_n_volumes_collision); - wait_var_event(&candidate->flags, !fscache_is_acquire_pending(candidate)); + wait_on_bit(&candidate->flags, FSCACHE_VOLUME_ACQUIRE_PENDING, + TASK_UNINTERRUPTIBLE); } } @@ -279,8 +280,7 @@ static void fscache_create_volume_work(struct work_struct *work) fscache_end_cache_access(volume->cache, fscache_access_acquire_volume_end); - clear_bit_unlock(FSCACHE_VOLUME_CREATING, &volume->flags); - wake_up_bit(&volume->flags, FSCACHE_VOLUME_CREATING); + clear_and_wake_up_bit(FSCACHE_VOLUME_CREATING, &volume->flags); fscache_put_volume(volume, fscache_volume_put_create_work); } @@ -347,8 +347,8 @@ static void fscache_wake_pending_volume(struct fscache_volume *volume, hlist_bl_for_each_entry(cursor, p, h, hash_link) { if (fscache_volume_same(cursor, volume)) { fscache_see_volume(cursor, fscache_volume_see_hash_wake); - clear_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, &cursor->flags); - wake_up_bit(&cursor->flags, FSCACHE_VOLUME_ACQUIRE_PENDING); + clear_and_wake_up_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, + &cursor->flags); return; } } diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index a4850aee2639..3d192b80a561 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -11,9 +11,10 @@ #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> -struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu) +static struct posix_acl *__fuse_get_acl(struct fuse_conn *fc, + struct mnt_idmap *idmap, + struct inode *inode, int type, bool rcu) { - struct fuse_conn *fc = get_fuse_conn(inode); int size; const char *name; void *value = NULL; @@ -25,7 +26,7 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu) if (fuse_is_bad(inode)) return ERR_PTR(-EIO); - if (!fc->posix_acl || fc->no_getxattr) + if (fc->no_getxattr) return NULL; if (type == ACL_TYPE_ACCESS) @@ -53,7 +54,47 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu) return acl; } -int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +static inline bool fuse_no_acl(const struct fuse_conn *fc, + const struct inode *inode) +{ + /* + * Refuse interacting with POSIX ACLs for daemons that + * don't support FUSE_POSIX_ACL and are not mounted on + * the host to retain backwards compatibility. + */ + return !fc->posix_acl && (i_user_ns(inode) != &init_user_ns); +} + +struct posix_acl *fuse_get_acl(struct mnt_idmap *idmap, + struct dentry *dentry, int type) +{ + struct inode *inode = d_inode(dentry); + struct fuse_conn *fc = get_fuse_conn(inode); + + if (fuse_no_acl(fc, inode)) + return ERR_PTR(-EOPNOTSUPP); + + return __fuse_get_acl(fc, idmap, inode, type, false); +} + +struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + + /* + * FUSE daemons before FUSE_POSIX_ACL was introduced could get and set + * POSIX ACLs without them being used for permission checking by the + * vfs. Retain that behavior for backwards compatibility as there are + * filesystems that do all permission checking for acls in the daemon + * and not in the kernel. + */ + if (!fc->posix_acl) + return NULL; + + return __fuse_get_acl(fc, &nop_mnt_idmap, inode, type, rcu); +} + +int fuse_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { struct inode *inode = d_inode(dentry); @@ -64,7 +105,7 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, if (fuse_is_bad(inode)) return -EIO; - if (!fc->posix_acl || fc->no_setxattr) + if (fc->no_setxattr || fuse_no_acl(fc, inode)) return -EOPNOTSUPP; if (type == ACL_TYPE_ACCESS) @@ -99,8 +140,14 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, return ret; } - if (!vfsgid_in_group_p(i_gid_into_vfsgid(&init_user_ns, inode)) && - !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) + /* + * Fuse daemons without FUSE_POSIX_ACL never changed the passed + * through POSIX ACLs. Such daemons don't expect setgid bits to + * be stripped. + */ + if (fc->posix_acl && + !vfsgid_in_group_p(i_gid_into_vfsgid(&nop_mnt_idmap, inode)) && + !capable_wrt_inode_uidgid(&nop_mnt_idmap, inode, CAP_FSETID)) extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID; ret = fuse_setxattr(inode, name, value, size, 0, extra_flags); @@ -108,8 +155,15 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, } else { ret = fuse_removexattr(inode, name); } - forget_all_cached_acls(inode); - fuse_invalidate_attr(inode); + + if (fc->posix_acl) { + /* + * Fuse daemons without FUSE_POSIX_ACL never cached POSIX ACLs + * and didn't invalidate attributes. Retain that behavior. + */ + forget_all_cached_acls(inode); + fuse_invalidate_attr(inode); + } return ret; } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index cd1a071b625a..cd1eae61e84c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -645,7 +645,7 @@ out_err: return err; } -static int fuse_mknod(struct user_namespace *, struct inode *, struct dentry *, +static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct file *file, unsigned flags, @@ -686,7 +686,7 @@ out_dput: return err; mknod: - err = fuse_mknod(&init_user_ns, dir, entry, mode, 0); + err = fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0); if (err) goto out_dput; no_open: @@ -773,7 +773,7 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, return err; } -static int fuse_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *entry, umode_t mode, dev_t rdev) { struct fuse_mknod_in inarg; @@ -796,13 +796,13 @@ static int fuse_mknod(struct user_namespace *mnt_userns, struct inode *dir, return create_new_entry(fm, &args, dir, entry, mode); } -static int fuse_create(struct user_namespace *mnt_userns, struct inode *dir, +static int fuse_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *entry, umode_t mode, bool excl) { - return fuse_mknod(&init_user_ns, dir, entry, mode, 0); + return fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0); } -static int fuse_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, +static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { struct fuse_conn *fc = get_fuse_conn(dir); @@ -819,7 +819,7 @@ static int fuse_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, return err; } -static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *entry, umode_t mode) { struct fuse_mkdir_in inarg; @@ -841,7 +841,7 @@ static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir, return create_new_entry(fm, &args, dir, entry, S_IFDIR); } -static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *entry, const char *link) { struct fuse_mount *fm = get_fuse_mount(dir); @@ -998,7 +998,7 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, return err; } -static int fuse_rename2(struct user_namespace *mnt_userns, struct inode *olddir, +static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent, struct inode *newdir, struct dentry *newent, unsigned int flags) { @@ -1156,7 +1156,7 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, forget_all_cached_acls(inode); err = fuse_do_getattr(inode, stat, file); } else if (stat) { - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); stat->mode = fi->orig_i_mode; stat->ino = fi->orig_ino; } @@ -1326,7 +1326,7 @@ static int fuse_perm_getattr(struct inode *inode, int mask) * access request is sent. Execute permission is still checked * locally based on file mode. */ -static int fuse_permission(struct user_namespace *mnt_userns, +static int fuse_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct fuse_conn *fc = get_fuse_conn(inode); @@ -1358,7 +1358,7 @@ static int fuse_permission(struct user_namespace *mnt_userns, } if (fc->default_permissions) { - err = generic_permission(&init_user_ns, inode, mask); + err = generic_permission(&nop_mnt_idmap, inode, mask); /* If permission is denied, try to refresh file attributes. This is also needed, because the root @@ -1366,7 +1366,7 @@ static int fuse_permission(struct user_namespace *mnt_userns, if (err == -EACCES && !refreshed) { err = fuse_perm_getattr(inode, mask); if (!err) - err = generic_permission(&init_user_ns, + err = generic_permission(&nop_mnt_idmap, inode, mask); } @@ -1690,7 +1690,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, if (!fc->default_permissions) attr->ia_valid |= ATTR_FORCE; - err = setattr_prepare(&init_user_ns, dentry, attr); + err = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (err) return err; @@ -1837,7 +1837,7 @@ error: return err; } -static int fuse_setattr(struct user_namespace *mnt_userns, struct dentry *entry, +static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry, struct iattr *attr) { struct inode *inode = d_inode(entry); @@ -1900,7 +1900,7 @@ static int fuse_setattr(struct user_namespace *mnt_userns, struct dentry *entry, return ret; } -static int fuse_getattr(struct user_namespace *mnt_userns, +static int fuse_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { @@ -1942,7 +1942,8 @@ static const struct inode_operations fuse_dir_inode_operations = { .permission = fuse_permission, .getattr = fuse_getattr, .listxattr = fuse_listxattr, - .get_inode_acl = fuse_get_acl, + .get_inode_acl = fuse_get_inode_acl, + .get_acl = fuse_get_acl, .set_acl = fuse_set_acl, .fileattr_get = fuse_fileattr_get, .fileattr_set = fuse_fileattr_set, @@ -1964,7 +1965,8 @@ static const struct inode_operations fuse_common_inode_operations = { .permission = fuse_permission, .getattr = fuse_getattr, .listxattr = fuse_listxattr, - .get_inode_acl = fuse_get_acl, + .get_inode_acl = fuse_get_inode_acl, + .get_acl = fuse_get_acl, .set_acl = fuse_set_acl, .fileattr_get = fuse_fileattr_get, .fileattr_set = fuse_fileattr_set, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 875314ee6f59..82710d103556 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -18,6 +18,7 @@ #include <linux/falloc.h> #include <linux/uio.h> #include <linux/fs.h> +#include <linux/filelock.h> static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, unsigned int open_flags, int opcode, @@ -1313,7 +1314,8 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from) return err; if (fc->handle_killpriv_v2 && - setattr_should_drop_suidgid(&init_user_ns, file_inode(file))) { + setattr_should_drop_suidgid(&nop_mnt_idmap, + file_inode(file))) { goto writethrough; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index c673faefdcb9..9b5058cf5bc3 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1264,12 +1264,12 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value, ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size); int fuse_removexattr(struct inode *inode, const char *name); extern const struct xattr_handler *fuse_xattr_handlers[]; -extern const struct xattr_handler *fuse_acl_xattr_handlers[]; -extern const struct xattr_handler *fuse_no_acl_xattr_handlers[]; struct posix_acl; -struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu); -int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu); +struct posix_acl *fuse_get_acl(struct mnt_idmap *idmap, + struct dentry *dentry, int type); +int fuse_set_acl(struct mnt_idmap *, struct dentry *dentry, struct posix_acl *acl, int type); /* readdir.c */ @@ -1309,7 +1309,7 @@ long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int fuse_fileattr_get(struct dentry *dentry, struct fileattr *fa); -int fuse_fileattr_set(struct user_namespace *mnt_userns, +int fuse_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); /* file.c */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6b3beda16c1b..de9b9ec5ce81 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -311,7 +311,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, fuse_dax_dontcache(inode, attr->flags); } -static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) +static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr, + struct fuse_conn *fc) { inode->i_mode = attr->mode & S_IFMT; inode->i_size = attr->size; @@ -333,6 +334,12 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) new_decode_dev(attr->rdev)); } else BUG(); + /* + * Ensure that we don't cache acls for daemons without FUSE_POSIX_ACL + * so they see the exact same behavior as before. + */ + if (!fc->posix_acl) + inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; } static int fuse_inode_eq(struct inode *inode, void *_nodeidp) @@ -372,7 +379,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, if (!inode) return NULL; - fuse_init_inode(inode, attr); + fuse_init_inode(inode, attr, fc); get_fuse_inode(inode)->nodeid = nodeid; inode->i_flags |= S_AUTOMOUNT; goto done; @@ -388,7 +395,7 @@ retry: if (!fc->writeback_cache || !S_ISREG(attr->mode)) inode->i_flags |= S_NOCMTIME; inode->i_generation = generation; - fuse_init_inode(inode, attr); + fuse_init_inode(inode, attr, fc); unlock_new_inode(inode); } else if (fuse_stale_inode(inode, generation, attr)) { /* nodeid was reused, any I/O on the old inode should fail */ @@ -1174,7 +1181,6 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, if ((flags & FUSE_POSIX_ACL)) { fc->default_permissions = 1; fc->posix_acl = 1; - fm->sb->s_xattr = fuse_acl_xattr_handlers; } if (flags & FUSE_CACHE_SYMLINKS) fc->cache_symlinks = 1; @@ -1420,13 +1426,6 @@ static void fuse_sb_defaults(struct super_block *sb) if (sb->s_user_ns != &init_user_ns) sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); - - /* - * If we are not in the initial user namespace posix - * acls must be translated. - */ - if (sb->s_user_ns != &init_user_ns) - sb->s_xattr = fuse_no_acl_xattr_handlers; } static int fuse_fill_super_submount(struct super_block *sb, diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c index fcce94ace2c2..e50a18ee6cc6 100644 --- a/fs/fuse/ioctl.c +++ b/fs/fuse/ioctl.c @@ -467,7 +467,7 @@ cleanup: return err; } -int fuse_fileattr_set(struct user_namespace *mnt_userns, +int fuse_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 0d3e7177fce0..49c01559580f 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -189,7 +189,7 @@ static int fuse_xattr_get(const struct xattr_handler *handler, } static int fuse_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -203,27 +203,6 @@ static int fuse_xattr_set(const struct xattr_handler *handler, return fuse_setxattr(inode, name, value, size, flags, 0); } -static bool no_xattr_list(struct dentry *dentry) -{ - return false; -} - -static int no_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *value, size_t size) -{ - return -EOPNOTSUPP; -} - -static int no_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, - struct dentry *dentry, struct inode *nodee, - const char *name, const void *value, - size_t size, int flags) -{ - return -EOPNOTSUPP; -} - static const struct xattr_handler fuse_xattr_handler = { .prefix = "", .get = fuse_xattr_get, @@ -234,33 +213,3 @@ const struct xattr_handler *fuse_xattr_handlers[] = { &fuse_xattr_handler, NULL }; - -const struct xattr_handler *fuse_acl_xattr_handlers[] = { - &posix_acl_access_xattr_handler, - &posix_acl_default_xattr_handler, - &fuse_xattr_handler, - NULL -}; - -static const struct xattr_handler fuse_no_acl_access_xattr_handler = { - .name = XATTR_NAME_POSIX_ACL_ACCESS, - .flags = ACL_TYPE_ACCESS, - .list = no_xattr_list, - .get = no_xattr_get, - .set = no_xattr_set, -}; - -static const struct xattr_handler fuse_no_acl_default_xattr_handler = { - .name = XATTR_NAME_POSIX_ACL_DEFAULT, - .flags = ACL_TYPE_ACCESS, - .list = no_xattr_list, - .get = no_xattr_get, - .set = no_xattr_set, -}; - -const struct xattr_handler *fuse_no_acl_xattr_handlers[] = { - &fuse_no_acl_access_xattr_handler, - &fuse_no_acl_default_xattr_handler, - &fuse_xattr_handler, - NULL -}; diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 3dcde4912413..a392aa0f041d 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -109,7 +109,7 @@ out: return error; } -int gfs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { struct inode *inode = d_inode(dentry); @@ -135,7 +135,7 @@ int gfs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, mode = inode->i_mode; if (type == ACL_TYPE_ACCESS && acl) { - ret = posix_acl_update_mode(&init_user_ns, inode, &mode, &acl); + ret = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode, &acl); if (ret) goto unlock; } diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index b8de8c148f5c..d4deb2b19959 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -13,7 +13,7 @@ extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu); extern int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); -extern int gfs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +extern int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); #endif /* __ACL_DOT_H__ */ diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index eea5be4fbf0e..300844f50dcd 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -15,6 +15,7 @@ #include <linux/mm.h> #include <linux/mount.h> #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/gfs2_ondisk.h> #include <linux/falloc.h> #include <linux/swap.h> @@ -235,7 +236,7 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask) goto out; if (!IS_IMMUTABLE(inode)) { - error = gfs2_permission(&init_user_ns, inode, MAY_WRITE); + error = gfs2_permission(&nop_mnt_idmap, inode, MAY_WRITE); if (error) goto out; } @@ -273,7 +274,7 @@ out: return error; } -int gfs2_fileattr_set(struct user_namespace *mnt_userns, +int gfs2_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 614db3055c02..713efa3bb732 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -320,7 +320,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, } if (!is_root) { - error = gfs2_permission(&init_user_ns, dir, MAY_EXEC); + error = gfs2_permission(&nop_mnt_idmap, dir, MAY_EXEC); if (error) goto out; } @@ -350,7 +350,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, { int error; - error = gfs2_permission(&init_user_ns, &dip->i_inode, + error = gfs2_permission(&nop_mnt_idmap, &dip->i_inode, MAY_WRITE | MAY_EXEC); if (error) return error; @@ -843,7 +843,7 @@ fail: /** * gfs2_create - Create a file - * @mnt_userns: User namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: The directory in which to create the file * @dentry: The dentry of the new file * @mode: The mode of the new file @@ -852,7 +852,7 @@ fail: * Returns: errno */ -static int gfs2_create(struct user_namespace *mnt_userns, struct inode *dir, +static int gfs2_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl); @@ -960,7 +960,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) goto out_gunlock; - error = gfs2_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC); + error = gfs2_permission(&nop_mnt_idmap, dir, MAY_WRITE | MAY_EXEC); if (error) goto out_gunlock; @@ -1078,7 +1078,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, if (IS_APPEND(&dip->i_inode)) return -EPERM; - error = gfs2_permission(&init_user_ns, &dip->i_inode, + error = gfs2_permission(&nop_mnt_idmap, &dip->i_inode, MAY_WRITE | MAY_EXEC); if (error) return error; @@ -1207,7 +1207,7 @@ out_inodes: /** * gfs2_symlink - Create a symlink - * @mnt_userns: User namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: The directory to create the symlink in * @dentry: The dentry to put the symlink in * @symname: The thing which the link points to @@ -1215,7 +1215,7 @@ out_inodes: * Returns: errno */ -static int gfs2_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int gfs2_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { unsigned int size; @@ -1229,7 +1229,7 @@ static int gfs2_symlink(struct user_namespace *mnt_userns, struct inode *dir, /** * gfs2_mkdir - Make a directory - * @mnt_userns: User namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: The parent directory of the new one * @dentry: The dentry of the new directory * @mode: The mode of the new directory @@ -1237,7 +1237,7 @@ static int gfs2_symlink(struct user_namespace *mnt_userns, struct inode *dir, * Returns: errno */ -static int gfs2_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int gfs2_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { unsigned dsize = gfs2_max_stuffed_size(GFS2_I(dir)); @@ -1246,7 +1246,7 @@ static int gfs2_mkdir(struct user_namespace *mnt_userns, struct inode *dir, /** * gfs2_mknod - Make a special file - * @mnt_userns: User namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: The directory in which the special file will reside * @dentry: The dentry of the special file * @mode: The mode of the special file @@ -1254,7 +1254,7 @@ static int gfs2_mkdir(struct user_namespace *mnt_userns, struct inode *dir, * */ -static int gfs2_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int gfs2_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0); @@ -1504,7 +1504,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, } } } else { - error = gfs2_permission(&init_user_ns, ndir, + error = gfs2_permission(&nop_mnt_idmap, ndir, MAY_WRITE | MAY_EXEC); if (error) goto out_gunlock; @@ -1541,7 +1541,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, /* Check out the dir to be renamed */ if (dir_rename) { - error = gfs2_permission(&init_user_ns, d_inode(odentry), + error = gfs2_permission(&nop_mnt_idmap, d_inode(odentry), MAY_WRITE); if (error) goto out_gunlock; @@ -1705,13 +1705,13 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry, goto out_gunlock; if (S_ISDIR(old_mode)) { - error = gfs2_permission(&init_user_ns, odentry->d_inode, + error = gfs2_permission(&nop_mnt_idmap, odentry->d_inode, MAY_WRITE); if (error) goto out_gunlock; } if (S_ISDIR(new_mode)) { - error = gfs2_permission(&init_user_ns, ndentry->d_inode, + error = gfs2_permission(&nop_mnt_idmap, ndentry->d_inode, MAY_WRITE); if (error) goto out_gunlock; @@ -1766,7 +1766,7 @@ out: return error; } -static int gfs2_rename2(struct user_namespace *mnt_userns, struct inode *odir, +static int gfs2_rename2(struct mnt_idmap *idmap, struct inode *odir, struct dentry *odentry, struct inode *ndir, struct dentry *ndentry, unsigned int flags) { @@ -1841,7 +1841,7 @@ out: /** * gfs2_permission - * @mnt_userns: User namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @inode: The inode * @mask: The mask to be tested * @@ -1852,7 +1852,7 @@ out: * Returns: errno */ -int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode, +int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct gfs2_inode *ip; @@ -1872,7 +1872,7 @@ int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode, if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) error = -EPERM; else - error = generic_permission(&init_user_ns, inode, mask); + error = generic_permission(&nop_mnt_idmap, inode, mask); if (gfs2_holder_initialized(&i_gh)) gfs2_glock_dq_uninit(&i_gh); @@ -1881,7 +1881,7 @@ int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode, static int __gfs2_setattr_simple(struct inode *inode, struct iattr *attr) { - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } @@ -1966,7 +1966,7 @@ out: /** * gfs2_setattr - Change attributes on an inode - * @mnt_userns: User namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dentry: The dentry which is changing * @attr: The structure describing the change * @@ -1976,7 +1976,7 @@ out: * Returns: errno */ -static int gfs2_setattr(struct user_namespace *mnt_userns, +static int gfs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -1992,11 +1992,11 @@ static int gfs2_setattr(struct user_namespace *mnt_userns, if (error) goto out; - error = may_setattr(&init_user_ns, inode, attr->ia_valid); + error = may_setattr(&nop_mnt_idmap, inode, attr->ia_valid); if (error) goto error; - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) goto error; @@ -2007,7 +2007,7 @@ static int gfs2_setattr(struct user_namespace *mnt_userns, else { error = gfs2_setattr_simple(inode, attr); if (!error && attr->ia_valid & ATTR_MODE) - error = posix_acl_chmod(&init_user_ns, dentry, + error = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode); } @@ -2022,7 +2022,7 @@ out: /** * gfs2_getattr - Read out an inode's attributes - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @path: Object to query * @stat: The inode's stats * @request_mask: Mask of STATX_xxx flags indicating the caller's interests @@ -2037,7 +2037,7 @@ out: * Returns: errno */ -static int gfs2_getattr(struct user_namespace *mnt_userns, +static int gfs2_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { @@ -2066,7 +2066,7 @@ static int gfs2_getattr(struct user_namespace *mnt_userns, STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); if (gfs2_holder_initialized(&gh)) gfs2_glock_dq_uninit(&gh); diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 0264d514dda7..c8c5814e7295 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -99,7 +99,7 @@ extern int gfs2_inode_refresh(struct gfs2_inode *ip); extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, int is_root); -extern int gfs2_permission(struct user_namespace *mnt_userns, +extern int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); @@ -111,7 +111,7 @@ extern const struct file_operations gfs2_file_fops_nolock; extern const struct file_operations gfs2_dir_fops_nolock; extern int gfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa); -extern int gfs2_fileattr_set(struct user_namespace *mnt_userns, +extern int gfs2_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); extern void gfs2_set_inode_flags(struct inode *inode); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 723639376ae2..61323deb80bc 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -80,6 +80,15 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd) brelse(bd->bd_bh); } +static int __gfs2_writepage(struct page *page, struct writeback_control *wbc, + void *data) +{ + struct address_space *mapping = data; + int ret = mapping->a_ops->writepage(page, wbc); + mapping_set_error(mapping, ret); + return ret; +} + /** * gfs2_ail1_start_one - Start I/O on a transaction * @sdp: The superblock @@ -131,7 +140,7 @@ __acquires(&sdp->sd_ail_lock) if (!mapping) continue; spin_unlock(&sdp->sd_ail_lock); - ret = filemap_fdatawrite_wbc(mapping, wbc); + ret = write_cache_pages(mapping, wbc, __gfs2_writepage, mapping); if (need_resched()) { blk_finish_plug(plug); cond_resched(); diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 518c0677e12a..adf6d17cf033 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1225,7 +1225,7 @@ int __gfs2_xattr_set(struct inode *inode, const char *name, } static int gfs2_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/hfs/attr.c b/fs/hfs/attr.c index 2bd54efaf416..6341bb248247 100644 --- a/fs/hfs/attr.c +++ b/fs/hfs/attr.c @@ -121,7 +121,7 @@ static int hfs_xattr_get(const struct xattr_handler *handler, } static int hfs_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 527f6e46cbe8..3e1e3dcf0b48 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -189,7 +189,7 @@ static int hfs_dir_release(struct inode *inode, struct file *file) * a directory and return a corresponding inode, given the inode for * the directory and the name (and its length) of the new file. */ -static int hfs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int hfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct inode *inode; @@ -219,7 +219,7 @@ static int hfs_create(struct user_namespace *mnt_userns, struct inode *dir, * in a directory, given the inode for the parent directory and the * name (and its length) of the new directory. */ -static int hfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int hfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; @@ -280,7 +280,7 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry) * new file/directory. * XXX: how do you handle must_be dir? */ -static int hfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +static int hfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 68d0305880f7..49d02524e667 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -206,7 +206,7 @@ int hfs_write_begin(struct file *file, struct address_space *mapping, extern struct inode *hfs_new_inode(struct inode *, const struct qstr *, umode_t); extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *); extern int hfs_write_inode(struct inode *, struct writeback_control *); -extern int hfs_inode_setattr(struct user_namespace *, struct dentry *, +extern int hfs_inode_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, __be32 log_size, __be32 phys_size, u32 clump_size); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 9c329a365e75..1f7bd068acf0 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -458,15 +458,16 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) /* panic? */ return -EIO; + res = -EIO; if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN) - return -EIO; + goto out; fd.search_key->cat = HFS_I(main_inode)->cat_key; if (hfs_brec_find(&fd)) - /* panic? */ goto out; if (S_ISDIR(main_inode->i_mode)) { - WARN_ON(fd.entrylength < sizeof(struct hfs_cat_dir)); + if (fd.entrylength < sizeof(struct hfs_cat_dir)) + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_dir)); if (rec.type != HFS_CDR_DIR || @@ -479,6 +480,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_dir)); } else if (HFS_IS_RSRC(inode)) { + if (fd.entrylength < sizeof(struct hfs_cat_file)) + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); hfs_inode_write_fork(inode, rec.file.RExtRec, @@ -486,7 +489,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); } else { - WARN_ON(fd.entrylength < sizeof(struct hfs_cat_file)); + if (fd.entrylength < sizeof(struct hfs_cat_file)) + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); if (rec.type != HFS_CDR_FIL || @@ -503,9 +507,10 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); } + res = 0; out: hfs_find_exit(&fd); - return 0; + return res; } static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, @@ -606,14 +611,14 @@ static int hfs_file_release(struct inode *inode, struct file *file) * correspond to the same HFS file. */ -int hfs_inode_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int hfs_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); struct hfs_sb_info *hsb = HFS_SB(inode->i_sb); int error; - error = setattr_prepare(&init_user_ns, dentry, + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); /* basic permission checks */ if (error) return error; @@ -653,7 +658,7 @@ int hfs_inode_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, current_time(inode); } - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 84714bbccc12..56fb5f1312e7 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -434,7 +434,7 @@ out: return res; } -static int hfsplus_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int hfsplus_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); @@ -476,7 +476,7 @@ out: return res; } -static int hfsplus_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int hfsplus_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); @@ -517,19 +517,19 @@ out: return res; } -static int hfsplus_create(struct user_namespace *mnt_userns, struct inode *dir, +static int hfsplus_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - return hfsplus_mknod(&init_user_ns, dir, dentry, mode, 0); + return hfsplus_mknod(&nop_mnt_idmap, dir, dentry, mode, 0); } -static int hfsplus_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int hfsplus_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { - return hfsplus_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0); + return hfsplus_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFDIR, 0); } -static int hfsplus_rename(struct user_namespace *mnt_userns, +static int hfsplus_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 6aa919e59483..7ededcb720c1 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -481,13 +481,13 @@ void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork); int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd); int hfsplus_cat_write_inode(struct inode *inode); -int hfsplus_getattr(struct user_namespace *mnt_userns, const struct path *path, +int hfsplus_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags); int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); int hfsplus_fileattr_get(struct dentry *dentry, struct fileattr *fa); -int hfsplus_fileattr_set(struct user_namespace *mnt_userns, +int hfsplus_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); /* ioctl.c */ diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 840577a0c1e7..abb91f5fae92 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -246,13 +246,13 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) return 0; } -static int hfsplus_setattr(struct user_namespace *mnt_userns, +static int hfsplus_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); int error; - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; @@ -270,13 +270,13 @@ static int hfsplus_setattr(struct user_namespace *mnt_userns, inode->i_mtime = inode->i_ctime = current_time(inode); } - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } -int hfsplus_getattr(struct user_namespace *mnt_userns, const struct path *path, +int hfsplus_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { @@ -298,7 +298,7 @@ int hfsplus_getattr(struct user_namespace *mnt_userns, const struct path *path, stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP; - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); return 0; } @@ -390,7 +390,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir, return NULL; inode->i_ino = sbi->next_cnid++; - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); set_nlink(inode, 1); inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); @@ -655,7 +655,7 @@ int hfsplus_fileattr_get(struct dentry *dentry, struct fileattr *fa) return 0; } -int hfsplus_fileattr_set(struct user_namespace *mnt_userns, +int hfsplus_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index 49891b12c415..5b476f57eb17 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -857,7 +857,7 @@ static int hfsplus_osx_getxattr(const struct xattr_handler *handler, } static int hfsplus_osx_setxattr(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c index c1c7a16cbf21..90f68ec119cd 100644 --- a/fs/hfsplus/xattr_security.c +++ b/fs/hfsplus/xattr_security.c @@ -23,7 +23,7 @@ static int hfsplus_security_getxattr(const struct xattr_handler *handler, } static int hfsplus_security_setxattr(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c index e150372ec564..fdbaebc1c49a 100644 --- a/fs/hfsplus/xattr_trusted.c +++ b/fs/hfsplus/xattr_trusted.c @@ -22,7 +22,7 @@ static int hfsplus_trusted_getxattr(const struct xattr_handler *handler, } static int hfsplus_trusted_setxattr(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c index a6b60b153916..6464b6c3d58d 100644 --- a/fs/hfsplus/xattr_user.c +++ b/fs/hfsplus/xattr_user.c @@ -22,7 +22,7 @@ static int hfsplus_user_getxattr(const struct xattr_handler *handler, } static int hfsplus_user_setxattr(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 277468783fee..c18bb50c31b6 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -559,7 +559,7 @@ static int read_name(struct inode *ino, char *name) return 0; } -static int hostfs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int hostfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct inode *inode; @@ -658,7 +658,7 @@ static int hostfs_unlink(struct inode *ino, struct dentry *dentry) return err; } -static int hostfs_symlink(struct user_namespace *mnt_userns, struct inode *ino, +static int hostfs_symlink(struct mnt_idmap *idmap, struct inode *ino, struct dentry *dentry, const char *to) { char *file; @@ -671,7 +671,7 @@ static int hostfs_symlink(struct user_namespace *mnt_userns, struct inode *ino, return err; } -static int hostfs_mkdir(struct user_namespace *mnt_userns, struct inode *ino, +static int hostfs_mkdir(struct mnt_idmap *idmap, struct inode *ino, struct dentry *dentry, umode_t mode) { char *file; @@ -696,7 +696,7 @@ static int hostfs_rmdir(struct inode *ino, struct dentry *dentry) return err; } -static int hostfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int hostfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { struct inode *inode; @@ -734,7 +734,7 @@ static int hostfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, return err; } -static int hostfs_rename2(struct user_namespace *mnt_userns, +static int hostfs_rename2(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) @@ -763,7 +763,7 @@ static int hostfs_rename2(struct user_namespace *mnt_userns, return err; } -static int hostfs_permission(struct user_namespace *mnt_userns, +static int hostfs_permission(struct mnt_idmap *idmap, struct inode *ino, int desired) { char *name; @@ -786,11 +786,11 @@ static int hostfs_permission(struct user_namespace *mnt_userns, err = access_file(name, r, w, x); __putname(name); if (!err) - err = generic_permission(&init_user_ns, ino, desired); + err = generic_permission(&nop_mnt_idmap, ino, desired); return err; } -static int hostfs_setattr(struct user_namespace *mnt_userns, +static int hostfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -800,7 +800,7 @@ static int hostfs_setattr(struct user_namespace *mnt_userns, int fd = HOSTFS_I(inode)->fd; - err = setattr_prepare(&init_user_ns, dentry, attr); + err = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (err) return err; @@ -857,7 +857,7 @@ static int hostfs_setattr(struct user_namespace *mnt_userns, attr->ia_size != i_size_read(inode)) truncate_setsize(inode, attr->ia_size); - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 167ec6884642..f5a2476c47bf 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -280,7 +280,7 @@ void hpfs_init_inode(struct inode *); void hpfs_read_inode(struct inode *); void hpfs_write_inode(struct inode *); void hpfs_write_inode_nolock(struct inode *); -int hpfs_setattr(struct user_namespace *, struct dentry *, struct iattr *); +int hpfs_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); void hpfs_write_if_changed(struct inode *); void hpfs_evict_inode(struct inode *); diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 82208cc28ebd..e50e92a42432 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -257,7 +257,7 @@ void hpfs_write_inode_nolock(struct inode *i) brelse(bh); } -int hpfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int hpfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -275,7 +275,7 @@ int hpfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) goto out_unlock; - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) goto out_unlock; @@ -289,7 +289,7 @@ int hpfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, hpfs_truncate(inode); } - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); hpfs_write_inode(inode); diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 15fc63276caa..69fb40b2c99a 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -20,7 +20,7 @@ static void hpfs_update_directory_times(struct inode *dir) hpfs_write_inode_nolock(dir); } -static int hpfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int hpfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { const unsigned char *name = dentry->d_name.name; @@ -129,7 +129,7 @@ bail: return err; } -static int hpfs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int hpfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { const unsigned char *name = dentry->d_name.name; @@ -217,7 +217,7 @@ bail: return err; } -static int hpfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int hpfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { const unsigned char *name = dentry->d_name.name; @@ -292,7 +292,7 @@ bail: return err; } -static int hpfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int hpfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symlink) { const unsigned char *name = dentry->d_name.name; @@ -512,7 +512,7 @@ const struct address_space_operations hpfs_symlink_aops = { .read_folio = hpfs_symlink_read_folio }; -static int hpfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +static int hpfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 790d2727141a..0ce1cc4c2add 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -898,7 +898,7 @@ out: return error; } -static int hugetlbfs_setattr(struct user_namespace *mnt_userns, +static int hugetlbfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -907,7 +907,7 @@ static int hugetlbfs_setattr(struct user_namespace *mnt_userns, unsigned int ia_valid = attr->ia_valid; struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; @@ -924,7 +924,7 @@ static int hugetlbfs_setattr(struct user_namespace *mnt_userns, hugetlb_vmtruncate(inode, newsize); } - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } @@ -980,7 +980,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); inode->i_ino = get_next_ino(); - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); lockdep_set_class(&inode->i_mapping->i_mmap_rwsem, &hugetlbfs_i_mmap_rwsem_key); inode->i_mapping->a_ops = &hugetlbfs_aops; @@ -1019,7 +1019,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, /* * File creation. Allocate an inode, and we're done.. */ -static int hugetlbfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { struct inode *inode; @@ -1033,24 +1033,24 @@ static int hugetlbfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, return 0; } -static int hugetlbfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int hugetlbfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { - int retval = hugetlbfs_mknod(&init_user_ns, dir, dentry, + int retval = hugetlbfs_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFDIR, 0); if (!retval) inc_nlink(dir); return retval; } -static int hugetlbfs_create(struct user_namespace *mnt_userns, +static int hugetlbfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - return hugetlbfs_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0); + return hugetlbfs_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFREG, 0); } -static int hugetlbfs_tmpfile(struct user_namespace *mnt_userns, +static int hugetlbfs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { @@ -1064,7 +1064,7 @@ static int hugetlbfs_tmpfile(struct user_namespace *mnt_userns, return finish_open_simple(file, 0); } -static int hugetlbfs_symlink(struct user_namespace *mnt_userns, +static int hugetlbfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { diff --git a/fs/init.c b/fs/init.c index 5c36adaa9b44..9684406a8416 100644 --- a/fs/init.c +++ b/fs/init.c @@ -157,7 +157,7 @@ int __init init_mknod(const char *filename, umode_t mode, unsigned int dev) mode &= ~current_umask(); error = security_path_mknod(&path, dentry, mode, dev); if (!error) - error = vfs_mknod(mnt_user_ns(path.mnt), path.dentry->d_inode, + error = vfs_mknod(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, mode, new_decode_dev(dev)); done_path_create(&path, dentry); return error; @@ -167,7 +167,7 @@ int __init init_link(const char *oldname, const char *newname) { struct dentry *new_dentry; struct path old_path, new_path; - struct user_namespace *mnt_userns; + struct mnt_idmap *idmap; int error; error = kern_path(oldname, 0, &old_path); @@ -182,14 +182,14 @@ int __init init_link(const char *oldname, const char *newname) error = -EXDEV; if (old_path.mnt != new_path.mnt) goto out_dput; - mnt_userns = mnt_user_ns(new_path.mnt); - error = may_linkat(mnt_userns, &old_path); + idmap = mnt_idmap(new_path.mnt); + error = may_linkat(idmap, &old_path); if (unlikely(error)) goto out_dput; error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) goto out_dput; - error = vfs_link(old_path.dentry, mnt_userns, new_path.dentry->d_inode, + error = vfs_link(old_path.dentry, idmap, new_path.dentry->d_inode, new_dentry, NULL); out_dput: done_path_create(&new_path, new_dentry); @@ -209,7 +209,7 @@ int __init init_symlink(const char *oldname, const char *newname) return PTR_ERR(dentry); error = security_path_symlink(&path, dentry, oldname); if (!error) - error = vfs_symlink(mnt_user_ns(path.mnt), path.dentry->d_inode, + error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, oldname); done_path_create(&path, dentry); return error; @@ -233,7 +233,7 @@ int __init init_mkdir(const char *pathname, umode_t mode) mode &= ~current_umask(); error = security_path_mkdir(&path, dentry, mode); if (!error) - error = vfs_mkdir(mnt_user_ns(path.mnt), path.dentry->d_inode, + error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, mode); done_path_create(&path, dentry); return error; diff --git a/fs/inode.c b/fs/inode.c index f453eb58fd03..4558dc2f1355 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -5,6 +5,7 @@ */ #include <linux/export.h> #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/mm.h> #include <linux/backing-dev.h> #include <linux/hash.h> @@ -1893,7 +1894,7 @@ bool atime_needs_update(const struct path *path, struct inode *inode) /* Atime updates will likely cause i_uid and i_gid to be written * back improprely if their true value is unknown to the vfs. */ - if (HAS_UNMAPPED_ID(mnt_user_ns(mnt), inode)) + if (HAS_UNMAPPED_ID(mnt_idmap(mnt), inode)) return false; if (IS_NOATIME(inode)) @@ -1953,7 +1954,7 @@ EXPORT_SYMBOL(touch_atime); * response to write or truncate. Return 0 if nothing has to be changed. * Negative value on error (change should be denied). */ -int dentry_needs_remove_privs(struct user_namespace *mnt_userns, +int dentry_needs_remove_privs(struct mnt_idmap *idmap, struct dentry *dentry) { struct inode *inode = d_inode(dentry); @@ -1963,7 +1964,7 @@ int dentry_needs_remove_privs(struct user_namespace *mnt_userns, if (IS_NOSEC(inode)) return 0; - mask = setattr_should_drop_suidgid(mnt_userns, inode); + mask = setattr_should_drop_suidgid(idmap, inode); ret = security_inode_need_killpriv(dentry); if (ret < 0) return ret; @@ -1972,7 +1973,7 @@ int dentry_needs_remove_privs(struct user_namespace *mnt_userns, return mask; } -static int __remove_privs(struct user_namespace *mnt_userns, +static int __remove_privs(struct mnt_idmap *idmap, struct dentry *dentry, int kill) { struct iattr newattrs; @@ -1982,7 +1983,7 @@ static int __remove_privs(struct user_namespace *mnt_userns, * Note we call this on write, so notify_change will not * encounter any conflicting delegations: */ - return notify_change(mnt_userns, dentry, &newattrs, NULL); + return notify_change(idmap, dentry, &newattrs, NULL); } static int __file_remove_privs(struct file *file, unsigned int flags) @@ -1995,7 +1996,7 @@ static int __file_remove_privs(struct file *file, unsigned int flags) if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode)) return 0; - kill = dentry_needs_remove_privs(file_mnt_user_ns(file), dentry); + kill = dentry_needs_remove_privs(file_mnt_idmap(file), dentry); if (kill < 0) return kill; @@ -2003,7 +2004,7 @@ static int __file_remove_privs(struct file *file, unsigned int flags) if (flags & IOCB_NOWAIT) return -EAGAIN; - error = __remove_privs(file_mnt_user_ns(file), dentry, kill); + error = __remove_privs(file_mnt_idmap(file), dentry, kill); } if (!error) @@ -2279,21 +2280,21 @@ EXPORT_SYMBOL(init_special_inode); /** * inode_init_owner - Init uid,gid,mode for new inode according to posix standards - * @mnt_userns: User namespace of the mount the inode was created from + * @idmap: idmap of the mount the inode was created from * @inode: New inode * @dir: Directory inode * @mode: mode of the new inode * - * If the inode has been created through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions + * If the inode has been created through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions * and initializing i_uid and i_gid. On non-idmapped mounts or if permission - * checking is to be performed on the raw inode simply passs init_user_ns. + * checking is to be performed on the raw inode simply pass @nop_mnt_idmap. */ -void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode, +void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode, const struct inode *dir, umode_t mode) { - inode_fsuid_set(inode, mnt_userns); + inode_fsuid_set(inode, idmap); if (dir && dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; @@ -2301,32 +2302,32 @@ void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode, if (S_ISDIR(mode)) mode |= S_ISGID; } else - inode_fsgid_set(inode, mnt_userns); + inode_fsgid_set(inode, idmap); inode->i_mode = mode; } EXPORT_SYMBOL(inode_init_owner); /** * inode_owner_or_capable - check current task permissions to inode - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @inode: inode being checked * * Return true if current either has CAP_FOWNER in a namespace with the * inode owner uid mapped, or owns the file. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. + * raw inode simply passs @nop_mnt_idmap. */ -bool inode_owner_or_capable(struct user_namespace *mnt_userns, +bool inode_owner_or_capable(struct mnt_idmap *idmap, const struct inode *inode) { vfsuid_t vfsuid; struct user_namespace *ns; - vfsuid = i_uid_into_vfsuid(mnt_userns, inode); + vfsuid = i_uid_into_vfsuid(idmap, inode); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) return true; @@ -2458,7 +2459,7 @@ EXPORT_SYMBOL(current_time); /** * in_group_or_capable - check whether caller is CAP_FSETID privileged - * @mnt_userns: user namespace of the mount @inode was found from + * @idmap: idmap of the mount @inode was found from * @inode: inode to check * @vfsgid: the new/current vfsgid of @inode * @@ -2468,19 +2469,19 @@ EXPORT_SYMBOL(current_time); * * Return: true if the caller is sufficiently privileged, false if not. */ -bool in_group_or_capable(struct user_namespace *mnt_userns, +bool in_group_or_capable(struct mnt_idmap *idmap, const struct inode *inode, vfsgid_t vfsgid) { if (vfsgid_in_group_p(vfsgid)) return true; - if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID)) + if (capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) return true; return false; } /** * mode_strip_sgid - handle the sgid bit for non-directories - * @mnt_userns: User namespace of the mount the inode was created from + * @idmap: idmap of the mount the inode was created from * @dir: parent directory inode * @mode: mode of the file to be created in @dir * @@ -2492,15 +2493,14 @@ bool in_group_or_capable(struct user_namespace *mnt_userns, * * Return: the new mode to use for the file */ -umode_t mode_strip_sgid(struct user_namespace *mnt_userns, +umode_t mode_strip_sgid(struct mnt_idmap *idmap, const struct inode *dir, umode_t mode) { if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP)) return mode; if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID)) return mode; - if (in_group_or_capable(mnt_userns, dir, - i_gid_into_vfsgid(mnt_userns, dir))) + if (in_group_or_capable(idmap, dir, i_gid_into_vfsgid(idmap, dir))) return mode; return mode & ~S_ISGID; } diff --git a/fs/internal.h b/fs/internal.h index a803cc3cf716..766e8a554b2c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -14,9 +14,9 @@ struct path; struct mount; struct shrink_control; struct fs_context; -struct user_namespace; struct pipe_inode_info; struct iov_iter; +struct mnt_idmap; /* * block/bdev.c @@ -63,7 +63,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); int do_rmdir(int dfd, struct filename *name); int do_unlinkat(int dfd, struct filename *name); -int may_linkat(struct user_namespace *mnt_userns, const struct path *link); +int may_linkat(struct mnt_idmap *idmap, const struct path *link); int do_renameat2(int olddfd, struct filename *oldname, int newdfd, struct filename *newname, unsigned int flags); int do_mkdirat(int dfd, struct filename *name, umode_t mode); @@ -150,8 +150,8 @@ extern int vfs_open(const struct path *, struct file *); * inode.c */ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); -int dentry_needs_remove_privs(struct user_namespace *, struct dentry *dentry); -bool in_group_or_capable(struct user_namespace *mnt_userns, +int dentry_needs_remove_privs(struct mnt_idmap *, struct dentry *dentry); +bool in_group_or_capable(struct mnt_idmap *idmap, const struct inode *inode, vfsgid_t vfsgid); /* @@ -234,7 +234,7 @@ ssize_t do_getxattr(struct mnt_idmap *idmap, int setxattr_copy(const char __user *name, struct xattr_ctx *ctx); int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, struct xattr_ctx *ctx); -int may_write_xattr(struct user_namespace *mnt_userns, struct inode *inode); +int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode); #ifdef CONFIG_FS_POSIX_ACL int do_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, @@ -261,5 +261,8 @@ ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *po /* * fs/attr.c */ -int setattr_should_drop_sgid(struct user_namespace *mnt_userns, +int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode); +struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns); +struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); +void mnt_idmap_put(struct mnt_idmap *idmap); diff --git a/fs/ioctl.c b/fs/ioctl.c index 80ac36aea913..5b2481cd4750 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -651,7 +651,7 @@ static int fileattr_set_prepare(struct inode *inode, /** * vfs_fileattr_set - change miscellaneous file attributes - * @mnt_userns: user namespace of the mount + * @idmap: idmap of the mount * @dentry: the object to change * @fa: fileattr pointer * @@ -665,7 +665,7 @@ static int fileattr_set_prepare(struct inode *inode, * * Return: 0 on success, or a negative error on failure. */ -int vfs_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, +int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); @@ -675,7 +675,7 @@ int vfs_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, if (!inode->i_op->fileattr_set) return -ENOIOCTLCMD; - if (!inode_owner_or_capable(mnt_userns, inode)) + if (!inode_owner_or_capable(idmap, inode)) return -EPERM; inode_lock(inode); @@ -693,7 +693,7 @@ int vfs_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, } err = fileattr_set_prepare(inode, &old_ma, fa); if (!err) - err = inode->i_op->fileattr_set(mnt_userns, dentry, fa); + err = inode->i_op->fileattr_set(idmap, dentry, fa); } inode_unlock(inode); @@ -714,7 +714,7 @@ static int ioctl_getflags(struct file *file, unsigned int __user *argp) static int ioctl_setflags(struct file *file, unsigned int __user *argp) { - struct user_namespace *mnt_userns = file_mnt_user_ns(file); + struct mnt_idmap *idmap = file_mnt_idmap(file); struct dentry *dentry = file->f_path.dentry; struct fileattr fa; unsigned int flags; @@ -725,7 +725,7 @@ static int ioctl_setflags(struct file *file, unsigned int __user *argp) err = mnt_want_write_file(file); if (!err) { fileattr_fill_flags(&fa, flags); - err = vfs_fileattr_set(mnt_userns, dentry, &fa); + err = vfs_fileattr_set(idmap, dentry, &fa); mnt_drop_write_file(file); } } @@ -746,7 +746,7 @@ static int ioctl_fsgetxattr(struct file *file, void __user *argp) static int ioctl_fssetxattr(struct file *file, void __user *argp) { - struct user_namespace *mnt_userns = file_mnt_user_ns(file); + struct mnt_idmap *idmap = file_mnt_idmap(file); struct dentry *dentry = file->f_path.dentry; struct fileattr fa; int err; @@ -755,7 +755,7 @@ static int ioctl_fssetxattr(struct file *file, void __user *argp) if (!err) { err = mnt_want_write_file(file); if (!err) { - err = vfs_fileattr_set(mnt_userns, dentry, &fa); + err = vfs_fileattr_set(idmap, dentry, &fa); mnt_drop_write_file(file); } } diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 8bb58ce5c06c..888a7ceb6479 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -229,7 +229,7 @@ static int __jffs2_set_acl(struct inode *inode, int xprefix, struct posix_acl *a return rc; } -int jffs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int jffs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int rc, xprefix; @@ -241,7 +241,7 @@ int jffs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, if (acl) { umode_t mode; - rc = posix_acl_update_mode(&init_user_ns, inode, &mode, + rc = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode, &acl); if (rc) return rc; diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index ca36a6eca594..e976b8cb82cf 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -28,7 +28,7 @@ struct jffs2_acl_header { #ifdef CONFIG_JFFS2_FS_POSIX_ACL struct posix_acl *jffs2_get_acl(struct inode *inode, int type, bool rcu); -int jffs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int jffs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); extern int jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *); extern int jffs2_init_acl_post(struct inode *); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index f399b390b5f6..5075a0a6d594 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -24,20 +24,20 @@ static int jffs2_readdir (struct file *, struct dir_context *); -static int jffs2_create (struct user_namespace *, struct inode *, +static int jffs2_create (struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); static struct dentry *jffs2_lookup (struct inode *,struct dentry *, unsigned int); static int jffs2_link (struct dentry *,struct inode *,struct dentry *); static int jffs2_unlink (struct inode *,struct dentry *); -static int jffs2_symlink (struct user_namespace *, struct inode *, +static int jffs2_symlink (struct mnt_idmap *, struct inode *, struct dentry *, const char *); -static int jffs2_mkdir (struct user_namespace *, struct inode *,struct dentry *, +static int jffs2_mkdir (struct mnt_idmap *, struct inode *,struct dentry *, umode_t); static int jffs2_rmdir (struct inode *,struct dentry *); -static int jffs2_mknod (struct user_namespace *, struct inode *,struct dentry *, +static int jffs2_mknod (struct mnt_idmap *, struct inode *,struct dentry *, umode_t,dev_t); -static int jffs2_rename (struct user_namespace *, struct inode *, +static int jffs2_rename (struct mnt_idmap *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); @@ -160,7 +160,7 @@ static int jffs2_readdir(struct file *file, struct dir_context *ctx) /***********************************************************************/ -static int jffs2_create(struct user_namespace *mnt_userns, struct inode *dir_i, +static int jffs2_create(struct mnt_idmap *idmap, struct inode *dir_i, struct dentry *dentry, umode_t mode, bool excl) { struct jffs2_raw_inode *ri; @@ -279,7 +279,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de /***********************************************************************/ -static int jffs2_symlink (struct user_namespace *mnt_userns, struct inode *dir_i, +static int jffs2_symlink (struct mnt_idmap *idmap, struct inode *dir_i, struct dentry *dentry, const char *target) { struct jffs2_inode_info *f, *dir_f; @@ -442,7 +442,7 @@ static int jffs2_symlink (struct user_namespace *mnt_userns, struct inode *dir_i } -static int jffs2_mkdir (struct user_namespace *mnt_userns, struct inode *dir_i, +static int jffs2_mkdir (struct mnt_idmap *idmap, struct inode *dir_i, struct dentry *dentry, umode_t mode) { struct jffs2_inode_info *f, *dir_f; @@ -614,7 +614,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) return ret; } -static int jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i, +static int jffs2_mknod (struct mnt_idmap *idmap, struct inode *dir_i, struct dentry *dentry, umode_t mode, dev_t rdev) { struct jffs2_inode_info *f, *dir_f; @@ -762,7 +762,7 @@ static int jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i, return ret; } -static int jffs2_rename (struct user_namespace *mnt_userns, +static int jffs2_rename (struct mnt_idmap *idmap, struct inode *old_dir_i, struct dentry *old_dentry, struct inode *new_dir_i, struct dentry *new_dentry, unsigned int flags) diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 66af51c41619..09174898efd0 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -190,19 +190,19 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) return 0; } -int jffs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int jffs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); int rc; - rc = setattr_prepare(&init_user_ns, dentry, iattr); + rc = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (rc) return rc; rc = jffs2_do_setattr(inode, iattr); if (!rc && (iattr->ia_valid & ATTR_MODE)) - rc = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode); + rc = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode); return rc; } diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 921d782583d6..8da19766c101 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -164,7 +164,7 @@ long jffs2_ioctl(struct file *, unsigned int, unsigned long); extern const struct inode_operations jffs2_symlink_inode_operations; /* fs.c */ -int jffs2_setattr (struct user_namespace *, struct dentry *, struct iattr *); +int jffs2_setattr (struct mnt_idmap *, struct dentry *, struct iattr *); int jffs2_do_setattr (struct inode *, struct iattr *); struct inode *jffs2_iget(struct super_block *, unsigned long); void jffs2_evict_inode (struct inode *); diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c index aef5522551db..437f3a2c1b54 100644 --- a/fs/jffs2/security.c +++ b/fs/jffs2/security.c @@ -57,7 +57,7 @@ static int jffs2_security_getxattr(const struct xattr_handler *handler, } static int jffs2_security_setxattr(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c index cc3f24883e7d..b7c5da2d89bd 100644 --- a/fs/jffs2/xattr_trusted.c +++ b/fs/jffs2/xattr_trusted.c @@ -25,7 +25,7 @@ static int jffs2_trusted_getxattr(const struct xattr_handler *handler, } static int jffs2_trusted_setxattr(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c index fb945977c013..f64edce4927b 100644 --- a/fs/jffs2/xattr_user.c +++ b/fs/jffs2/xattr_user.c @@ -25,7 +25,7 @@ static int jffs2_user_getxattr(const struct xattr_handler *handler, } static int jffs2_user_setxattr(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 3b667eccc73b..fb96f872d207 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -94,7 +94,7 @@ out: return rc; } -int jfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int jfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int rc; @@ -106,7 +106,7 @@ int jfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, tid = txBegin(inode->i_sb, 0); mutex_lock(&JFS_IP(inode)->commit_mutex); if (type == ACL_TYPE_ACCESS && acl) { - rc = posix_acl_update_mode(&init_user_ns, inode, &mode, &acl); + rc = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode, &acl); if (rc) goto end_tx; if (mode != inode->i_mode) diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 88663465aecd..2ee35be49de1 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -85,24 +85,24 @@ static int jfs_release(struct inode *inode, struct file *file) return 0; } -int jfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int jfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); int rc; - rc = setattr_prepare(&init_user_ns, dentry, iattr); + rc = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (rc) return rc; - if (is_quota_modification(mnt_userns, inode, iattr)) { + if (is_quota_modification(&nop_mnt_idmap, inode, iattr)) { rc = dquot_initialize(inode); if (rc) return rc; } if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) || (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) { - rc = dquot_transfer(mnt_userns, inode, iattr); + rc = dquot_transfer(&nop_mnt_idmap, inode, iattr); if (rc) return rc; } @@ -119,11 +119,11 @@ int jfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, jfs_truncate(inode); } - setattr_copy(&init_user_ns, inode, iattr); + setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); if (iattr->ia_valid & ATTR_MODE) - rc = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode); + rc = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode); return rc; } diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index 1e7b177ece60..ed7989bc2db1 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -70,7 +70,7 @@ int jfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) return 0; } -int jfs_fileattr_set(struct user_namespace *mnt_userns, +int jfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index f0704a25835f..f892e54d0fcd 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -8,7 +8,7 @@ #ifdef CONFIG_JFS_POSIX_ACL struct posix_acl *jfs_get_acl(struct inode *inode, int type, bool rcu); -int jfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int jfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); int jfs_init_acl(tid_t, struct inode *, struct inode *); diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 59379089e939..9e1f02767201 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -64,7 +64,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) goto fail_put; } - inode_init_owner(&init_user_ns, inode, parent, mode); + inode_init_owner(&nop_mnt_idmap, inode, parent, mode); /* * New inodes need to save sane values on disk when * uid & gid mount options are used diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 7de961a81862..ea80661597ac 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -10,7 +10,7 @@ struct fid; extern struct inode *ialloc(struct inode *, umode_t); extern int jfs_fsync(struct file *, loff_t, loff_t, int); extern int jfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); -extern int jfs_fileattr_set(struct user_namespace *mnt_userns, +extern int jfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); extern long jfs_ioctl(struct file *, unsigned int, unsigned long); extern struct inode *jfs_iget(struct super_block *, unsigned long); @@ -28,7 +28,7 @@ extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); extern void jfs_set_inode_flags(struct inode *); extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); -extern int jfs_setattr(struct user_namespace *, struct dentry *, struct iattr *); +extern int jfs_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern const struct address_space_operations jfs_aops; extern const struct inode_operations jfs_dir_inode_operations; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index a38d14eed047..b29d68b5eec5 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -59,7 +59,7 @@ static inline void free_ea_wmap(struct inode *inode) * RETURN: Errors from subroutines * */ -static int jfs_create(struct user_namespace *mnt_userns, struct inode *dip, +static int jfs_create(struct mnt_idmap *idmap, struct inode *dip, struct dentry *dentry, umode_t mode, bool excl) { int rc = 0; @@ -192,7 +192,7 @@ static int jfs_create(struct user_namespace *mnt_userns, struct inode *dip, * note: * EACCES: user needs search+write permission on the parent directory */ -static int jfs_mkdir(struct user_namespace *mnt_userns, struct inode *dip, +static int jfs_mkdir(struct mnt_idmap *idmap, struct inode *dip, struct dentry *dentry, umode_t mode) { int rc = 0; @@ -869,7 +869,7 @@ static int jfs_link(struct dentry *old_dentry, * an intermediate result whose length exceeds PATH_MAX [XPG4.2] */ -static int jfs_symlink(struct user_namespace *mnt_userns, struct inode *dip, +static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip, struct dentry *dentry, const char *name) { int rc; @@ -1059,7 +1059,7 @@ static int jfs_symlink(struct user_namespace *mnt_userns, struct inode *dip, * * FUNCTION: rename a file or directory */ -static int jfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +static int jfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { @@ -1345,7 +1345,7 @@ static int jfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, * * FUNCTION: Create a special file (device) */ -static int jfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int jfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct jfs_inode_info *jfs_ip; diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index f9273f6901c8..f817798fa1eb 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -932,7 +932,7 @@ static int jfs_xattr_get(const struct xattr_handler *handler, } static int jfs_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -951,7 +951,7 @@ static int jfs_xattr_get_os2(const struct xattr_handler *handler, } static int jfs_xattr_set_os2(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 935ef8cb02b2..e3181c3e1988 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1200,7 +1200,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, return d_splice_alias(inode, dentry); } -static int kernfs_iop_mkdir(struct user_namespace *mnt_userns, +static int kernfs_iop_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { @@ -1238,7 +1238,7 @@ static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) return ret; } -static int kernfs_iop_rename(struct user_namespace *mnt_userns, +static int kernfs_iop_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index eac0f210299a..30494dcb0df3 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -107,7 +107,7 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) return ret; } -int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int kernfs_iop_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); @@ -120,7 +120,7 @@ int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, root = kernfs_root(kn); down_write(&root->kernfs_rwsem); - error = setattr_prepare(&init_user_ns, dentry, iattr); + error = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (error) goto out; @@ -129,7 +129,7 @@ int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, goto out; /* this ignores size changes */ - setattr_copy(&init_user_ns, inode, iattr); + setattr_copy(&nop_mnt_idmap, inode, iattr); out: up_write(&root->kernfs_rwsem); @@ -181,7 +181,7 @@ static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode) set_nlink(inode, kn->dir.subdirs + 2); } -int kernfs_iop_getattr(struct user_namespace *mnt_userns, +int kernfs_iop_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { @@ -191,7 +191,7 @@ int kernfs_iop_getattr(struct user_namespace *mnt_userns, down_read(&root->kernfs_rwsem); kernfs_refresh_inode(kn, inode); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); up_read(&root->kernfs_rwsem); return 0; @@ -272,7 +272,7 @@ void kernfs_evict_inode(struct inode *inode) kernfs_put(kn); } -int kernfs_iop_permission(struct user_namespace *mnt_userns, +int kernfs_iop_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct kernfs_node *kn; @@ -287,7 +287,7 @@ int kernfs_iop_permission(struct user_namespace *mnt_userns, down_read(&root->kernfs_rwsem); kernfs_refresh_inode(kn, inode); - ret = generic_permission(&init_user_ns, inode, mask); + ret = generic_permission(&nop_mnt_idmap, inode, mask); up_read(&root->kernfs_rwsem); return ret; @@ -324,7 +324,7 @@ static int kernfs_vfs_xattr_get(const struct xattr_handler *handler, } static int kernfs_vfs_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *suffix, const void *value, size_t size, int flags) @@ -391,7 +391,7 @@ static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn, } static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *suffix, const void *value, size_t size, int flags) diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 9046d9f39e63..236c3a6113f1 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -127,11 +127,11 @@ extern struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache; */ extern const struct xattr_handler *kernfs_xattr_handlers[]; void kernfs_evict_inode(struct inode *inode); -int kernfs_iop_permission(struct user_namespace *mnt_userns, +int kernfs_iop_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); -int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int kernfs_iop_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr); -int kernfs_iop_getattr(struct user_namespace *mnt_userns, +int kernfs_iop_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags); ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size); diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c index 2a39ffb8423b..6e61b5bc7d86 100644 --- a/fs/ksmbd/auth.c +++ b/fs/ksmbd/auth.c @@ -322,7 +322,8 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, dn_off = le32_to_cpu(authblob->DomainName.BufferOffset); dn_len = le16_to_cpu(authblob->DomainName.Length); - if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len) + if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len || + nt_len < CIFS_ENCPWD_SIZE) return -EINVAL; /* TODO : use domain name that imported from configuration file */ diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 12be8386446a..56be077e5d8a 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -280,7 +280,7 @@ int ksmbd_conn_handler_loop(void *p) { struct ksmbd_conn *conn = (struct ksmbd_conn *)p; struct ksmbd_transport *t = conn->transport; - unsigned int pdu_size; + unsigned int pdu_size, max_allowed_pdu_size; char hdr_buf[4] = {0,}; int size; @@ -305,20 +305,36 @@ int ksmbd_conn_handler_loop(void *p) pdu_size = get_rfc1002_len(hdr_buf); ksmbd_debug(CONN, "RFC1002 header %u bytes\n", pdu_size); + if (conn->status == KSMBD_SESS_GOOD) + max_allowed_pdu_size = + SMB3_MAX_MSGSIZE + conn->vals->max_write_size; + else + max_allowed_pdu_size = SMB3_MAX_MSGSIZE; + + if (pdu_size > max_allowed_pdu_size) { + pr_err_ratelimited("PDU length(%u) excceed maximum allowed pdu size(%u) on connection(%d)\n", + pdu_size, max_allowed_pdu_size, + conn->status); + break; + } + /* * Check if pdu size is valid (min : smb header size, * max : 0x00FFFFFF). */ if (pdu_size < __SMB2_HEADER_STRUCTURE_SIZE || pdu_size > MAX_STREAM_PROT_LEN) { - continue; + break; } /* 4 for rfc1002 length field */ size = pdu_size + 4; - conn->request_buf = kvmalloc(size, GFP_KERNEL); + conn->request_buf = kvmalloc(size, + GFP_KERNEL | + __GFP_NOWARN | + __GFP_NORETRY); if (!conn->request_buf) - continue; + break; memcpy(conn->request_buf, hdr_buf, sizeof(hdr_buf)); if (!ksmbd_smb_request(conn)) diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h index b6bd8311e6b4..fb8b2d566efb 100644 --- a/fs/ksmbd/ksmbd_netlink.h +++ b/fs/ksmbd/ksmbd_netlink.h @@ -106,7 +106,8 @@ struct ksmbd_startup_request { __u32 sub_auth[3]; /* Subauth value for Security ID */ __u32 smb2_max_credits; /* MAX credits */ __u32 smbd_max_io_size; /* smbd read write size */ - __u32 reserved[127]; /* Reserved room */ + __u32 max_connections; /* Number of maximum simultaneous connections */ + __u32 reserved[126]; /* Reserved room */ __u32 ifc_list_sz; /* interfaces list size */ __s8 ____payload[]; }; diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c index 0ae8d08d85a8..3507d8f89074 100644 --- a/fs/ksmbd/ndr.c +++ b/fs/ksmbd/ndr.c @@ -242,7 +242,7 @@ int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da) return ret; if (da->version != 3 && da->version != 4) { - pr_err("v%d version is not supported\n", da->version); + ksmbd_debug(VFS, "v%d version is not supported\n", da->version); return -EINVAL; } @@ -251,7 +251,7 @@ int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da) return ret; if (da->version != version2) { - pr_err("ndr version mismatched(version: %d, version2: %d)\n", + ksmbd_debug(VFS, "ndr version mismatched(version: %d, version2: %d)\n", da->version, version2); return -EINVAL; } @@ -338,7 +338,7 @@ static int ndr_encode_posix_acl_entry(struct ndr *n, struct xattr_smb_acl *acl) } int ndr_encode_posix_acl(struct ndr *n, - struct user_namespace *user_ns, + struct mnt_idmap *idmap, struct inode *inode, struct xattr_smb_acl *acl, struct xattr_smb_acl *def_acl) @@ -374,11 +374,11 @@ int ndr_encode_posix_acl(struct ndr *n, if (ret) return ret; - vfsuid = i_uid_into_vfsuid(user_ns, inode); + vfsuid = i_uid_into_vfsuid(idmap, inode); ret = ndr_write_int64(n, from_kuid(&init_user_ns, vfsuid_into_kuid(vfsuid))); if (ret) return ret; - vfsgid = i_gid_into_vfsgid(user_ns, inode); + vfsgid = i_gid_into_vfsgid(idmap, inode); ret = ndr_write_int64(n, from_kgid(&init_user_ns, vfsgid_into_kgid(vfsgid))); if (ret) return ret; @@ -457,7 +457,7 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl) if (ret) return ret; if (acl->version != 4) { - pr_err("v%d version is not supported\n", acl->version); + ksmbd_debug(VFS, "v%d version is not supported\n", acl->version); return -EINVAL; } @@ -465,7 +465,7 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl) if (ret) return ret; if (acl->version != version2) { - pr_err("ndr version mismatched(version: %d, version2: %d)\n", + ksmbd_debug(VFS, "ndr version mismatched(version: %d, version2: %d)\n", acl->version, version2); return -EINVAL; } diff --git a/fs/ksmbd/ndr.h b/fs/ksmbd/ndr.h index 60ca265d1bb0..f3c108c8cf4d 100644 --- a/fs/ksmbd/ndr.h +++ b/fs/ksmbd/ndr.h @@ -14,7 +14,7 @@ struct ndr { int ndr_encode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da); int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da); -int ndr_encode_posix_acl(struct ndr *n, struct user_namespace *user_ns, +int ndr_encode_posix_acl(struct ndr *n, struct mnt_idmap *idmap, struct inode *inode, struct xattr_smb_acl *acl, struct xattr_smb_acl *def_acl); int ndr_encode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl); diff --git a/fs/ksmbd/oplock.c b/fs/ksmbd/oplock.c index d7d47b82451d..2e54ded4d92c 100644 --- a/fs/ksmbd/oplock.c +++ b/fs/ksmbd/oplock.c @@ -1608,9 +1608,9 @@ void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp) { struct create_posix_rsp *buf; struct inode *inode = file_inode(fp->filp); - struct user_namespace *user_ns = file_mnt_user_ns(fp->filp); - vfsuid_t vfsuid = i_uid_into_vfsuid(user_ns, inode); - vfsgid_t vfsgid = i_gid_into_vfsgid(user_ns, inode); + struct mnt_idmap *idmap = file_mnt_idmap(fp->filp); + vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); + vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); buf = (struct create_posix_rsp *)cc; memset(buf, 0, sizeof(struct create_posix_rsp)); diff --git a/fs/ksmbd/server.h b/fs/ksmbd/server.h index ac9d932f8c8a..db7278181760 100644 --- a/fs/ksmbd/server.h +++ b/fs/ksmbd/server.h @@ -41,6 +41,7 @@ struct ksmbd_server_config { unsigned int share_fake_fscaps; struct smb_sid domain_sid; unsigned int auth_mechs; + unsigned int max_connections; char *conf[SERVER_CONF_WORK_GROUP + 1]; }; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 14d7f3599c63..4ef6e1e59a40 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -12,6 +12,7 @@ #include <linux/ethtool.h> #include <linux/falloc.h> #include <linux/mount.h> +#include <linux/filelock.h> #include "glob.h" #include "smbfsctl.h" @@ -1928,13 +1929,13 @@ int smb2_tree_connect(struct ksmbd_work *work) if (conn->posix_ext_supported) status.tree_conn->posix_extensions = true; -out_err1: rsp->StructureSize = cpu_to_le16(16); + inc_rfc1001_len(work->response_buf, 16); +out_err1: rsp->Capabilities = 0; rsp->Reserved = 0; /* default manual caching */ rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING; - inc_rfc1001_len(work->response_buf, 16); if (!IS_ERR(treename)) kfree(treename); @@ -1967,6 +1968,9 @@ out_err1: rsp->hdr.Status = STATUS_ACCESS_DENIED; } + if (status.ret != KSMBD_TREE_CONN_STATUS_OK) + smb2_set_err_rsp(work); + return rc; } @@ -2189,7 +2193,7 @@ out: static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, const struct path *path) { - struct user_namespace *user_ns = mnt_user_ns(path->mnt); + struct mnt_idmap *idmap = mnt_idmap(path->mnt); char *attr_name = NULL, *value; int rc = 0; unsigned int next = 0; @@ -2225,7 +2229,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, value = (char *)&eabuf->name + eabuf->EaNameLength + 1; if (!eabuf->EaValueLength) { - rc = ksmbd_vfs_casexattr_len(user_ns, + rc = ksmbd_vfs_casexattr_len(idmap, path->dentry, attr_name, XATTR_USER_PREFIX_LEN + @@ -2233,7 +2237,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, /* delete the EA only when it exits */ if (rc > 0) { - rc = ksmbd_vfs_remove_xattr(user_ns, + rc = ksmbd_vfs_remove_xattr(idmap, path->dentry, attr_name); @@ -2248,7 +2252,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, /* if the EA doesn't exist, just do nothing. */ rc = 0; } else { - rc = ksmbd_vfs_setxattr(user_ns, + rc = ksmbd_vfs_setxattr(idmap, path->dentry, attr_name, value, le16_to_cpu(eabuf->EaValueLength), 0); if (rc < 0) { @@ -2278,7 +2282,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path, struct ksmbd_file *fp, char *stream_name, int s_type) { - struct user_namespace *user_ns = mnt_user_ns(path->mnt); + struct mnt_idmap *idmap = mnt_idmap(path->mnt); size_t xattr_stream_size; char *xattr_stream_name; int rc; @@ -2294,7 +2298,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path, fp->stream.size = xattr_stream_size; /* Check if there is stream prefix in xattr space */ - rc = ksmbd_vfs_casexattr_len(user_ns, + rc = ksmbd_vfs_casexattr_len(idmap, path->dentry, xattr_stream_name, xattr_stream_size); @@ -2306,7 +2310,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path, return -EBADF; } - rc = ksmbd_vfs_setxattr(user_ns, path->dentry, + rc = ksmbd_vfs_setxattr(idmap, path->dentry, xattr_stream_name, NULL, 0, 0); if (rc < 0) pr_err("Failed to store XATTR stream name :%d\n", rc); @@ -2315,7 +2319,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path, static int smb2_remove_smb_xattrs(const struct path *path) { - struct user_namespace *user_ns = mnt_user_ns(path->mnt); + struct mnt_idmap *idmap = mnt_idmap(path->mnt); char *name, *xattr_list = NULL; ssize_t xattr_list_len; int err = 0; @@ -2335,7 +2339,7 @@ static int smb2_remove_smb_xattrs(const struct path *path) if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN)) { - err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, + err = ksmbd_vfs_remove_xattr(idmap, path->dentry, name); if (err) ksmbd_debug(SMB, "remove xattr failed : %s\n", @@ -2382,7 +2386,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path * da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME | XATTR_DOSINFO_ITIME; - rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_user_ns(path->mnt), + rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path->dentry, &da); if (rc) ksmbd_debug(SMB, "failed to store file attribute into xattr\n"); @@ -2401,7 +2405,7 @@ static void smb2_update_xattrs(struct ksmbd_tree_connect *tcon, KSMBD_SHARE_FLAG_STORE_DOS_ATTRS)) return; - rc = ksmbd_vfs_get_dos_attrib_xattr(mnt_user_ns(path->mnt), + rc = ksmbd_vfs_get_dos_attrib_xattr(mnt_idmap(path->mnt), path->dentry, &da); if (rc > 0) { fp->f_ci->m_fattr = cpu_to_le32(da.attr); @@ -2476,11 +2480,11 @@ static int smb2_create_sd_buffer(struct ksmbd_work *work, } static void ksmbd_acls_fattr(struct smb_fattr *fattr, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct inode *inode) { - vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode); - vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode); + vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); + vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); fattr->cf_uid = vfsuid_into_kuid(vfsuid); fattr->cf_gid = vfsgid_into_kgid(vfsgid); @@ -2512,7 +2516,7 @@ int smb2_open(struct ksmbd_work *work) struct ksmbd_share_config *share = tcon->share_conf; struct ksmbd_file *fp = NULL; struct file *filp = NULL; - struct user_namespace *user_ns = NULL; + struct mnt_idmap *idmap = NULL; struct kstat stat; struct create_context *context; struct lease_ctx_info *lc = NULL; @@ -2765,7 +2769,7 @@ int smb2_open(struct ksmbd_work *work) rc = 0; } else { file_present = true; - user_ns = mnt_user_ns(path.mnt); + idmap = mnt_idmap(path.mnt); } if (stream_name) { if (req->CreateOptions & FILE_DIRECTORY_FILE_LE) { @@ -2828,7 +2832,7 @@ int smb2_open(struct ksmbd_work *work) if (!file_present) { daccess = cpu_to_le32(GENERIC_ALL_FLAGS); } else { - rc = ksmbd_vfs_query_maximal_access(user_ns, + rc = ksmbd_vfs_query_maximal_access(idmap, path.dentry, &daccess); if (rc) @@ -2864,7 +2868,7 @@ int smb2_open(struct ksmbd_work *work) } created = true; - user_ns = mnt_user_ns(path.mnt); + idmap = mnt_idmap(path.mnt); if (ea_buf) { if (le32_to_cpu(ea_buf->ccontext.DataLength) < sizeof(struct smb2_ea_info)) { @@ -2886,7 +2890,7 @@ int smb2_open(struct ksmbd_work *work) * is already granted. */ if (daccess & ~(FILE_READ_ATTRIBUTES_LE | FILE_READ_CONTROL_LE)) { - rc = inode_permission(user_ns, + rc = inode_permission(idmap, d_inode(path.dentry), may_flags); if (rc) @@ -2894,7 +2898,7 @@ int smb2_open(struct ksmbd_work *work) if ((daccess & FILE_DELETE_LE) || (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)) { - rc = ksmbd_vfs_may_delete(user_ns, + rc = ksmbd_vfs_may_delete(idmap, path.dentry); if (rc) goto err_out; @@ -2957,7 +2961,7 @@ int smb2_open(struct ksmbd_work *work) int posix_acl_rc; struct inode *inode = d_inode(path.dentry); - posix_acl_rc = ksmbd_vfs_inherit_posix_acl(user_ns, + posix_acl_rc = ksmbd_vfs_inherit_posix_acl(idmap, path.dentry, d_inode(path.dentry->d_parent)); if (posix_acl_rc) @@ -2973,7 +2977,7 @@ int smb2_open(struct ksmbd_work *work) rc = smb2_create_sd_buffer(work, req, &path); if (rc) { if (posix_acl_rc) - ksmbd_vfs_set_init_posix_acl(user_ns, + ksmbd_vfs_set_init_posix_acl(idmap, path.dentry); if (test_share_config_flag(work->tcon->share_conf, @@ -2982,7 +2986,7 @@ int smb2_open(struct ksmbd_work *work) struct smb_ntsd *pntsd; int pntsd_size, ace_num = 0; - ksmbd_acls_fattr(&fattr, user_ns, inode); + ksmbd_acls_fattr(&fattr, idmap, inode); if (fattr.cf_acls) ace_num = fattr.cf_acls->a_count; if (fattr.cf_dacls) @@ -2996,7 +3000,7 @@ int smb2_open(struct ksmbd_work *work) if (!pntsd) goto err_out; - rc = build_sec_desc(user_ns, + rc = build_sec_desc(idmap, pntsd, NULL, 0, OWNER_SECINFO | GROUP_SECINFO | @@ -3010,7 +3014,7 @@ int smb2_open(struct ksmbd_work *work) } rc = ksmbd_vfs_set_sd_xattr(conn, - user_ns, + idmap, path.dentry, pntsd, pntsd_size); @@ -3206,7 +3210,7 @@ int smb2_open(struct ksmbd_work *work) struct create_context *mxac_ccontext; if (maximal_access == 0) - ksmbd_vfs_query_maximal_access(user_ns, + ksmbd_vfs_query_maximal_access(idmap, path.dentry, &maximal_access); mxac_ccontext = (struct create_context *)(rsp->Buffer + @@ -3631,7 +3635,7 @@ static void unlock_dir(struct ksmbd_file *dir_fp) static int process_query_dir_entries(struct smb2_query_dir_private *priv) { - struct user_namespace *user_ns = file_mnt_user_ns(priv->dir_fp->filp); + struct mnt_idmap *idmap = file_mnt_idmap(priv->dir_fp->filp); struct kstat kstat; struct ksmbd_kstat ksmbd_kstat; int rc; @@ -3644,7 +3648,7 @@ static int process_query_dir_entries(struct smb2_query_dir_private *priv) return -EINVAL; lock_dir(priv->dir_fp); - dent = lookup_one(user_ns, priv->d_info->name, + dent = lookup_one(idmap, priv->d_info->name, priv->dir_fp->filp->f_path.dentry, priv->d_info->name_len); unlock_dir(priv->dir_fp); @@ -3665,7 +3669,7 @@ static int process_query_dir_entries(struct smb2_query_dir_private *priv) ksmbd_kstat.kstat = &kstat; if (priv->info_level != FILE_NAMES_INFORMATION) ksmbd_vfs_fill_dentry_attrs(priv->work, - user_ns, + idmap, dent, &ksmbd_kstat); @@ -3895,7 +3899,7 @@ int smb2_query_dir(struct ksmbd_work *work) } if (!(dir_fp->daccess & FILE_LIST_DIRECTORY_LE) || - inode_permission(file_mnt_user_ns(dir_fp->filp), + inode_permission(file_mnt_idmap(dir_fp->filp), file_inode(dir_fp->filp), MAY_READ | MAY_EXEC)) { pr_err("no right to enumerate directory (%pD)\n", dir_fp->filp); @@ -4161,7 +4165,7 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp, ssize_t buf_free_len, alignment_bytes, next_offset, rsp_data_cnt = 0; struct smb2_ea_info_req *ea_req = NULL; const struct path *path; - struct user_namespace *user_ns = file_mnt_user_ns(fp->filp); + struct mnt_idmap *idmap = file_mnt_idmap(fp->filp); if (!(fp->daccess & FILE_READ_EA_LE)) { pr_err("Not permitted to read ext attr : 0x%x\n", @@ -4241,7 +4245,7 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp, buf_free_len -= (offsetof(struct smb2_ea_info, name) + name_len + 1); /* bailout if xattr can't fit in buf_free_len */ - value_len = ksmbd_vfs_getxattr(user_ns, path->dentry, + value_len = ksmbd_vfs_getxattr(idmap, path->dentry, name, &buf); if (value_len <= 0) { rc = -ENOENT; @@ -4331,7 +4335,7 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp, } basic_info = (struct smb2_file_basic_info *)rsp->Buffer; - generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp), + generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp), &stat); basic_info->CreationTime = cpu_to_le64(fp->create_time); time = ksmbd_UnixTimeToNT(stat.atime); @@ -4372,7 +4376,7 @@ static void get_file_standard_info(struct smb2_query_info_rsp *rsp, struct kstat stat; inode = file_inode(fp->filp); - generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat); + generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat); sinfo = (struct smb2_file_standard_info *)rsp->Buffer; delete_pending = ksmbd_inode_pending_delete(fp); @@ -4426,7 +4430,7 @@ static int get_file_all_info(struct ksmbd_work *work, return PTR_ERR(filename); inode = file_inode(fp->filp); - generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat); + generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat); ksmbd_debug(SMB, "filename = %s\n", filename); delete_pending = ksmbd_inode_pending_delete(fp); @@ -4503,7 +4507,7 @@ static void get_file_stream_info(struct ksmbd_work *work, int buf_free_len; struct smb2_query_info_req *req = ksmbd_req_buf_next(work); - generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp), + generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp), &stat); file_info = (struct smb2_file_stream_info *)rsp->Buffer; @@ -4594,7 +4598,7 @@ static void get_file_internal_info(struct smb2_query_info_rsp *rsp, struct smb2_file_internal_info *file_info; struct kstat stat; - generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp), + generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp), &stat); file_info = (struct smb2_file_internal_info *)rsp->Buffer; file_info->IndexNumber = cpu_to_le64(stat.ino); @@ -4620,7 +4624,7 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp, file_info = (struct smb2_file_ntwrk_info *)rsp->Buffer; inode = file_inode(fp->filp); - generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat); + generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat); file_info->CreationTime = cpu_to_le64(fp->create_time); time = ksmbd_UnixTimeToNT(stat.atime); @@ -4681,7 +4685,7 @@ static void get_file_compression_info(struct smb2_query_info_rsp *rsp, struct smb2_file_comp_info *file_info; struct kstat stat; - generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp), + generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp), &stat); file_info = (struct smb2_file_comp_info *)rsp->Buffer; @@ -4722,9 +4726,9 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp, { struct smb311_posix_qinfo *file_info; struct inode *inode = file_inode(fp->filp); - struct user_namespace *user_ns = file_mnt_user_ns(fp->filp); - vfsuid_t vfsuid = i_uid_into_vfsuid(user_ns, inode); - vfsgid_t vfsgid = i_gid_into_vfsgid(user_ns, inode); + struct mnt_idmap *idmap = file_mnt_idmap(fp->filp); + vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); + vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); u64 time; int out_buf_len = sizeof(struct smb311_posix_qinfo) + 32; @@ -5124,7 +5128,7 @@ static int smb2_get_info_sec(struct ksmbd_work *work, struct smb2_query_info_rsp *rsp) { struct ksmbd_file *fp; - struct user_namespace *user_ns; + struct mnt_idmap *idmap; struct smb_ntsd *pntsd = (struct smb_ntsd *)rsp->Buffer, *ppntsd = NULL; struct smb_fattr fattr = {{0}}; struct inode *inode; @@ -5171,19 +5175,19 @@ static int smb2_get_info_sec(struct ksmbd_work *work, if (!fp) return -ENOENT; - user_ns = file_mnt_user_ns(fp->filp); + idmap = file_mnt_idmap(fp->filp); inode = file_inode(fp->filp); - ksmbd_acls_fattr(&fattr, user_ns, inode); + ksmbd_acls_fattr(&fattr, idmap, inode); if (test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) - ppntsd_size = ksmbd_vfs_get_sd_xattr(work->conn, user_ns, + ppntsd_size = ksmbd_vfs_get_sd_xattr(work->conn, idmap, fp->filp->f_path.dentry, &ppntsd); /* Check if sd buffer size exceeds response buffer size */ if (smb2_resp_buf_len(work, 8) > ppntsd_size) - rc = build_sec_desc(user_ns, pntsd, ppntsd, ppntsd_size, + rc = build_sec_desc(idmap, pntsd, ppntsd, ppntsd_size, addition_info, &secdesclen, &fattr); posix_acl_release(fattr.cf_acls); posix_acl_release(fattr.cf_dacls); @@ -5413,7 +5417,7 @@ int smb2_echo(struct ksmbd_work *work) static int smb2_rename(struct ksmbd_work *work, struct ksmbd_file *fp, - struct user_namespace *user_ns, + struct mnt_idmap *idmap, struct smb2_file_rename_info *file_info, struct nls_table *local_nls) { @@ -5476,7 +5480,7 @@ static int smb2_rename(struct ksmbd_work *work, if (rc) goto out; - rc = ksmbd_vfs_setxattr(user_ns, + rc = ksmbd_vfs_setxattr(idmap, fp->filp->f_path.dentry, xattr_stream_name, NULL, 0, 0); @@ -5615,7 +5619,7 @@ static int set_file_basic_info(struct ksmbd_file *fp, struct iattr attrs; struct file *filp; struct inode *inode; - struct user_namespace *user_ns; + struct mnt_idmap *idmap; int rc = 0; if (!(fp->daccess & FILE_WRITE_ATTRIBUTES_LE)) @@ -5624,7 +5628,7 @@ static int set_file_basic_info(struct ksmbd_file *fp, attrs.ia_valid = 0; filp = fp->filp; inode = file_inode(filp); - user_ns = file_mnt_user_ns(filp); + idmap = file_mnt_idmap(filp); if (file_info->CreationTime) fp->create_time = le64_to_cpu(file_info->CreationTime); @@ -5668,7 +5672,7 @@ static int set_file_basic_info(struct ksmbd_file *fp, da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME | XATTR_DOSINFO_ITIME; - rc = ksmbd_vfs_set_dos_attrib_xattr(user_ns, + rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, filp->f_path.dentry, &da); if (rc) ksmbd_debug(SMB, @@ -5686,7 +5690,7 @@ static int set_file_basic_info(struct ksmbd_file *fp, inode_lock(inode); inode->i_ctime = attrs.ia_ctime; attrs.ia_valid &= ~ATTR_CTIME; - rc = notify_change(user_ns, dentry, &attrs, NULL); + rc = notify_change(idmap, dentry, &attrs, NULL); inode_unlock(inode); } return rc; @@ -5779,7 +5783,7 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp, struct smb2_file_rename_info *rename_info, unsigned int buf_len) { - struct user_namespace *user_ns; + struct mnt_idmap *idmap; struct ksmbd_file *parent_fp; struct dentry *parent; struct dentry *dentry = fp->filp->f_path.dentry; @@ -5794,12 +5798,12 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp, le32_to_cpu(rename_info->FileNameLength)) return -EINVAL; - user_ns = file_mnt_user_ns(fp->filp); + idmap = file_mnt_idmap(fp->filp); if (ksmbd_stream_fd(fp)) goto next; parent = dget_parent(dentry); - ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry); + ret = ksmbd_vfs_lock_parent(idmap, parent, dentry); if (ret) { dput(parent); return ret; @@ -5818,7 +5822,7 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp, ksmbd_fd_put(work, parent_fp); } next: - return smb2_rename(work, fp, user_ns, rename_info, + return smb2_rename(work, fp, idmap, rename_info, work->conn->local_nls); } @@ -7527,14 +7531,14 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id, struct file_sparse *sparse) { struct ksmbd_file *fp; - struct user_namespace *user_ns; + struct mnt_idmap *idmap; int ret = 0; __le32 old_fattr; fp = ksmbd_lookup_fd_fast(work, id); if (!fp) return -ENOENT; - user_ns = file_mnt_user_ns(fp->filp); + idmap = file_mnt_idmap(fp->filp); old_fattr = fp->f_ci->m_fattr; if (sparse->SetSparse) @@ -7547,13 +7551,13 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id, KSMBD_SHARE_FLAG_STORE_DOS_ATTRS)) { struct xattr_dos_attrib da; - ret = ksmbd_vfs_get_dos_attrib_xattr(user_ns, + ret = ksmbd_vfs_get_dos_attrib_xattr(idmap, fp->filp->f_path.dentry, &da); if (ret <= 0) goto out; da.attr = le32_to_cpu(fp->f_ci->m_fattr); - ret = ksmbd_vfs_set_dos_attrib_xattr(user_ns, + ret = ksmbd_vfs_set_dos_attrib_xattr(idmap, fp->filp->f_path.dentry, &da); if (ret) fp->f_ci->m_fattr = old_fattr; @@ -8660,6 +8664,7 @@ int smb3_decrypt_req(struct ksmbd_work *work) bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; + struct ksmbd_session *sess = work->sess; struct smb2_hdr *rsp = smb2_get_msg(work->response_buf); if (conn->dialect < SMB30_PROT_ID) @@ -8669,6 +8674,7 @@ bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work) rsp = ksmbd_resp_buf_next(work); if (le16_to_cpu(rsp->Command) == SMB2_SESSION_SETUP_HE && + sess->user && !user_guest(sess->user) && rsp->Status == STATUS_SUCCESS) return true; return false; diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h index aa5dbe54f5a1..0c8a770fe318 100644 --- a/fs/ksmbd/smb2pdu.h +++ b/fs/ksmbd/smb2pdu.h @@ -24,8 +24,9 @@ #define SMB21_DEFAULT_IOSIZE (1024 * 1024) #define SMB3_DEFAULT_TRANS_SIZE (1024 * 1024) -#define SMB3_MIN_IOSIZE (64 * 1024) -#define SMB3_MAX_IOSIZE (8 * 1024 * 1024) +#define SMB3_MIN_IOSIZE (64 * 1024) +#define SMB3_MAX_IOSIZE (8 * 1024 * 1024) +#define SMB3_MAX_MSGSIZE (4 * 4096) /* * Definitions for SMB2 Protocol Data Units (network frames) diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c index 2a4fbbd55b91..fa2b54df6ee6 100644 --- a/fs/ksmbd/smb_common.c +++ b/fs/ksmbd/smb_common.c @@ -307,7 +307,7 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level, { int i, rc = 0; struct ksmbd_conn *conn = work->conn; - struct user_namespace *user_ns = file_mnt_user_ns(dir->filp); + struct mnt_idmap *idmap = file_mnt_idmap(dir->filp); for (i = 0; i < 2; i++) { struct kstat kstat; @@ -333,7 +333,7 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level, ksmbd_kstat.kstat = &kstat; ksmbd_vfs_fill_dentry_attrs(work, - user_ns, + idmap, dentry, &ksmbd_kstat); rc = fn(conn, info_level, d_info, &ksmbd_kstat); diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c index ab5c68cc0e13..6d6cfb6957a9 100644 --- a/fs/ksmbd/smbacl.c +++ b/fs/ksmbd/smbacl.c @@ -254,7 +254,7 @@ void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid) ssid->num_subauth++; } -static int sid_to_id(struct user_namespace *user_ns, +static int sid_to_id(struct mnt_idmap *idmap, struct smb_sid *psid, uint sidtype, struct smb_fattr *fattr) { @@ -276,7 +276,7 @@ static int sid_to_id(struct user_namespace *user_ns, id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]); uid = KUIDT_INIT(id); - uid = from_vfsuid(user_ns, &init_user_ns, VFSUIDT_INIT(uid)); + uid = from_vfsuid(idmap, &init_user_ns, VFSUIDT_INIT(uid)); if (uid_valid(uid)) { fattr->cf_uid = uid; rc = 0; @@ -287,7 +287,7 @@ static int sid_to_id(struct user_namespace *user_ns, id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]); gid = KGIDT_INIT(id); - gid = from_vfsgid(user_ns, &init_user_ns, VFSGIDT_INIT(gid)); + gid = from_vfsgid(idmap, &init_user_ns, VFSGIDT_INIT(gid)); if (gid_valid(gid)) { fattr->cf_gid = gid; rc = 0; @@ -362,7 +362,7 @@ void free_acl_state(struct posix_acl_state *state) kfree(state->groups); } -static void parse_dacl(struct user_namespace *user_ns, +static void parse_dacl(struct mnt_idmap *idmap, struct smb_acl *pdacl, char *end_of_acl, struct smb_sid *pownersid, struct smb_sid *pgrpsid, struct smb_fattr *fattr) @@ -489,7 +489,7 @@ static void parse_dacl(struct user_namespace *user_ns, acl_mode = access_flags_to_mode(fattr, ppace[i]->access_req, ppace[i]->type); temp_fattr.cf_uid = INVALID_UID; - ret = sid_to_id(user_ns, &ppace[i]->sid, SIDOWNER, &temp_fattr); + ret = sid_to_id(idmap, &ppace[i]->sid, SIDOWNER, &temp_fattr); if (ret || uid_eq(temp_fattr.cf_uid, INVALID_UID)) { pr_err("%s: Error %d mapping Owner SID to uid\n", __func__, ret); @@ -575,7 +575,7 @@ static void parse_dacl(struct user_namespace *user_ns, free_acl_state(&default_acl_state); } -static void set_posix_acl_entries_dacl(struct user_namespace *user_ns, +static void set_posix_acl_entries_dacl(struct mnt_idmap *idmap, struct smb_ace *pndace, struct smb_fattr *fattr, u32 *num_aces, u16 *size, u32 nt_aces_num) @@ -600,14 +600,14 @@ static void set_posix_acl_entries_dacl(struct user_namespace *user_ns, uid_t uid; unsigned int sid_type = SIDOWNER; - uid = posix_acl_uid_translate(user_ns, pace); + uid = posix_acl_uid_translate(idmap, pace); if (!uid) sid_type = SIDUNIX_USER; id_to_sid(uid, sid_type, sid); } else if (pace->e_tag == ACL_GROUP) { gid_t gid; - gid = posix_acl_gid_translate(user_ns, pace); + gid = posix_acl_gid_translate(idmap, pace); id_to_sid(gid, SIDUNIX_GROUP, sid); } else if (pace->e_tag == ACL_OTHER && !nt_aces_num) { smb_copy_sid(sid, &sid_everyone); @@ -666,12 +666,12 @@ posix_default_acl: if (pace->e_tag == ACL_USER) { uid_t uid; - uid = posix_acl_uid_translate(user_ns, pace); + uid = posix_acl_uid_translate(idmap, pace); id_to_sid(uid, SIDCREATOR_OWNER, sid); } else if (pace->e_tag == ACL_GROUP) { gid_t gid; - gid = posix_acl_gid_translate(user_ns, pace); + gid = posix_acl_gid_translate(idmap, pace); id_to_sid(gid, SIDCREATOR_GROUP, sid); } else { kfree(sid); @@ -689,7 +689,7 @@ posix_default_acl: } } -static void set_ntacl_dacl(struct user_namespace *user_ns, +static void set_ntacl_dacl(struct mnt_idmap *idmap, struct smb_acl *pndacl, struct smb_acl *nt_dacl, unsigned int aces_size, @@ -723,13 +723,13 @@ static void set_ntacl_dacl(struct user_namespace *user_ns, } } - set_posix_acl_entries_dacl(user_ns, pndace, fattr, + set_posix_acl_entries_dacl(idmap, pndace, fattr, &num_aces, &size, nt_num_aces); pndacl->num_aces = cpu_to_le32(num_aces); pndacl->size = cpu_to_le16(le16_to_cpu(pndacl->size) + size); } -static void set_mode_dacl(struct user_namespace *user_ns, +static void set_mode_dacl(struct mnt_idmap *idmap, struct smb_acl *pndacl, struct smb_fattr *fattr) { struct smb_ace *pace, *pndace; @@ -741,7 +741,7 @@ static void set_mode_dacl(struct user_namespace *user_ns, pace = pndace = (struct smb_ace *)((char *)pndacl + sizeof(struct smb_acl)); if (fattr->cf_acls) { - set_posix_acl_entries_dacl(user_ns, pndace, fattr, + set_posix_acl_entries_dacl(idmap, pndace, fattr, &num_aces, &size, num_aces); goto out; } @@ -808,7 +808,7 @@ static int parse_sid(struct smb_sid *psid, char *end_of_acl) } /* Convert CIFS ACL to POSIX form */ -int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, +int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, int acl_len, struct smb_fattr *fattr) { int rc = 0; @@ -851,7 +851,7 @@ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, return rc; } - rc = sid_to_id(user_ns, owner_sid_ptr, SIDOWNER, fattr); + rc = sid_to_id(idmap, owner_sid_ptr, SIDOWNER, fattr); if (rc) { pr_err("%s: Error %d mapping Owner SID to uid\n", __func__, rc); @@ -866,7 +866,7 @@ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, __func__, rc); return rc; } - rc = sid_to_id(user_ns, group_sid_ptr, SIDUNIX_GROUP, fattr); + rc = sid_to_id(idmap, group_sid_ptr, SIDUNIX_GROUP, fattr); if (rc) { pr_err("%s: Error %d mapping Group SID to gid\n", __func__, rc); @@ -881,7 +881,7 @@ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, pntsd->type |= cpu_to_le16(DACL_PROTECTED); if (dacloffset) { - parse_dacl(user_ns, dacl_ptr, end_of_acl, + parse_dacl(idmap, dacl_ptr, end_of_acl, owner_sid_ptr, group_sid_ptr, fattr); } @@ -889,7 +889,7 @@ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, } /* Convert permission bits from mode to equivalent CIFS ACL */ -int build_sec_desc(struct user_namespace *user_ns, +int build_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info, __u32 *secdesclen, struct smb_fattr *fattr) @@ -950,7 +950,7 @@ int build_sec_desc(struct user_namespace *user_ns, dacl_ptr->num_aces = 0; if (!ppntsd) { - set_mode_dacl(user_ns, dacl_ptr, fattr); + set_mode_dacl(idmap, dacl_ptr, fattr); } else { struct smb_acl *ppdacl_ptr; unsigned int dacl_offset = le32_to_cpu(ppntsd->dacloffset); @@ -966,7 +966,7 @@ int build_sec_desc(struct user_namespace *user_ns, ppdacl_size < sizeof(struct smb_acl)) goto out; - set_ntacl_dacl(user_ns, dacl_ptr, ppdacl_ptr, + set_ntacl_dacl(idmap, dacl_ptr, ppdacl_ptr, ntacl_size - sizeof(struct smb_acl), nowner_sid_ptr, ngroup_sid_ptr, fattr); @@ -1002,13 +1002,13 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, struct smb_ntsd *parent_pntsd = NULL; struct smb_sid owner_sid, group_sid; struct dentry *parent = path->dentry->d_parent; - struct user_namespace *user_ns = mnt_user_ns(path->mnt); + struct mnt_idmap *idmap = mnt_idmap(path->mnt); int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0, pdacl_size; int rc = 0, num_aces, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size; char *aces_base; bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode); - pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns, + pntsd_size = ksmbd_vfs_get_sd_xattr(conn, idmap, parent, &parent_pntsd); if (pntsd_size <= 0) return -ENOENT; @@ -1162,7 +1162,7 @@ pass: pntsd_size += sizeof(struct smb_acl) + nt_size; } - ksmbd_vfs_set_sd_xattr(conn, user_ns, + ksmbd_vfs_set_sd_xattr(conn, idmap, path->dentry, pntsd, pntsd_size); kfree(pntsd); } @@ -1190,7 +1190,7 @@ bool smb_inherit_flags(int flags, bool is_dir) int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, __le32 *pdaccess, int uid) { - struct user_namespace *user_ns = mnt_user_ns(path->mnt); + struct mnt_idmap *idmap = mnt_idmap(path->mnt); struct smb_ntsd *pntsd = NULL; struct smb_acl *pdacl; struct posix_acl *posix_acls; @@ -1206,7 +1206,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, unsigned short ace_size; ksmbd_debug(SMB, "check permission using windows acl\n"); - pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns, + pntsd_size = ksmbd_vfs_get_sd_xattr(conn, idmap, path->dentry, &pntsd); if (pntsd_size <= 0 || !pntsd) goto err_out; @@ -1296,9 +1296,9 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, pa_entry = posix_acls->a_entries; for (i = 0; i < posix_acls->a_count; i++, pa_entry++) { if (pa_entry->e_tag == ACL_USER) - id = posix_acl_uid_translate(user_ns, pa_entry); + id = posix_acl_uid_translate(idmap, pa_entry); else if (pa_entry->e_tag == ACL_GROUP) - id = posix_acl_gid_translate(user_ns, pa_entry); + id = posix_acl_gid_translate(idmap, pa_entry); else continue; @@ -1360,14 +1360,14 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, int rc; struct smb_fattr fattr = {{0}}; struct inode *inode = d_inode(path->dentry); - struct user_namespace *user_ns = mnt_user_ns(path->mnt); + struct mnt_idmap *idmap = mnt_idmap(path->mnt); struct iattr newattrs; fattr.cf_uid = INVALID_UID; fattr.cf_gid = INVALID_GID; fattr.cf_mode = inode->i_mode; - rc = parse_sec_desc(user_ns, pntsd, ntsd_len, &fattr); + rc = parse_sec_desc(idmap, pntsd, ntsd_len, &fattr); if (rc) goto out; @@ -1383,17 +1383,17 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, newattrs.ia_valid |= ATTR_MODE; newattrs.ia_mode = (inode->i_mode & ~0777) | (fattr.cf_mode & 0777); - ksmbd_vfs_remove_acl_xattrs(user_ns, path->dentry); + ksmbd_vfs_remove_acl_xattrs(idmap, path->dentry); /* Update posix acls */ if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && fattr.cf_dacls) { - rc = set_posix_acl(user_ns, path->dentry, + rc = set_posix_acl(idmap, path->dentry, ACL_TYPE_ACCESS, fattr.cf_acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n", rc); if (S_ISDIR(inode->i_mode) && fattr.cf_dacls) { - rc = set_posix_acl(user_ns, path->dentry, + rc = set_posix_acl(idmap, path->dentry, ACL_TYPE_DEFAULT, fattr.cf_dacls); if (rc) ksmbd_debug(SMB, @@ -1403,7 +1403,7 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, } inode_lock(inode); - rc = notify_change(user_ns, path->dentry, &newattrs, NULL); + rc = notify_change(idmap, path->dentry, &newattrs, NULL); inode_unlock(inode); if (rc) goto out; @@ -1414,8 +1414,8 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, if (test_share_config_flag(tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) { /* Update WinACL in xattr */ - ksmbd_vfs_remove_sd_xattrs(user_ns, path->dentry); - ksmbd_vfs_set_sd_xattr(conn, user_ns, + ksmbd_vfs_remove_sd_xattrs(idmap, path->dentry); + ksmbd_vfs_set_sd_xattr(conn, idmap, path->dentry, pntsd, ntsd_len); } diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h index 618f2e0236b3..49a8c292bd2e 100644 --- a/fs/ksmbd/smbacl.h +++ b/fs/ksmbd/smbacl.h @@ -190,9 +190,9 @@ struct posix_acl_state { struct posix_ace_state_array *groups; }; -int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, +int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, int acl_len, struct smb_fattr *fattr); -int build_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, +int build_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info, __u32 *secdesclen, struct smb_fattr *fattr); int init_acl_state(struct posix_acl_state *state, int cnt); @@ -211,25 +211,25 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid); void ksmbd_init_domain(u32 *sub_auth); -static inline uid_t posix_acl_uid_translate(struct user_namespace *mnt_userns, +static inline uid_t posix_acl_uid_translate(struct mnt_idmap *idmap, struct posix_acl_entry *pace) { vfsuid_t vfsuid; /* If this is an idmapped mount, apply the idmapping. */ - vfsuid = make_vfsuid(mnt_userns, &init_user_ns, pace->e_uid); + vfsuid = make_vfsuid(idmap, &init_user_ns, pace->e_uid); /* Translate the kuid into a userspace id ksmbd would see. */ return from_kuid(&init_user_ns, vfsuid_into_kuid(vfsuid)); } -static inline gid_t posix_acl_gid_translate(struct user_namespace *mnt_userns, +static inline gid_t posix_acl_gid_translate(struct mnt_idmap *idmap, struct posix_acl_entry *pace) { vfsgid_t vfsgid; /* If this is an idmapped mount, apply the idmapping. */ - vfsgid = make_vfsgid(mnt_userns, &init_user_ns, pace->e_gid); + vfsgid = make_vfsgid(idmap, &init_user_ns, pace->e_gid); /* Translate the kgid into a userspace id ksmbd would see. */ return from_kgid(&init_user_ns, vfsgid_into_kgid(vfsgid)); diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c index c9aca21637d5..40c721f9227e 100644 --- a/fs/ksmbd/transport_ipc.c +++ b/fs/ksmbd/transport_ipc.c @@ -308,6 +308,9 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req) if (req->smbd_max_io_size) init_smbd_max_io_size(req->smbd_max_io_size); + if (req->max_connections) + server_conf.max_connections = req->max_connections; + ret = ksmbd_set_netbios_name(req->netbios_name); ret |= ksmbd_set_server_string(req->server_string); ret |= ksmbd_set_work_group(req->work_group); diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 63d55f543bd2..603893fd87f5 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -15,6 +15,8 @@ #define IFACE_STATE_DOWN BIT(0) #define IFACE_STATE_CONFIGURED BIT(1) +static atomic_t active_num_conn; + struct interface { struct task_struct *ksmbd_kthread; struct socket *ksmbd_socket; @@ -185,8 +187,10 @@ static int ksmbd_tcp_new_connection(struct socket *client_sk) struct tcp_transport *t; t = alloc_transport(client_sk); - if (!t) + if (!t) { + sock_release(client_sk); return -ENOMEM; + } csin = KSMBD_TCP_PEER_SOCKADDR(KSMBD_TRANS(t)->conn); if (kernel_getpeername(client_sk, csin) < 0) { @@ -239,6 +243,15 @@ static int ksmbd_kthread_fn(void *p) continue; } + if (server_conf.max_connections && + atomic_inc_return(&active_num_conn) >= server_conf.max_connections) { + pr_info_ratelimited("Limit the maximum number of connections(%u)\n", + atomic_read(&active_num_conn)); + atomic_dec(&active_num_conn); + sock_release(client_sk); + continue; + } + ksmbd_debug(CONN, "connect success: accepted new connection\n"); client_sk->sk->sk_rcvtimeo = KSMBD_TCP_RECV_TIMEOUT; client_sk->sk->sk_sndtimeo = KSMBD_TCP_SEND_TIMEOUT; @@ -295,6 +308,7 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, struct msghdr ksmbd_msg; struct kvec *iov; struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn; + int max_retry = 2; iov = get_conn_iovec(t, nr_segs); if (!iov) @@ -321,9 +335,11 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, } else if (conn->status == KSMBD_SESS_NEED_RECONNECT) { total_read = -EAGAIN; break; - } else if (length == -ERESTARTSYS || length == -EAGAIN) { + } else if ((length == -ERESTARTSYS || length == -EAGAIN) && + max_retry) { usleep_range(1000, 2000); length = 0; + max_retry--; continue; } else if (length <= 0) { total_read = -EAGAIN; @@ -365,6 +381,8 @@ static int ksmbd_tcp_writev(struct ksmbd_transport *t, struct kvec *iov, static void ksmbd_tcp_disconnect(struct ksmbd_transport *t) { free_transport(TCP_TRANS(t)); + if (server_conf.max_connections) + atomic_dec(&active_num_conn); } static void tcp_destroy_socket(struct socket *ksmbd_socket) diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index ff0e7a4fcd4d..aa1300b7bfc2 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -6,6 +6,7 @@ #include <linux/kernel.h> #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/uaccess.h> #include <linux/backing-dev.h> #include <linux/writeback.h> @@ -69,14 +70,14 @@ static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work, * * the reference count of @parent isn't incremented. */ -int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry *parent, +int ksmbd_vfs_lock_parent(struct mnt_idmap *idmap, struct dentry *parent, struct dentry *child) { struct dentry *dentry; int ret = 0; inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); - dentry = lookup_one(user_ns, child->d_name.name, parent, + dentry = lookup_one(idmap, child->d_name.name, parent, child->d_name.len); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); @@ -96,20 +97,20 @@ out_err: return ret; } -int ksmbd_vfs_may_delete(struct user_namespace *user_ns, +int ksmbd_vfs_may_delete(struct mnt_idmap *idmap, struct dentry *dentry) { struct dentry *parent; int ret; parent = dget_parent(dentry); - ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry); + ret = ksmbd_vfs_lock_parent(idmap, parent, dentry); if (ret) { dput(parent); return ret; } - ret = inode_permission(user_ns, d_inode(parent), + ret = inode_permission(idmap, d_inode(parent), MAY_EXEC | MAY_WRITE); inode_unlock(d_inode(parent)); @@ -117,7 +118,7 @@ int ksmbd_vfs_may_delete(struct user_namespace *user_ns, return ret; } -int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns, +int ksmbd_vfs_query_maximal_access(struct mnt_idmap *idmap, struct dentry *dentry, __le32 *daccess) { struct dentry *parent; @@ -125,26 +126,26 @@ int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns, *daccess = cpu_to_le32(FILE_READ_ATTRIBUTES | READ_CONTROL); - if (!inode_permission(user_ns, d_inode(dentry), MAY_OPEN | MAY_WRITE)) + if (!inode_permission(idmap, d_inode(dentry), MAY_OPEN | MAY_WRITE)) *daccess |= cpu_to_le32(WRITE_DAC | WRITE_OWNER | SYNCHRONIZE | FILE_WRITE_DATA | FILE_APPEND_DATA | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES | FILE_DELETE_CHILD); - if (!inode_permission(user_ns, d_inode(dentry), MAY_OPEN | MAY_READ)) + if (!inode_permission(idmap, d_inode(dentry), MAY_OPEN | MAY_READ)) *daccess |= FILE_READ_DATA_LE | FILE_READ_EA_LE; - if (!inode_permission(user_ns, d_inode(dentry), MAY_OPEN | MAY_EXEC)) + if (!inode_permission(idmap, d_inode(dentry), MAY_OPEN | MAY_EXEC)) *daccess |= FILE_EXECUTE_LE; parent = dget_parent(dentry); - ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry); + ret = ksmbd_vfs_lock_parent(idmap, parent, dentry); if (ret) { dput(parent); return ret; } - if (!inode_permission(user_ns, d_inode(parent), MAY_EXEC | MAY_WRITE)) + if (!inode_permission(idmap, d_inode(parent), MAY_EXEC | MAY_WRITE)) *daccess |= FILE_DELETE_LE; inode_unlock(d_inode(parent)); @@ -177,7 +178,7 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) } mode |= S_IFREG; - err = vfs_create(mnt_user_ns(path.mnt), d_inode(path.dentry), + err = vfs_create(mnt_idmap(path.mnt), d_inode(path.dentry), dentry, mode, true); if (!err) { ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), @@ -199,7 +200,7 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) */ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) { - struct user_namespace *user_ns; + struct mnt_idmap *idmap; struct path path; struct dentry *dentry; int err; @@ -215,15 +216,15 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) return err; } - user_ns = mnt_user_ns(path.mnt); + idmap = mnt_idmap(path.mnt); mode |= S_IFDIR; - err = vfs_mkdir(user_ns, d_inode(path.dentry), dentry, mode); + err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); if (err) { goto out; } else if (d_unhashed(dentry)) { struct dentry *d; - d = lookup_one(user_ns, dentry->d_name.name, dentry->d_parent, + d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent, dentry->d_name.len); if (IS_ERR(d)) { err = PTR_ERR(d); @@ -245,7 +246,7 @@ out: return err; } -static ssize_t ksmbd_vfs_getcasexattr(struct user_namespace *user_ns, +static ssize_t ksmbd_vfs_getcasexattr(struct mnt_idmap *idmap, struct dentry *dentry, char *attr_name, int attr_name_len, char **attr_value) { @@ -262,7 +263,7 @@ static ssize_t ksmbd_vfs_getcasexattr(struct user_namespace *user_ns, if (strncasecmp(attr_name, name, attr_name_len)) continue; - value_len = ksmbd_vfs_getxattr(user_ns, + value_len = ksmbd_vfs_getxattr(idmap, dentry, name, attr_value); @@ -285,7 +286,7 @@ static int ksmbd_vfs_stream_read(struct ksmbd_file *fp, char *buf, loff_t *pos, ksmbd_debug(VFS, "read stream data pos : %llu, count : %zd\n", *pos, count); - v_len = ksmbd_vfs_getcasexattr(file_mnt_user_ns(fp->filp), + v_len = ksmbd_vfs_getcasexattr(file_mnt_idmap(fp->filp), fp->filp->f_path.dentry, fp->stream.name, fp->stream.size, @@ -409,7 +410,7 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, size_t count) { char *stream_buf = NULL, *wbuf; - struct user_namespace *user_ns = file_mnt_user_ns(fp->filp); + struct mnt_idmap *idmap = file_mnt_idmap(fp->filp); size_t size, v_len; int err = 0; @@ -422,7 +423,7 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, count = (*pos + count) - XATTR_SIZE_MAX; } - v_len = ksmbd_vfs_getcasexattr(user_ns, + v_len = ksmbd_vfs_getcasexattr(idmap, fp->filp->f_path.dentry, fp->stream.name, fp->stream.size, @@ -448,7 +449,7 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, memcpy(&stream_buf[*pos], buf, count); - err = ksmbd_vfs_setxattr(user_ns, + err = ksmbd_vfs_setxattr(idmap, fp->filp->f_path.dentry, fp->stream.name, (void *)stream_buf, @@ -583,7 +584,7 @@ int ksmbd_vfs_fsync(struct ksmbd_work *work, u64 fid, u64 p_id) */ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name) { - struct user_namespace *user_ns; + struct mnt_idmap *idmap; struct path path; struct dentry *parent; int err; @@ -598,9 +599,9 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name) return err; } - user_ns = mnt_user_ns(path.mnt); + idmap = mnt_idmap(path.mnt); parent = dget_parent(path.dentry); - err = ksmbd_vfs_lock_parent(user_ns, parent, path.dentry); + err = ksmbd_vfs_lock_parent(idmap, parent, path.dentry); if (err) { dput(parent); path_put(&path); @@ -614,12 +615,12 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name) } if (S_ISDIR(d_inode(path.dentry)->i_mode)) { - err = vfs_rmdir(user_ns, d_inode(parent), path.dentry); + err = vfs_rmdir(idmap, d_inode(parent), path.dentry); if (err && err != -ENOTEMPTY) ksmbd_debug(VFS, "%s: rmdir failed, err %d\n", name, err); } else { - err = vfs_unlink(user_ns, d_inode(parent), path.dentry, NULL); + err = vfs_unlink(idmap, d_inode(parent), path.dentry, NULL); if (err) ksmbd_debug(VFS, "%s: unlink failed, err %d\n", name, err); @@ -672,7 +673,7 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname, goto out3; } - err = vfs_link(oldpath.dentry, mnt_user_ns(newpath.mnt), + err = vfs_link(oldpath.dentry, mnt_idmap(newpath.mnt), d_inode(newpath.dentry), dentry, NULL); if (err) @@ -711,10 +712,10 @@ static int ksmbd_validate_entry_in_use(struct dentry *src_dent) } static int __ksmbd_vfs_rename(struct ksmbd_work *work, - struct user_namespace *src_user_ns, + struct mnt_idmap *src_idmap, struct dentry *src_dent_parent, struct dentry *src_dent, - struct user_namespace *dst_user_ns, + struct mnt_idmap *dst_idmap, struct dentry *dst_dent_parent, struct dentry *trap_dent, char *dst_name) @@ -740,8 +741,8 @@ static int __ksmbd_vfs_rename(struct ksmbd_work *work, if (ksmbd_override_fsids(work)) return -ENOMEM; - dst_dent = lookup_one(dst_user_ns, dst_name, dst_dent_parent, - strlen(dst_name)); + dst_dent = lookup_one(dst_idmap, dst_name, + dst_dent_parent, strlen(dst_name)); err = PTR_ERR(dst_dent); if (IS_ERR(dst_dent)) { pr_err("lookup failed %s [%d]\n", dst_name, err); @@ -751,10 +752,10 @@ static int __ksmbd_vfs_rename(struct ksmbd_work *work, err = -ENOTEMPTY; if (dst_dent != trap_dent && !d_really_is_positive(dst_dent)) { struct renamedata rd = { - .old_mnt_userns = src_user_ns, + .old_mnt_idmap = src_idmap, .old_dir = d_inode(src_dent_parent), .old_dentry = src_dent, - .new_mnt_userns = dst_user_ns, + .new_mnt_idmap = dst_idmap, .new_dir = d_inode(dst_dent_parent), .new_dentry = dst_dent, }; @@ -772,7 +773,7 @@ out: int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp, char *newname) { - struct user_namespace *user_ns; + struct mnt_idmap *idmap; struct path dst_path; struct dentry *src_dent_parent, *dst_dent_parent; struct dentry *src_dent, *trap_dent, *src_child; @@ -800,8 +801,8 @@ int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp, trap_dent = lock_rename(src_dent_parent, dst_dent_parent); dget(src_dent); dget(dst_dent_parent); - user_ns = file_mnt_user_ns(fp->filp); - src_child = lookup_one(user_ns, src_dent->d_name.name, src_dent_parent, + idmap = file_mnt_idmap(fp->filp); + src_child = lookup_one(idmap, src_dent->d_name.name, src_dent_parent, src_dent->d_name.len); if (IS_ERR(src_child)) { err = PTR_ERR(src_child); @@ -816,10 +817,10 @@ int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp, dput(src_child); err = __ksmbd_vfs_rename(work, - user_ns, + idmap, src_dent_parent, src_dent, - mnt_user_ns(dst_path.mnt), + mnt_idmap(dst_path.mnt), dst_dent_parent, trap_dent, dst_name); @@ -907,22 +908,22 @@ ssize_t ksmbd_vfs_listxattr(struct dentry *dentry, char **list) return size; } -static ssize_t ksmbd_vfs_xattr_len(struct user_namespace *user_ns, +static ssize_t ksmbd_vfs_xattr_len(struct mnt_idmap *idmap, struct dentry *dentry, char *xattr_name) { - return vfs_getxattr(user_ns, dentry, xattr_name, NULL, 0); + return vfs_getxattr(idmap, dentry, xattr_name, NULL, 0); } /** * ksmbd_vfs_getxattr() - vfs helper for smb get extended attributes value - * @user_ns: user namespace + * @idmap: idmap * @dentry: dentry of file for getting xattrs * @xattr_name: name of xattr name to query * @xattr_buf: destination buffer xattr value * * Return: read xattr value length on success, otherwise error */ -ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns, +ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap, struct dentry *dentry, char *xattr_name, char **xattr_buf) { @@ -930,7 +931,7 @@ ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns, char *buf; *xattr_buf = NULL; - xattr_len = ksmbd_vfs_xattr_len(user_ns, dentry, xattr_name); + xattr_len = ksmbd_vfs_xattr_len(idmap, dentry, xattr_name); if (xattr_len < 0) return xattr_len; @@ -938,7 +939,7 @@ ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns, if (!buf) return -ENOMEM; - xattr_len = vfs_getxattr(user_ns, dentry, xattr_name, + xattr_len = vfs_getxattr(idmap, dentry, xattr_name, (void *)buf, xattr_len); if (xattr_len > 0) *xattr_buf = buf; @@ -949,7 +950,7 @@ ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns, /** * ksmbd_vfs_setxattr() - vfs helper for smb set extended attributes value - * @user_ns: user namespace + * @idmap: idmap of the relevant mount * @dentry: dentry to set XATTR at * @name: xattr name for setxattr * @value: xattr value to set @@ -958,13 +959,13 @@ ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns, * * Return: 0 on success, otherwise error */ -int ksmbd_vfs_setxattr(struct user_namespace *user_ns, +int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *attr_name, void *attr_value, size_t attr_size, int flags) { int err; - err = vfs_setxattr(user_ns, + err = vfs_setxattr(idmap, dentry, attr_name, attr_value, @@ -1074,26 +1075,26 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, return ret; } -int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns, +int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, struct dentry *dentry, char *attr_name) { - return vfs_removexattr(user_ns, dentry, attr_name); + return vfs_removexattr(idmap, dentry, attr_name); } -int ksmbd_vfs_unlink(struct user_namespace *user_ns, +int ksmbd_vfs_unlink(struct mnt_idmap *idmap, struct dentry *dir, struct dentry *dentry) { int err = 0; - err = ksmbd_vfs_lock_parent(user_ns, dir, dentry); + err = ksmbd_vfs_lock_parent(idmap, dir, dentry); if (err) return err; dget(dentry); if (S_ISDIR(d_inode(dentry)->i_mode)) - err = vfs_rmdir(user_ns, d_inode(dir), dentry); + err = vfs_rmdir(idmap, d_inode(dir), dentry); else - err = vfs_unlink(user_ns, d_inode(dir), dentry, NULL); + err = vfs_unlink(idmap, d_inode(dir), dentry, NULL); dput(dentry); inode_unlock(d_inode(dir)); @@ -1298,7 +1299,7 @@ struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work, return dent; } -int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns, +int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap, struct dentry *dentry) { char *name, *xattr_list = NULL; @@ -1321,7 +1322,7 @@ int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns, sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1) || !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1)) { - err = vfs_remove_acl(user_ns, dentry, name); + err = vfs_remove_acl(idmap, dentry, name); if (err) ksmbd_debug(SMB, "remove acl xattr failed : %s\n", name); @@ -1332,7 +1333,7 @@ out: return err; } -int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns, +int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, struct dentry *dentry) { char *name, *xattr_list = NULL; @@ -1352,7 +1353,7 @@ int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns, ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name)); if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) { - err = ksmbd_vfs_remove_xattr(user_ns, dentry, name); + err = ksmbd_vfs_remove_xattr(idmap, dentry, name); if (err) ksmbd_debug(SMB, "remove xattr failed : %s\n", name); } @@ -1362,7 +1363,7 @@ out: return err; } -static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct user_namespace *user_ns, +static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct mnt_idmap *idmap, struct inode *inode, int acl_type) { @@ -1392,14 +1393,14 @@ static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct user_namespac switch (pa_entry->e_tag) { case ACL_USER: xa_entry->type = SMB_ACL_USER; - xa_entry->uid = posix_acl_uid_translate(user_ns, pa_entry); + xa_entry->uid = posix_acl_uid_translate(idmap, pa_entry); break; case ACL_USER_OBJ: xa_entry->type = SMB_ACL_USER_OBJ; break; case ACL_GROUP: xa_entry->type = SMB_ACL_GROUP; - xa_entry->gid = posix_acl_gid_translate(user_ns, pa_entry); + xa_entry->gid = posix_acl_gid_translate(idmap, pa_entry); break; case ACL_GROUP_OBJ: xa_entry->type = SMB_ACL_GROUP_OBJ; @@ -1428,7 +1429,7 @@ out: } int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, - struct user_namespace *user_ns, + struct mnt_idmap *idmap, struct dentry *dentry, struct smb_ntsd *pntsd, int len) { @@ -1461,13 +1462,13 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, return rc; } - smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode, + smb_acl = ksmbd_vfs_make_xattr_posix_acl(idmap, inode, ACL_TYPE_ACCESS); if (S_ISDIR(inode->i_mode)) - def_smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode, + def_smb_acl = ksmbd_vfs_make_xattr_posix_acl(idmap, inode, ACL_TYPE_DEFAULT); - rc = ndr_encode_posix_acl(&acl_ndr, user_ns, inode, + rc = ndr_encode_posix_acl(&acl_ndr, idmap, inode, smb_acl, def_smb_acl); if (rc) { pr_err("failed to encode ndr to posix acl\n"); @@ -1487,7 +1488,7 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, goto out; } - rc = ksmbd_vfs_setxattr(user_ns, dentry, + rc = ksmbd_vfs_setxattr(idmap, dentry, XATTR_NAME_SD, sd_ndr.data, sd_ndr.offset, 0); if (rc < 0) @@ -1502,7 +1503,7 @@ out: } int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn, - struct user_namespace *user_ns, + struct mnt_idmap *idmap, struct dentry *dentry, struct smb_ntsd **pntsd) { @@ -1514,7 +1515,7 @@ int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn, struct xattr_smb_acl *smb_acl = NULL, *def_smb_acl = NULL; __u8 cmp_hash[XATTR_SD_HASH_SIZE] = {0}; - rc = ksmbd_vfs_getxattr(user_ns, dentry, XATTR_NAME_SD, &n.data); + rc = ksmbd_vfs_getxattr(idmap, dentry, XATTR_NAME_SD, &n.data); if (rc <= 0) return rc; @@ -1523,13 +1524,13 @@ int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn, if (rc) goto free_n_data; - smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode, + smb_acl = ksmbd_vfs_make_xattr_posix_acl(idmap, inode, ACL_TYPE_ACCESS); if (S_ISDIR(inode->i_mode)) - def_smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode, + def_smb_acl = ksmbd_vfs_make_xattr_posix_acl(idmap, inode, ACL_TYPE_DEFAULT); - rc = ndr_encode_posix_acl(&acl_ndr, user_ns, inode, smb_acl, + rc = ndr_encode_posix_acl(&acl_ndr, idmap, inode, smb_acl, def_smb_acl); if (rc) { pr_err("failed to encode ndr to posix acl\n"); @@ -1576,7 +1577,7 @@ free_n_data: return rc; } -int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns, +int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap, struct dentry *dentry, struct xattr_dos_attrib *da) { @@ -1587,7 +1588,7 @@ int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns, if (err) return err; - err = ksmbd_vfs_setxattr(user_ns, dentry, XATTR_NAME_DOS_ATTRIBUTE, + err = ksmbd_vfs_setxattr(idmap, dentry, XATTR_NAME_DOS_ATTRIBUTE, (void *)n.data, n.offset, 0); if (err) ksmbd_debug(SMB, "failed to store dos attribute in xattr\n"); @@ -1596,14 +1597,14 @@ int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns, return err; } -int ksmbd_vfs_get_dos_attrib_xattr(struct user_namespace *user_ns, +int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap, struct dentry *dentry, struct xattr_dos_attrib *da) { struct ndr n; int err; - err = ksmbd_vfs_getxattr(user_ns, dentry, XATTR_NAME_DOS_ATTRIBUTE, + err = ksmbd_vfs_getxattr(idmap, dentry, XATTR_NAME_DOS_ATTRIBUTE, (char **)&n.data); if (err > 0) { n.length = err; @@ -1650,14 +1651,14 @@ void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat) } int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work, - struct user_namespace *user_ns, + struct mnt_idmap *idmap, struct dentry *dentry, struct ksmbd_kstat *ksmbd_kstat) { u64 time; int rc; - generic_fillattr(user_ns, d_inode(dentry), ksmbd_kstat->kstat); + generic_fillattr(idmap, d_inode(dentry), ksmbd_kstat->kstat); time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->ctime); ksmbd_kstat->create_time = time; @@ -1675,7 +1676,7 @@ int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work, KSMBD_SHARE_FLAG_STORE_DOS_ATTRS)) { struct xattr_dos_attrib da; - rc = ksmbd_vfs_get_dos_attrib_xattr(user_ns, dentry, &da); + rc = ksmbd_vfs_get_dos_attrib_xattr(idmap, dentry, &da); if (rc > 0) { ksmbd_kstat->file_attributes = cpu_to_le32(da.attr); ksmbd_kstat->create_time = da.create_time; @@ -1687,7 +1688,7 @@ int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work, return 0; } -ssize_t ksmbd_vfs_casexattr_len(struct user_namespace *user_ns, +ssize_t ksmbd_vfs_casexattr_len(struct mnt_idmap *idmap, struct dentry *dentry, char *attr_name, int attr_name_len) { @@ -1704,7 +1705,7 @@ ssize_t ksmbd_vfs_casexattr_len(struct user_namespace *user_ns, if (strncasecmp(attr_name, name, attr_name_len)) continue; - value_len = ksmbd_vfs_xattr_len(user_ns, dentry, name); + value_len = ksmbd_vfs_xattr_len(idmap, dentry, name); break; } @@ -1823,7 +1824,7 @@ void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock) locks_delete_block(flock); } -int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns, +int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, struct dentry *dentry) { struct posix_acl_state acl_state; @@ -1857,13 +1858,13 @@ int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns, return -ENOMEM; } posix_state_to_acl(&acl_state, acls->a_entries); - rc = set_posix_acl(user_ns, dentry, ACL_TYPE_ACCESS, acls); + rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n", rc); else if (S_ISDIR(inode->i_mode)) { posix_state_to_acl(&acl_state, acls->a_entries); - rc = set_posix_acl(user_ns, dentry, ACL_TYPE_DEFAULT, acls); + rc = set_posix_acl(idmap, dentry, ACL_TYPE_DEFAULT, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n", rc); @@ -1873,7 +1874,7 @@ int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns, return rc; } -int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns, +int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct inode *parent_inode) { struct posix_acl *acls; @@ -1896,12 +1897,12 @@ int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns, } } - rc = set_posix_acl(user_ns, dentry, ACL_TYPE_ACCESS, acls); + rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n", rc); if (S_ISDIR(inode->i_mode)) { - rc = set_posix_acl(user_ns, dentry, ACL_TYPE_DEFAULT, + rc = set_posix_acl(idmap, dentry, ACL_TYPE_DEFAULT, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n", diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h index 0d73d735cc39..9d676ab0cd25 100644 --- a/fs/ksmbd/vfs.h +++ b/fs/ksmbd/vfs.h @@ -71,10 +71,10 @@ struct ksmbd_kstat { __le32 file_attributes; }; -int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry *parent, +int ksmbd_vfs_lock_parent(struct mnt_idmap *idmap, struct dentry *parent, struct dentry *child); -int ksmbd_vfs_may_delete(struct user_namespace *user_ns, struct dentry *dentry); -int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns, +int ksmbd_vfs_may_delete(struct mnt_idmap *idmap, struct dentry *dentry); +int ksmbd_vfs_query_maximal_access(struct mnt_idmap *idmap, struct dentry *dentry, __le32 *daccess); int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode); int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode); @@ -102,19 +102,19 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, unsigned int *chunk_size_written, loff_t *total_size_written); ssize_t ksmbd_vfs_listxattr(struct dentry *dentry, char **list); -ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns, +ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap, struct dentry *dentry, char *xattr_name, char **xattr_buf); -ssize_t ksmbd_vfs_casexattr_len(struct user_namespace *user_ns, +ssize_t ksmbd_vfs_casexattr_len(struct mnt_idmap *idmap, struct dentry *dentry, char *attr_name, int attr_name_len); -int ksmbd_vfs_setxattr(struct user_namespace *user_ns, +int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *attr_name, void *attr_value, size_t attr_size, int flags); int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name, size_t *xattr_stream_name_size, int s_type); -int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns, +int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, struct dentry *dentry, char *attr_name); int ksmbd_vfs_kern_path(struct ksmbd_work *work, char *name, unsigned int flags, struct path *path, @@ -131,37 +131,37 @@ struct file_allocated_range_buffer; int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, struct file_allocated_range_buffer *ranges, unsigned int in_count, unsigned int *out_count); -int ksmbd_vfs_unlink(struct user_namespace *user_ns, - struct dentry *dir, struct dentry *dentry); +int ksmbd_vfs_unlink(struct mnt_idmap *idmap, struct dentry *dir, + struct dentry *dentry); void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat); int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work, - struct user_namespace *user_ns, + struct mnt_idmap *idmap, struct dentry *dentry, struct ksmbd_kstat *ksmbd_kstat); void ksmbd_vfs_posix_lock_wait(struct file_lock *flock); int ksmbd_vfs_posix_lock_wait_timeout(struct file_lock *flock, long timeout); void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock); -int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns, +int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap, struct dentry *dentry); -int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns, +int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, struct dentry *dentry); int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, - struct user_namespace *user_ns, + struct mnt_idmap *idmap, struct dentry *dentry, struct smb_ntsd *pntsd, int len); int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn, - struct user_namespace *user_ns, + struct mnt_idmap *idmap, struct dentry *dentry, struct smb_ntsd **pntsd); -int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns, +int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap, struct dentry *dentry, struct xattr_dos_attrib *da); -int ksmbd_vfs_get_dos_attrib_xattr(struct user_namespace *user_ns, +int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap, struct dentry *dentry, struct xattr_dos_attrib *da); -int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns, +int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, struct dentry *dentry); -int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns, +int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct inode *parent_inode); #endif /* __KSMBD_VFS_H__ */ diff --git a/fs/ksmbd/vfs_cache.c b/fs/ksmbd/vfs_cache.c index da9163b00350..1d8126443a7f 100644 --- a/fs/ksmbd/vfs_cache.c +++ b/fs/ksmbd/vfs_cache.c @@ -5,6 +5,7 @@ */ #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/slab.h> #include <linux/vmalloc.h> @@ -251,7 +252,7 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp) filp = fp->filp; if (ksmbd_stream_fd(fp) && (ci->m_flags & S_DEL_ON_CLS_STREAM)) { ci->m_flags &= ~S_DEL_ON_CLS_STREAM; - err = ksmbd_vfs_remove_xattr(file_mnt_user_ns(filp), + err = ksmbd_vfs_remove_xattr(file_mnt_idmap(filp), filp->f_path.dentry, fp->stream.name); if (err) @@ -266,7 +267,7 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp) dir = dentry->d_parent; ci->m_flags &= ~(S_DEL_ON_CLS | S_DEL_PENDING); write_unlock(&ci->m_lock); - ksmbd_vfs_unlink(file_mnt_user_ns(filp), dir, dentry); + ksmbd_vfs_unlink(file_mnt_idmap(filp), dir, dentry); write_lock(&ci->m_lock); } write_unlock(&ci->m_lock); diff --git a/fs/libfs.c b/fs/libfs.c index aada4e7c8713..4eda519c3002 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -28,12 +28,12 @@ #include "internal.h" -int simple_getattr(struct user_namespace *mnt_userns, const struct path *path, +int simple_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9); return 0; } @@ -473,7 +473,7 @@ int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, } EXPORT_SYMBOL_GPL(simple_rename_exchange); -int simple_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { @@ -509,7 +509,7 @@ EXPORT_SYMBOL(simple_rename); /** * simple_setattr - setattr for simple filesystem - * @mnt_userns: user namespace of the target mount + * @idmap: idmap of the target mount * @dentry: dentry * @iattr: iattr structure * @@ -522,19 +522,19 @@ EXPORT_SYMBOL(simple_rename); * on simple regular filesystems. Anything that needs to change on-disk * or wire state on size changes needs its own setattr method. */ -int simple_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int simple_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); int error; - error = setattr_prepare(mnt_userns, dentry, iattr); + error = setattr_prepare(idmap, dentry, iattr); if (error) return error; if (iattr->ia_valid & ATTR_SIZE) truncate_setsize(inode, iattr->ia_size); - setattr_copy(mnt_userns, inode, iattr); + setattr_copy(idmap, inode, iattr); mark_inode_dirty(inode); return 0; } @@ -1315,16 +1315,16 @@ static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENOENT); } -static int empty_dir_getattr(struct user_namespace *mnt_userns, +static int empty_dir_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); return 0; } -static int empty_dir_setattr(struct user_namespace *mnt_userns, +static int empty_dir_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { return -EPERM; @@ -1582,3 +1582,39 @@ bool inode_maybe_inc_iversion(struct inode *inode, bool force) return true; } EXPORT_SYMBOL(inode_maybe_inc_iversion); + +/** + * inode_query_iversion - read i_version for later use + * @inode: inode from which i_version should be read + * + * Read the inode i_version counter. This should be used by callers that wish + * to store the returned i_version for later comparison. This will guarantee + * that a later query of the i_version will result in a different value if + * anything has changed. + * + * In this implementation, we fetch the current value, set the QUERIED flag and + * then try to swap it into place with a cmpxchg, if it wasn't already set. If + * that fails, we try again with the newly fetched value from the cmpxchg. + */ +u64 inode_query_iversion(struct inode *inode) +{ + u64 cur, new; + + cur = inode_peek_iversion_raw(inode); + do { + /* If flag is already set, then no need to swap */ + if (cur & I_VERSION_QUERIED) { + /* + * This barrier (and the implicit barrier in the + * cmpxchg below) pairs with the barrier in + * inode_maybe_inc_iversion(). + */ + smp_mb(); + break; + } + + new = cur | I_VERSION_QUERIED; + } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); + return cur >> I_VERSION_QUERIED_SHIFT; +} +EXPORT_SYMBOL(inode_query_iversion); diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index a5bb3f721a9d..82b19a30e0f0 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -188,7 +188,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) continue; if (!rpc_cmp_addr(nlm_addr(block->b_host), addr)) continue; - if (nfs_compare_fh(NFS_FH(locks_inode(fl_blocked->fl_file)), fh) != 0) + if (nfs_compare_fh(NFS_FH(file_inode(fl_blocked->fl_file)), fh) != 0) continue; /* Alright, we found a lock. Set the return status * and wake up the caller diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 99fffc9cb958..16b4de868cd2 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -12,6 +12,7 @@ #include <linux/types.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/nfs_fs.h> #include <linux/utsname.h> #include <linux/freezer.h> @@ -130,7 +131,7 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) char *nodename = req->a_host->h_rpcclnt->cl_nodename; nlmclnt_next_cookie(&argp->cookie); - memcpy(&lock->fh, NFS_FH(locks_inode(fl->fl_file)), sizeof(struct nfs_fh)); + memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh)); lock->caller = nodename; lock->oh.data = req->a_owner; lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 5bec78c8e431..17432c445fe6 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h @@ -3,6 +3,7 @@ #define __LOCKD_NETNS_H__ #include <linux/fs.h> +#include <linux/filelock.h> #include <net/netns/generic.h> struct lockd_net { diff --git a/fs/locks.c b/fs/locks.c index 8f01bee17715..624c6ac92ede 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -52,6 +52,7 @@ #include <linux/capability.h> #include <linux/file.h> #include <linux/fdtable.h> +#include <linux/filelock.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/security.h> @@ -233,7 +234,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list, char *list_type) { struct file_lock *fl; - struct inode *inode = locks_inode(filp); + struct inode *inode = file_inode(filp); list_for_each_entry(fl, list, fl_list) if (fl->fl_file == filp) @@ -887,7 +888,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) { struct file_lock *cfl; struct file_lock_context *ctx; - struct inode *inode = locks_inode(filp); + struct inode *inode = file_inode(filp); void *owner; void (*func)(void); @@ -1330,7 +1331,7 @@ retry: int posix_lock_file(struct file *filp, struct file_lock *fl, struct file_lock *conflock) { - return posix_lock_inode(locks_inode(filp), fl, conflock); + return posix_lock_inode(file_inode(filp), fl, conflock); } EXPORT_SYMBOL(posix_lock_file); @@ -1629,7 +1630,7 @@ EXPORT_SYMBOL(lease_get_mtime); int fcntl_getlease(struct file *filp) { struct file_lock *fl; - struct inode *inode = locks_inode(filp); + struct inode *inode = file_inode(filp); struct file_lock_context *ctx; int type = F_UNLCK; LIST_HEAD(dispose); @@ -1667,7 +1668,7 @@ int fcntl_getlease(struct file *filp) static int check_conflicting_open(struct file *filp, const long arg, int flags) { - struct inode *inode = locks_inode(filp); + struct inode *inode = file_inode(filp); int self_wcount = 0, self_rcount = 0; if (flags & FL_LAYOUT) @@ -1703,7 +1704,7 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) { struct file_lock *fl, *my_fl = NULL, *lease; - struct inode *inode = locks_inode(filp); + struct inode *inode = file_inode(filp); struct file_lock_context *ctx; bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; @@ -1819,7 +1820,7 @@ static int generic_delete_lease(struct file *filp, void *owner) { int error = -EAGAIN; struct file_lock *fl, *victim = NULL; - struct inode *inode = locks_inode(filp); + struct inode *inode = file_inode(filp); struct file_lock_context *ctx; LIST_HEAD(dispose); @@ -1861,7 +1862,7 @@ static int generic_delete_lease(struct file *filp, void *owner) int generic_setlease(struct file *filp, long arg, struct file_lock **flp, void **priv) { - struct inode *inode = locks_inode(filp); + struct inode *inode = file_inode(filp); int error; if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE)) @@ -2350,7 +2351,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, struct flock *flock) { struct file_lock *file_lock = locks_alloc_lock(); - struct inode *inode = locks_inode(filp); + struct inode *inode = file_inode(filp); struct file *f; int error; @@ -2554,7 +2555,7 @@ out: void locks_remove_posix(struct file *filp, fl_owner_t owner) { int error; - struct inode *inode = locks_inode(filp); + struct inode *inode = file_inode(filp); struct file_lock lock; struct file_lock_context *ctx; @@ -2591,7 +2592,7 @@ static void locks_remove_flock(struct file *filp, struct file_lock_context *flctx) { struct file_lock fl; - struct inode *inode = locks_inode(filp); + struct inode *inode = file_inode(filp); if (list_empty(&flctx->flc_flock)) return; @@ -2636,7 +2637,7 @@ void locks_remove_file(struct file *filp) { struct file_lock_context *ctx; - ctx = locks_inode_context(locks_inode(filp)); + ctx = locks_inode_context(file_inode(filp)); if (!ctx) return; @@ -2720,7 +2721,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, */ if (fl->fl_file != NULL) - inode = locks_inode(fl->fl_file); + inode = file_inode(fl->fl_file); seq_printf(f, "%lld: ", id); @@ -2861,7 +2862,7 @@ static void __show_fd_locks(struct seq_file *f, void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) { - struct inode *inode = locks_inode(filp); + struct inode *inode = file_inode(filp); struct file_lock_context *ctx; int id = 0; diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 9115948c624e..724d8191a310 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -252,7 +252,7 @@ struct inode *minix_new_inode(const struct inode *dir, umode_t mode, int *error) iput(inode); return NULL; } - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = j; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_blocks = 0; diff --git a/fs/minix/file.c b/fs/minix/file.c index 6a7bd2d9eec0..0dd05d47724a 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -22,13 +22,13 @@ const struct file_operations minix_file_operations = { .splice_read = generic_file_splice_read, }; -static int minix_setattr(struct user_namespace *mnt_userns, +static int minix_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); int error; - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; @@ -42,7 +42,7 @@ static int minix_setattr(struct user_namespace *mnt_userns, minix_truncate(inode); } - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index da8bdd1712a7..e9fbb5303a22 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -654,13 +654,13 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc) return err; } -int minix_getattr(struct user_namespace *mnt_userns, const struct path *path, +int minix_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct super_block *sb = path->dentry->d_sb; struct inode *inode = d_inode(path->dentry); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); if (INODE_VERSION(inode) == MINIX_V1) stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb); else diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 202173368025..e0b76defa85c 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -51,7 +51,7 @@ extern unsigned long minix_count_free_inodes(struct super_block *sb); extern int minix_new_block(struct inode * inode); extern void minix_free_block(struct inode *inode, unsigned long block); extern unsigned long minix_count_free_blocks(struct super_block *sb); -extern int minix_getattr(struct user_namespace *, const struct path *, +extern int minix_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len); diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 8afdc408ca4f..39ebe10d6a8b 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -33,7 +33,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, un return d_splice_alias(inode, dentry); } -static int minix_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int minix_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { int error; @@ -52,7 +52,7 @@ static int minix_mknod(struct user_namespace *mnt_userns, struct inode *dir, return error; } -static int minix_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, +static int minix_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { int error; @@ -65,13 +65,13 @@ static int minix_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, return finish_open_simple(file, error); } -static int minix_create(struct user_namespace *mnt_userns, struct inode *dir, +static int minix_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - return minix_mknod(mnt_userns, dir, dentry, mode, 0); + return minix_mknod(&nop_mnt_idmap, dir, dentry, mode, 0); } -static int minix_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int minix_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { int err = -ENAMETOOLONG; @@ -111,7 +111,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir, return add_nondir(dentry, inode); } -static int minix_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int minix_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode * inode; @@ -184,7 +184,7 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry) return err; } -static int minix_rename(struct user_namespace *mnt_userns, +static int minix_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c new file mode 100644 index 000000000000..4905665c47d0 --- /dev/null +++ b/fs/mnt_idmapping.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Christian Brauner <brauner@kernel.org> */ + +#include <linux/cred.h> +#include <linux/fs.h> +#include <linux/mnt_idmapping.h> +#include <linux/slab.h> +#include <linux/user_namespace.h> + +#include "internal.h" + +struct mnt_idmap { + struct user_namespace *owner; + refcount_t count; +}; + +/* + * Carries the initial idmapping of 0:0:4294967295 which is an identity + * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is + * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...]. + */ +struct mnt_idmap nop_mnt_idmap = { + .owner = &init_user_ns, + .count = REFCOUNT_INIT(1), +}; +EXPORT_SYMBOL_GPL(nop_mnt_idmap); + +/** + * check_fsmapping - check whether an mount idmapping is allowed + * @idmap: idmap of the relevent mount + * @sb: super block of the filesystem + * + * Return: true if @idmap is allowed, false if not. + */ +bool check_fsmapping(const struct mnt_idmap *idmap, + const struct super_block *sb) +{ + return idmap->owner != sb->s_user_ns; +} + +/** + * initial_idmapping - check whether this is the initial mapping + * @ns: idmapping to check + * + * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1, + * [...], 1000 to 1000 [...]. + * + * Return: true if this is the initial mapping, false if not. + */ +static inline bool initial_idmapping(const struct user_namespace *ns) +{ + return ns == &init_user_ns; +} + +/** + * no_idmapping - check whether we can skip remapping a kuid/gid + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * + * This function can be used to check whether a remapping between two + * idmappings is required. + * An idmapped mount is a mount that has an idmapping attached to it that + * is different from the filsystem's idmapping and the initial idmapping. + * If the initial mapping is used or the idmapping of the mount and the + * filesystem are identical no remapping is required. + * + * Return: true if remapping can be skipped, false if not. + */ +static inline bool no_idmapping(const struct user_namespace *mnt_userns, + const struct user_namespace *fs_userns) +{ + return initial_idmapping(mnt_userns) || mnt_userns == fs_userns; +} + +/** + * make_vfsuid - map a filesystem kuid according to an idmapping + * @idmap: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kuid : kuid to be mapped + * + * Take a @kuid and remap it from @fs_userns into @idmap. Use this + * function when preparing a @kuid to be reported to userspace. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kuid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kuid won't change when calling + * from_kuid() so we can simply retrieve the value via __kuid_val() + * directly. + * + * Return: @kuid mapped according to @idmap. + * If @kuid has no mapping in either @idmap or @fs_userns INVALID_UID is + * returned. + */ + +vfsuid_t make_vfsuid(struct mnt_idmap *idmap, + struct user_namespace *fs_userns, + kuid_t kuid) +{ + uid_t uid; + struct user_namespace *mnt_userns = idmap->owner; + + if (no_idmapping(mnt_userns, fs_userns)) + return VFSUIDT_INIT(kuid); + if (initial_idmapping(fs_userns)) + uid = __kuid_val(kuid); + else + uid = from_kuid(fs_userns, kuid); + if (uid == (uid_t)-1) + return INVALID_VFSUID; + return VFSUIDT_INIT(make_kuid(mnt_userns, uid)); +} +EXPORT_SYMBOL_GPL(make_vfsuid); + +/** + * make_vfsgid - map a filesystem kgid according to an idmapping + * @idmap: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kgid : kgid to be mapped + * + * Take a @kgid and remap it from @fs_userns into @idmap. Use this + * function when preparing a @kgid to be reported to userspace. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kgid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kgid won't change when calling + * from_kgid() so we can simply retrieve the value via __kgid_val() + * directly. + * + * Return: @kgid mapped according to @idmap. + * If @kgid has no mapping in either @idmap or @fs_userns INVALID_GID is + * returned. + */ +vfsgid_t make_vfsgid(struct mnt_idmap *idmap, + struct user_namespace *fs_userns, kgid_t kgid) +{ + gid_t gid; + struct user_namespace *mnt_userns = idmap->owner; + + if (no_idmapping(mnt_userns, fs_userns)) + return VFSGIDT_INIT(kgid); + if (initial_idmapping(fs_userns)) + gid = __kgid_val(kgid); + else + gid = from_kgid(fs_userns, kgid); + if (gid == (gid_t)-1) + return INVALID_VFSGID; + return VFSGIDT_INIT(make_kgid(mnt_userns, gid)); +} +EXPORT_SYMBOL_GPL(make_vfsgid); + +/** + * from_vfsuid - map a vfsuid into the filesystem idmapping + * @idmap: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @vfsuid : vfsuid to be mapped + * + * Map @vfsuid into the filesystem idmapping. This function has to be used in + * order to e.g. write @vfsuid to inode->i_uid. + * + * Return: @vfsuid mapped into the filesystem idmapping + */ +kuid_t from_vfsuid(struct mnt_idmap *idmap, + struct user_namespace *fs_userns, vfsuid_t vfsuid) +{ + uid_t uid; + struct user_namespace *mnt_userns = idmap->owner; + + if (no_idmapping(mnt_userns, fs_userns)) + return AS_KUIDT(vfsuid); + uid = from_kuid(mnt_userns, AS_KUIDT(vfsuid)); + if (uid == (uid_t)-1) + return INVALID_UID; + if (initial_idmapping(fs_userns)) + return KUIDT_INIT(uid); + return make_kuid(fs_userns, uid); +} +EXPORT_SYMBOL_GPL(from_vfsuid); + +/** + * from_vfsgid - map a vfsgid into the filesystem idmapping + * @idmap: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @vfsgid : vfsgid to be mapped + * + * Map @vfsgid into the filesystem idmapping. This function has to be used in + * order to e.g. write @vfsgid to inode->i_gid. + * + * Return: @vfsgid mapped into the filesystem idmapping + */ +kgid_t from_vfsgid(struct mnt_idmap *idmap, + struct user_namespace *fs_userns, vfsgid_t vfsgid) +{ + gid_t gid; + struct user_namespace *mnt_userns = idmap->owner; + + if (no_idmapping(mnt_userns, fs_userns)) + return AS_KGIDT(vfsgid); + gid = from_kgid(mnt_userns, AS_KGIDT(vfsgid)); + if (gid == (gid_t)-1) + return INVALID_GID; + if (initial_idmapping(fs_userns)) + return KGIDT_INIT(gid); + return make_kgid(fs_userns, gid); +} +EXPORT_SYMBOL_GPL(from_vfsgid); + +#ifdef CONFIG_MULTIUSER +/** + * vfsgid_in_group_p() - check whether a vfsuid matches the caller's groups + * @vfsgid: the mnt gid to match + * + * This function can be used to determine whether @vfsuid matches any of the + * caller's groups. + * + * Return: 1 if vfsuid matches caller's groups, 0 if not. + */ +int vfsgid_in_group_p(vfsgid_t vfsgid) +{ + return in_group_p(AS_KGIDT(vfsgid)); +} +#else +int vfsgid_in_group_p(vfsgid_t vfsgid) +{ + return 1; +} +#endif +EXPORT_SYMBOL_GPL(vfsgid_in_group_p); + +struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns) +{ + struct mnt_idmap *idmap; + + idmap = kzalloc(sizeof(struct mnt_idmap), GFP_KERNEL_ACCOUNT); + if (!idmap) + return ERR_PTR(-ENOMEM); + + idmap->owner = get_user_ns(mnt_userns); + refcount_set(&idmap->count, 1); + return idmap; +} + +/** + * mnt_idmap_get - get a reference to an idmapping + * @idmap: the idmap to bump the reference on + * + * If @idmap is not the @nop_mnt_idmap bump the reference count. + * + * Return: @idmap with reference count bumped if @not_mnt_idmap isn't passed. + */ +struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap) +{ + if (idmap != &nop_mnt_idmap) + refcount_inc(&idmap->count); + + return idmap; +} + +/** + * mnt_idmap_put - put a reference to an idmapping + * @idmap: the idmap to put the reference on + * + * If this is a non-initial idmapping, put the reference count when a mount is + * released and free it if we're the last user. + */ +void mnt_idmap_put(struct mnt_idmap *idmap) +{ + if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) { + put_user_ns(idmap->owner); + kfree(idmap); + } +} diff --git a/fs/namei.c b/fs/namei.c index 309ae6fc8c99..5855dc6edbd5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -20,6 +20,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/namei.h> #include <linux/pagemap.h> #include <linux/sched/mm.h> @@ -273,7 +274,7 @@ void putname(struct filename *name) /** * check_acl - perform ACL permission checking - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @inode: inode to check permissions on * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...) * @@ -281,13 +282,13 @@ void putname(struct filename *name) * retrieve POSIX acls it needs to know whether it is called from a blocking or * non-blocking context and thus cares about the MAY_NOT_BLOCK bit. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. + * raw inode simply passs @nop_mnt_idmap. */ -static int check_acl(struct user_namespace *mnt_userns, +static int check_acl(struct mnt_idmap *idmap, struct inode *inode, int mask) { #ifdef CONFIG_FS_POSIX_ACL @@ -300,14 +301,14 @@ static int check_acl(struct user_namespace *mnt_userns, /* no ->get_inode_acl() calls in RCU mode... */ if (is_uncached_acl(acl)) return -ECHILD; - return posix_acl_permission(mnt_userns, inode, acl, mask); + return posix_acl_permission(idmap, inode, acl, mask); } acl = get_inode_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { - int error = posix_acl_permission(mnt_userns, inode, acl, mask); + int error = posix_acl_permission(idmap, inode, acl, mask); posix_acl_release(acl); return error; } @@ -318,7 +319,7 @@ static int check_acl(struct user_namespace *mnt_userns, /** * acl_permission_check - perform basic UNIX permission checking - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @inode: inode to check permissions on * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...) * @@ -326,20 +327,20 @@ static int check_acl(struct user_namespace *mnt_userns, * function may retrieve POSIX acls it needs to know whether it is called from a * blocking or non-blocking context and thus cares about the MAY_NOT_BLOCK bit. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. + * raw inode simply passs @nop_mnt_idmap. */ -static int acl_permission_check(struct user_namespace *mnt_userns, +static int acl_permission_check(struct mnt_idmap *idmap, struct inode *inode, int mask) { unsigned int mode = inode->i_mode; vfsuid_t vfsuid; /* Are we the owner? If so, ACL's don't matter */ - vfsuid = i_uid_into_vfsuid(mnt_userns, inode); + vfsuid = i_uid_into_vfsuid(idmap, inode); if (likely(vfsuid_eq_kuid(vfsuid, current_fsuid()))) { mask &= 7; mode >>= 6; @@ -348,7 +349,7 @@ static int acl_permission_check(struct user_namespace *mnt_userns, /* Do we have ACL's? */ if (IS_POSIXACL(inode) && (mode & S_IRWXG)) { - int error = check_acl(mnt_userns, inode, mask); + int error = check_acl(idmap, inode, mask); if (error != -EAGAIN) return error; } @@ -362,7 +363,7 @@ static int acl_permission_check(struct user_namespace *mnt_userns, * about? Need to check group ownership if so. */ if (mask & (mode ^ (mode >> 3))) { - vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode); + vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); if (vfsgid_in_group_p(vfsgid)) mode >>= 3; } @@ -373,7 +374,7 @@ static int acl_permission_check(struct user_namespace *mnt_userns, /** * generic_permission - check for access rights on a Posix-like filesystem - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @inode: inode to check access rights for * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, * %MAY_NOT_BLOCK ...) @@ -387,13 +388,13 @@ static int acl_permission_check(struct user_namespace *mnt_userns, * request cannot be satisfied (eg. requires blocking or too much complexity). * It would then be called again in ref-walk mode. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. + * raw inode simply passs @nop_mnt_idmap. */ -int generic_permission(struct user_namespace *mnt_userns, struct inode *inode, +int generic_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { int ret; @@ -401,17 +402,17 @@ int generic_permission(struct user_namespace *mnt_userns, struct inode *inode, /* * Do the basic permission checks. */ - ret = acl_permission_check(mnt_userns, inode, mask); + ret = acl_permission_check(idmap, inode, mask); if (ret != -EACCES) return ret; if (S_ISDIR(inode->i_mode)) { /* DACs are overridable for directories */ if (!(mask & MAY_WRITE)) - if (capable_wrt_inode_uidgid(mnt_userns, inode, + if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_READ_SEARCH)) return 0; - if (capable_wrt_inode_uidgid(mnt_userns, inode, + if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_OVERRIDE)) return 0; return -EACCES; @@ -422,7 +423,7 @@ int generic_permission(struct user_namespace *mnt_userns, struct inode *inode, */ mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (mask == MAY_READ) - if (capable_wrt_inode_uidgid(mnt_userns, inode, + if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_READ_SEARCH)) return 0; /* @@ -431,7 +432,7 @@ int generic_permission(struct user_namespace *mnt_userns, struct inode *inode, * at least one exec bit set. */ if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO)) - if (capable_wrt_inode_uidgid(mnt_userns, inode, + if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_OVERRIDE)) return 0; @@ -441,7 +442,7 @@ EXPORT_SYMBOL(generic_permission); /** * do_inode_permission - UNIX permission checking - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @inode: inode to check permissions on * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...) * @@ -450,19 +451,19 @@ EXPORT_SYMBOL(generic_permission); * flag in inode->i_opflags, that says "this has not special * permission function, use the fast case". */ -static inline int do_inode_permission(struct user_namespace *mnt_userns, +static inline int do_inode_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) { if (likely(inode->i_op->permission)) - return inode->i_op->permission(mnt_userns, inode, mask); + return inode->i_op->permission(idmap, inode, mask); /* This gets set once for the inode lifetime */ spin_lock(&inode->i_lock); inode->i_opflags |= IOP_FASTPERM; spin_unlock(&inode->i_lock); } - return generic_permission(mnt_userns, inode, mask); + return generic_permission(idmap, inode, mask); } /** @@ -487,7 +488,7 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask) /** * inode_permission - Check for access rights to a given inode - * @mnt_userns: User namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @inode: Inode to check permission on * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * @@ -497,7 +498,7 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask) * * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. */ -int inode_permission(struct user_namespace *mnt_userns, +int inode_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { int retval; @@ -518,11 +519,11 @@ int inode_permission(struct user_namespace *mnt_userns, * written back improperly if their true value is unknown * to the vfs. */ - if (HAS_UNMAPPED_ID(mnt_userns, inode)) + if (HAS_UNMAPPED_ID(idmap, inode)) return -EACCES; } - retval = do_inode_permission(mnt_userns, inode, mask); + retval = do_inode_permission(idmap, inode, mask); if (retval) return retval; @@ -1094,14 +1095,14 @@ fs_initcall(init_fs_namei_sysctls); */ static inline int may_follow_link(struct nameidata *nd, const struct inode *inode) { - struct user_namespace *mnt_userns; + struct mnt_idmap *idmap; vfsuid_t vfsuid; if (!sysctl_protected_symlinks) return 0; - mnt_userns = mnt_user_ns(nd->path.mnt); - vfsuid = i_uid_into_vfsuid(mnt_userns, inode); + idmap = mnt_idmap(nd->path.mnt); + vfsuid = i_uid_into_vfsuid(idmap, inode); /* Allowed if owner and follower match. */ if (vfsuid_eq_kuid(vfsuid, current_fsuid())) return 0; @@ -1124,7 +1125,7 @@ static inline int may_follow_link(struct nameidata *nd, const struct inode *inod /** * safe_hardlink_source - Check for safe hardlink conditions - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @inode: the source inode to hardlink from * * Return false if at least one of the following conditions: @@ -1135,7 +1136,7 @@ static inline int may_follow_link(struct nameidata *nd, const struct inode *inod * * Otherwise returns true. */ -static bool safe_hardlink_source(struct user_namespace *mnt_userns, +static bool safe_hardlink_source(struct mnt_idmap *idmap, struct inode *inode) { umode_t mode = inode->i_mode; @@ -1153,7 +1154,7 @@ static bool safe_hardlink_source(struct user_namespace *mnt_userns, return false; /* Hardlinking to unreadable or unwritable sources is dangerous. */ - if (inode_permission(mnt_userns, inode, MAY_READ | MAY_WRITE)) + if (inode_permission(idmap, inode, MAY_READ | MAY_WRITE)) return false; return true; @@ -1161,8 +1162,8 @@ static bool safe_hardlink_source(struct user_namespace *mnt_userns, /** * may_linkat - Check permissions for creating a hardlink - * @mnt_userns: user namespace of the mount the inode was found from - * @link: the source to hardlink from + * @idmap: idmap of the mount the inode was found from + * @link: the source to hardlink from * * Block hardlink when all of: * - sysctl_protected_hardlinks enabled @@ -1170,21 +1171,21 @@ static bool safe_hardlink_source(struct user_namespace *mnt_userns, * - hardlink source is unsafe (see safe_hardlink_source() above) * - not CAP_FOWNER in a namespace with the inode owner uid mapped * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. + * raw inode simply pass @nop_mnt_idmap. * * Returns 0 if successful, -ve on error. */ -int may_linkat(struct user_namespace *mnt_userns, const struct path *link) +int may_linkat(struct mnt_idmap *idmap, const struct path *link) { struct inode *inode = link->dentry->d_inode; /* Inode writeback is not safe when the uid or gid are invalid. */ - if (!vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)) || - !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode))) + if (!vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) || + !vfsgid_valid(i_gid_into_vfsgid(idmap, inode))) return -EOVERFLOW; if (!sysctl_protected_hardlinks) @@ -1193,8 +1194,8 @@ int may_linkat(struct user_namespace *mnt_userns, const struct path *link) /* Source inode owner (or CAP_FOWNER) can hardlink all they like, * otherwise, it must be a safe source. */ - if (safe_hardlink_source(mnt_userns, inode) || - inode_owner_or_capable(mnt_userns, inode)) + if (safe_hardlink_source(idmap, inode) || + inode_owner_or_capable(idmap, inode)) return 0; audit_log_path_denied(AUDIT_ANOM_LINK, "linkat"); @@ -1205,7 +1206,7 @@ int may_linkat(struct user_namespace *mnt_userns, const struct path *link) * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory * should be allowed, or not, on files that already * exist. - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @nd: nameidata pathwalk data * @inode: the inode of the file to open * @@ -1220,15 +1221,15 @@ int may_linkat(struct user_namespace *mnt_userns, const struct path *link) * the directory doesn't have to be world writable: being group writable will * be enough. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. + * raw inode simply pass @nop_mnt_idmap. * * Returns 0 if the open is allowed, -ve on error. */ -static int may_create_in_sticky(struct user_namespace *mnt_userns, +static int may_create_in_sticky(struct mnt_idmap *idmap, struct nameidata *nd, struct inode *const inode) { umode_t dir_mode = nd->dir_mode; @@ -1237,8 +1238,8 @@ static int may_create_in_sticky(struct user_namespace *mnt_userns, if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) || (!sysctl_protected_regular && S_ISREG(inode->i_mode)) || likely(!(dir_mode & S_ISVTX)) || - vfsuid_eq(i_uid_into_vfsuid(mnt_userns, inode), dir_vfsuid) || - vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode), current_fsuid())) + vfsuid_eq(i_uid_into_vfsuid(idmap, inode), dir_vfsuid) || + vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) return 0; if (likely(dir_mode & 0002) || @@ -1704,15 +1705,15 @@ static struct dentry *lookup_slow(const struct qstr *name, return res; } -static inline int may_lookup(struct user_namespace *mnt_userns, +static inline int may_lookup(struct mnt_idmap *idmap, struct nameidata *nd) { if (nd->flags & LOOKUP_RCU) { - int err = inode_permission(mnt_userns, nd->inode, MAY_EXEC|MAY_NOT_BLOCK); + int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK); if (err != -ECHILD || !try_to_unlazy(nd)) return err; } - return inode_permission(mnt_userns, nd->inode, MAY_EXEC); + return inode_permission(idmap, nd->inode, MAY_EXEC); } static int reserve_stack(struct nameidata *nd, struct path *link) @@ -2253,13 +2254,13 @@ static int link_path_walk(const char *name, struct nameidata *nd) /* At this point we know we have a real path component. */ for(;;) { - struct user_namespace *mnt_userns; + struct mnt_idmap *idmap; const char *link; u64 hash_len; int type; - mnt_userns = mnt_user_ns(nd->path.mnt); - err = may_lookup(mnt_userns, nd); + idmap = mnt_idmap(nd->path.mnt); + err = may_lookup(idmap, nd); if (err) return err; @@ -2307,7 +2308,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) OK: /* pathname or trailing symlink, done */ if (!depth) { - nd->dir_vfsuid = i_uid_into_vfsuid(mnt_userns, nd->inode); + nd->dir_vfsuid = i_uid_into_vfsuid(idmap, nd->inode); nd->dir_mode = nd->inode->i_mode; nd->flags &= ~LOOKUP_PARENT; return 0; @@ -2622,7 +2623,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, } EXPORT_SYMBOL(vfs_path_lookup); -static int lookup_one_common(struct user_namespace *mnt_userns, +static int lookup_one_common(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len, struct qstr *this) { @@ -2652,7 +2653,7 @@ static int lookup_one_common(struct user_namespace *mnt_userns, return err; } - return inode_permission(mnt_userns, base->d_inode, MAY_EXEC); + return inode_permission(idmap, base->d_inode, MAY_EXEC); } /** @@ -2676,7 +2677,7 @@ struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len WARN_ON_ONCE(!inode_is_locked(base->d_inode)); - err = lookup_one_common(&init_user_ns, name, base, len, &this); + err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this); if (err) return ERR_PTR(err); @@ -2703,7 +2704,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) WARN_ON_ONCE(!inode_is_locked(base->d_inode)); - err = lookup_one_common(&init_user_ns, name, base, len, &this); + err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this); if (err) return ERR_PTR(err); @@ -2714,7 +2715,7 @@ EXPORT_SYMBOL(lookup_one_len); /** * lookup_one - filesystem helper to lookup single pathname component - * @mnt_userns: user namespace of the mount the lookup is performed from + * @idmap: idmap of the mount the lookup is performed from * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to @@ -2724,7 +2725,7 @@ EXPORT_SYMBOL(lookup_one_len); * * The caller must hold base->i_mutex. */ -struct dentry *lookup_one(struct user_namespace *mnt_userns, const char *name, +struct dentry *lookup_one(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len) { struct dentry *dentry; @@ -2733,7 +2734,7 @@ struct dentry *lookup_one(struct user_namespace *mnt_userns, const char *name, WARN_ON_ONCE(!inode_is_locked(base->d_inode)); - err = lookup_one_common(mnt_userns, name, base, len, &this); + err = lookup_one_common(idmap, name, base, len, &this); if (err) return ERR_PTR(err); @@ -2744,7 +2745,7 @@ EXPORT_SYMBOL(lookup_one); /** * lookup_one_unlocked - filesystem helper to lookup single pathname component - * @mnt_userns: idmapping of the mount the lookup is performed from + * @idmap: idmap of the mount the lookup is performed from * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to @@ -2755,7 +2756,7 @@ EXPORT_SYMBOL(lookup_one); * Unlike lookup_one_len, it should be called without the parent * i_mutex held, and will take the i_mutex itself if necessary. */ -struct dentry *lookup_one_unlocked(struct user_namespace *mnt_userns, +struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len) { @@ -2763,7 +2764,7 @@ struct dentry *lookup_one_unlocked(struct user_namespace *mnt_userns, int err; struct dentry *ret; - err = lookup_one_common(mnt_userns, name, base, len, &this); + err = lookup_one_common(idmap, name, base, len, &this); if (err) return ERR_PTR(err); @@ -2777,7 +2778,7 @@ EXPORT_SYMBOL(lookup_one_unlocked); /** * lookup_one_positive_unlocked - filesystem helper to lookup single * pathname component - * @mnt_userns: idmapping of the mount the lookup is performed from + * @idmap: idmap of the mount the lookup is performed from * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to @@ -2794,11 +2795,11 @@ EXPORT_SYMBOL(lookup_one_unlocked); * * The helper should be called without i_mutex held. */ -struct dentry *lookup_one_positive_unlocked(struct user_namespace *mnt_userns, +struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len) { - struct dentry *ret = lookup_one_unlocked(mnt_userns, name, base, len); + struct dentry *ret = lookup_one_unlocked(idmap, name, base, len); if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { dput(ret); @@ -2823,7 +2824,7 @@ EXPORT_SYMBOL(lookup_one_positive_unlocked); struct dentry *lookup_one_len_unlocked(const char *name, struct dentry *base, int len) { - return lookup_one_unlocked(&init_user_ns, name, base, len); + return lookup_one_unlocked(&nop_mnt_idmap, name, base, len); } EXPORT_SYMBOL(lookup_one_len_unlocked); @@ -2838,7 +2839,7 @@ EXPORT_SYMBOL(lookup_one_len_unlocked); struct dentry *lookup_positive_unlocked(const char *name, struct dentry *base, int len) { - return lookup_one_positive_unlocked(&init_user_ns, name, base, len); + return lookup_one_positive_unlocked(&nop_mnt_idmap, name, base, len); } EXPORT_SYMBOL(lookup_positive_unlocked); @@ -2880,16 +2881,16 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags, } EXPORT_SYMBOL(user_path_at_empty); -int __check_sticky(struct user_namespace *mnt_userns, struct inode *dir, +int __check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode) { kuid_t fsuid = current_fsuid(); - if (vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode), fsuid)) + if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), fsuid)) return 0; - if (vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, dir), fsuid)) + if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, dir), fsuid)) return 0; - return !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FOWNER); + return !capable_wrt_inode_uidgid(idmap, inode, CAP_FOWNER); } EXPORT_SYMBOL(__check_sticky); @@ -2913,7 +2914,7 @@ EXPORT_SYMBOL(__check_sticky); * 11. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ -static int may_delete(struct user_namespace *mnt_userns, struct inode *dir, +static int may_delete(struct mnt_idmap *idmap, struct inode *dir, struct dentry *victim, bool isdir) { struct inode *inode = d_backing_inode(victim); @@ -2926,21 +2927,21 @@ static int may_delete(struct user_namespace *mnt_userns, struct inode *dir, BUG_ON(victim->d_parent->d_inode != dir); /* Inode writeback is not safe when the uid or gid are invalid. */ - if (!vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)) || - !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode))) + if (!vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) || + !vfsgid_valid(i_gid_into_vfsgid(idmap, inode))) return -EOVERFLOW; audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); - error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC); + error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (IS_APPEND(dir)) return -EPERM; - if (check_sticky(mnt_userns, dir, inode) || IS_APPEND(inode) || + if (check_sticky(idmap, dir, inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) || - HAS_UNMAPPED_ID(mnt_userns, inode)) + HAS_UNMAPPED_ID(idmap, inode)) return -EPERM; if (isdir) { if (!d_is_dir(victim)) @@ -2965,7 +2966,7 @@ static int may_delete(struct user_namespace *mnt_userns, struct inode *dir, * 4. We should have write and exec permissions on dir * 5. We can't do it if dir is immutable (done in permission()) */ -static inline int may_create(struct user_namespace *mnt_userns, +static inline int may_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *child) { audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); @@ -2973,10 +2974,10 @@ static inline int may_create(struct user_namespace *mnt_userns, return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; - if (!fsuidgid_has_mapping(dir->i_sb, mnt_userns)) + if (!fsuidgid_has_mapping(dir->i_sb, idmap)) return -EOVERFLOW; - return inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC); + return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); } /* @@ -3044,7 +3045,7 @@ static inline umode_t mode_strip_umask(const struct inode *dir, umode_t mode) /** * vfs_prepare_mode - prepare the mode to be used for a new inode - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: parent directory of the new inode * @mode: mode of the new inode * @mask_perms: allowed permission by the vfs @@ -3065,11 +3066,11 @@ static inline umode_t mode_strip_umask(const struct inode *dir, umode_t mode) * * Returns: mode to be passed to the filesystem */ -static inline umode_t vfs_prepare_mode(struct user_namespace *mnt_userns, +static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap, const struct inode *dir, umode_t mode, umode_t mask_perms, umode_t type) { - mode = mode_strip_sgid(mnt_userns, dir, mode); + mode = mode_strip_sgid(idmap, dir, mode); mode = mode_strip_umask(dir, mode); /* @@ -3084,7 +3085,7 @@ static inline umode_t vfs_prepare_mode(struct user_namespace *mnt_userns, /** * vfs_create - create new file - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: inode of @dentry * @dentry: pointer to dentry of the base directory * @mode: mode of the new file @@ -3092,27 +3093,29 @@ static inline umode_t vfs_prepare_mode(struct user_namespace *mnt_userns, * * Create a new file. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. + * raw inode simply passs @nop_mnt_idmap. */ -int vfs_create(struct user_namespace *mnt_userns, struct inode *dir, +int vfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool want_excl) { - int error = may_create(mnt_userns, dir, dentry); + int error; + + error = may_create(idmap, dir, dentry); if (error) return error; if (!dir->i_op->create) return -EACCES; /* shouldn't it be ENOSYS? */ - mode = vfs_prepare_mode(mnt_userns, dir, mode, S_IALLUGO, S_IFREG); + mode = vfs_prepare_mode(idmap, dir, mode, S_IALLUGO, S_IFREG); error = security_inode_create(dir, dentry, mode); if (error) return error; - error = dir->i_op->create(mnt_userns, dir, dentry, mode, want_excl); + error = dir->i_op->create(idmap, dir, dentry, mode, want_excl); if (!error) fsnotify_create(dir, dentry); return error; @@ -3124,7 +3127,7 @@ int vfs_mkobj(struct dentry *dentry, umode_t mode, void *arg) { struct inode *dir = dentry->d_parent->d_inode; - int error = may_create(&init_user_ns, dir, dentry); + int error = may_create(&nop_mnt_idmap, dir, dentry); if (error) return error; @@ -3146,7 +3149,7 @@ bool may_open_dev(const struct path *path) !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV); } -static int may_open(struct user_namespace *mnt_userns, const struct path *path, +static int may_open(struct mnt_idmap *idmap, const struct path *path, int acc_mode, int flag) { struct dentry *dentry = path->dentry; @@ -3182,7 +3185,7 @@ static int may_open(struct user_namespace *mnt_userns, const struct path *path, break; } - error = inode_permission(mnt_userns, inode, MAY_OPEN | acc_mode); + error = inode_permission(idmap, inode, MAY_OPEN | acc_mode); if (error) return error; @@ -3197,13 +3200,13 @@ static int may_open(struct user_namespace *mnt_userns, const struct path *path, } /* O_NOATIME can only be set by the owner or superuser */ - if (flag & O_NOATIME && !inode_owner_or_capable(mnt_userns, inode)) + if (flag & O_NOATIME && !inode_owner_or_capable(idmap, inode)) return -EPERM; return 0; } -static int handle_truncate(struct user_namespace *mnt_userns, struct file *filp) +static int handle_truncate(struct mnt_idmap *idmap, struct file *filp) { const struct path *path = &filp->f_path; struct inode *inode = path->dentry->d_inode; @@ -3213,7 +3216,7 @@ static int handle_truncate(struct user_namespace *mnt_userns, struct file *filp) error = security_file_truncate(filp); if (!error) { - error = do_truncate(mnt_userns, path->dentry, 0, + error = do_truncate(idmap, path->dentry, 0, ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, filp); } @@ -3228,7 +3231,7 @@ static inline int open_to_namei_flags(int flag) return flag; } -static int may_o_create(struct user_namespace *mnt_userns, +static int may_o_create(struct mnt_idmap *idmap, const struct path *dir, struct dentry *dentry, umode_t mode) { @@ -3236,10 +3239,10 @@ static int may_o_create(struct user_namespace *mnt_userns, if (error) return error; - if (!fsuidgid_has_mapping(dir->dentry->d_sb, mnt_userns)) + if (!fsuidgid_has_mapping(dir->dentry->d_sb, idmap)) return -EOVERFLOW; - error = inode_permission(mnt_userns, dir->dentry->d_inode, + error = inode_permission(idmap, dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); if (error) return error; @@ -3319,7 +3322,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, const struct open_flags *op, bool got_write) { - struct user_namespace *mnt_userns; + struct mnt_idmap *idmap; struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; int open_flag = op->open_flag; @@ -3367,13 +3370,13 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, */ if (unlikely(!got_write)) open_flag &= ~O_TRUNC; - mnt_userns = mnt_user_ns(nd->path.mnt); + idmap = mnt_idmap(nd->path.mnt); if (open_flag & O_CREAT) { if (open_flag & O_EXCL) open_flag &= ~O_TRUNC; - mode = vfs_prepare_mode(mnt_userns, dir->d_inode, mode, mode, mode); + mode = vfs_prepare_mode(idmap, dir->d_inode, mode, mode, mode); if (likely(got_write)) - create_error = may_o_create(mnt_userns, &nd->path, + create_error = may_o_create(idmap, &nd->path, dentry, mode); else create_error = -EROFS; @@ -3410,7 +3413,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, goto out_dput; } - error = dir_inode->i_op->create(mnt_userns, dir_inode, dentry, + error = dir_inode->i_op->create(idmap, dir_inode, dentry, mode, open_flag & O_EXCL); if (error) goto out_dput; @@ -3513,7 +3516,7 @@ finish_lookup: static int do_open(struct nameidata *nd, struct file *file, const struct open_flags *op) { - struct user_namespace *mnt_userns; + struct mnt_idmap *idmap; int open_flag = op->open_flag; bool do_truncate; int acc_mode; @@ -3526,13 +3529,13 @@ static int do_open(struct nameidata *nd, } if (!(file->f_mode & FMODE_CREATED)) audit_inode(nd->name, nd->path.dentry, 0); - mnt_userns = mnt_user_ns(nd->path.mnt); + idmap = mnt_idmap(nd->path.mnt); if (open_flag & O_CREAT) { if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED)) return -EEXIST; if (d_is_dir(nd->path.dentry)) return -EISDIR; - error = may_create_in_sticky(mnt_userns, nd, + error = may_create_in_sticky(idmap, nd, d_backing_inode(nd->path.dentry)); if (unlikely(error)) return error; @@ -3552,13 +3555,13 @@ static int do_open(struct nameidata *nd, return error; do_truncate = true; } - error = may_open(mnt_userns, &nd->path, acc_mode, open_flag); + error = may_open(idmap, &nd->path, acc_mode, open_flag); if (!error && !(file->f_mode & FMODE_OPENED)) error = vfs_open(&nd->path, file); if (!error) error = ima_file_check(file, op->acc_mode); if (!error && do_truncate) - error = handle_truncate(mnt_userns, file); + error = handle_truncate(idmap, file); if (unlikely(error > 0)) { WARN_ON(1); error = -EINVAL; @@ -3570,20 +3573,20 @@ static int do_open(struct nameidata *nd, /** * vfs_tmpfile - create tmpfile - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dentry: pointer to dentry of the base directory * @mode: mode of the new tmpfile * @open_flag: flags * * Create a temporary file. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. + * raw inode simply passs @nop_mnt_idmap. */ -static int vfs_tmpfile(struct user_namespace *mnt_userns, +static int vfs_tmpfile(struct mnt_idmap *idmap, const struct path *parentpath, struct file *file, umode_t mode) { @@ -3594,7 +3597,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, int open_flag = file->f_flags; /* we want directory to be writable */ - error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC); + error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (!dir->i_op->tmpfile) @@ -3604,13 +3607,13 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, return -ENOMEM; file->f_path.mnt = parentpath->mnt; file->f_path.dentry = child; - mode = vfs_prepare_mode(mnt_userns, dir, mode, mode, mode); - error = dir->i_op->tmpfile(mnt_userns, dir, file, mode); + mode = vfs_prepare_mode(idmap, dir, mode, mode, mode); + error = dir->i_op->tmpfile(idmap, dir, file, mode); dput(child); if (error) return error; /* Don't check for other permissions, the inode was just created */ - error = may_open(mnt_userns, &file->f_path, 0, file->f_flags); + error = may_open(idmap, &file->f_path, 0, file->f_flags); if (error) return error; inode = file_inode(file); @@ -3619,13 +3622,13 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock); } - ima_post_create_tmpfile(mnt_userns, inode); + ima_post_create_tmpfile(idmap, inode); return 0; } /** * vfs_tmpfile_open - open a tmpfile for kernel internal use - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @parentpath: path of the base directory * @mode: mode of the new tmpfile * @open_flag: flags @@ -3635,7 +3638,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, * hence this is only for kernel internal use, and must not be installed into * file tables or such. */ -struct file *vfs_tmpfile_open(struct user_namespace *mnt_userns, +struct file *vfs_tmpfile_open(struct mnt_idmap *idmap, const struct path *parentpath, umode_t mode, int open_flag, const struct cred *cred) { @@ -3644,7 +3647,7 @@ struct file *vfs_tmpfile_open(struct user_namespace *mnt_userns, file = alloc_empty_file_noaccount(open_flag, cred); if (!IS_ERR(file)) { - error = vfs_tmpfile(mnt_userns, parentpath, file, mode); + error = vfs_tmpfile(idmap, parentpath, file, mode); if (error) { fput(file); file = ERR_PTR(error); @@ -3658,7 +3661,6 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, const struct open_flags *op, struct file *file) { - struct user_namespace *mnt_userns; struct path path; int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path); @@ -3667,8 +3669,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, error = mnt_want_write(path.mnt); if (unlikely(error)) goto out; - mnt_userns = mnt_user_ns(path.mnt); - error = vfs_tmpfile(mnt_userns, &path, file, op->mode); + error = vfs_tmpfile(mnt_idmap(path.mnt), &path, file, op->mode); if (error) goto out2; audit_inode(nd->name, file->f_path.dentry, 0); @@ -3873,7 +3874,7 @@ EXPORT_SYMBOL(user_path_create); /** * vfs_mknod - create device node or file - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: inode of @dentry * @dentry: pointer to dentry of the base directory * @mode: mode of the new device node or file @@ -3881,17 +3882,17 @@ EXPORT_SYMBOL(user_path_create); * * Create a device node or file. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. + * raw inode simply passs @nop_mnt_idmap. */ -int vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV; - int error = may_create(mnt_userns, dir, dentry); + int error = may_create(idmap, dir, dentry); if (error) return error; @@ -3903,7 +3904,7 @@ int vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, if (!dir->i_op->mknod) return -EPERM; - mode = vfs_prepare_mode(mnt_userns, dir, mode, mode, mode); + mode = vfs_prepare_mode(idmap, dir, mode, mode, mode); error = devcgroup_inode_mknod(mode, dev); if (error) return error; @@ -3912,7 +3913,7 @@ int vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, if (error) return error; - error = dir->i_op->mknod(mnt_userns, dir, dentry, mode, dev); + error = dir->i_op->mknod(idmap, dir, dentry, mode, dev); if (!error) fsnotify_create(dir, dentry); return error; @@ -3939,7 +3940,7 @@ static int may_mknod(umode_t mode) static int do_mknodat(int dfd, struct filename *name, umode_t mode, unsigned int dev) { - struct user_namespace *mnt_userns; + struct mnt_idmap *idmap; struct dentry *dentry; struct path path; int error; @@ -3959,20 +3960,20 @@ retry: if (error) goto out2; - mnt_userns = mnt_user_ns(path.mnt); + idmap = mnt_idmap(path.mnt); switch (mode & S_IFMT) { case 0: case S_IFREG: - error = vfs_create(mnt_userns, path.dentry->d_inode, + error = vfs_create(idmap, path.dentry->d_inode, dentry, mode, true); if (!error) - ima_post_path_mknod(mnt_userns, dentry); + ima_post_path_mknod(idmap, dentry); break; case S_IFCHR: case S_IFBLK: - error = vfs_mknod(mnt_userns, path.dentry->d_inode, + error = vfs_mknod(idmap, path.dentry->d_inode, dentry, mode, new_decode_dev(dev)); break; case S_IFIFO: case S_IFSOCK: - error = vfs_mknod(mnt_userns, path.dentry->d_inode, + error = vfs_mknod(idmap, path.dentry->d_inode, dentry, mode, 0); break; } @@ -4000,32 +4001,33 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d /** * vfs_mkdir - create directory - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: inode of @dentry * @dentry: pointer to dentry of the base directory * @mode: mode of the new directory * * Create a directory. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. + * raw inode simply passs @nop_mnt_idmap. */ -int vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { - int error = may_create(mnt_userns, dir, dentry); + int error; unsigned max_links = dir->i_sb->s_max_links; + error = may_create(idmap, dir, dentry); if (error) return error; if (!dir->i_op->mkdir) return -EPERM; - mode = vfs_prepare_mode(mnt_userns, dir, mode, S_IRWXUGO | S_ISVTX, 0); + mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); error = security_inode_mkdir(dir, dentry, mode); if (error) return error; @@ -4033,7 +4035,7 @@ int vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, if (max_links && dir->i_nlink >= max_links) return -EMLINK; - error = dir->i_op->mkdir(mnt_userns, dir, dentry, mode); + error = dir->i_op->mkdir(idmap, dir, dentry, mode); if (!error) fsnotify_mkdir(dir, dentry); return error; @@ -4056,10 +4058,8 @@ retry: error = security_path_mkdir(&path, dentry, mode_strip_umask(path.dentry->d_inode, mode)); if (!error) { - struct user_namespace *mnt_userns; - mnt_userns = mnt_user_ns(path.mnt); - error = vfs_mkdir(mnt_userns, path.dentry->d_inode, dentry, - mode); + error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, + dentry, mode); } done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { @@ -4083,22 +4083,22 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) /** * vfs_rmdir - remove directory - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: inode of @dentry * @dentry: pointer to dentry of the base directory * * Remove a directory. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. + * raw inode simply passs @nop_mnt_idmap. */ -int vfs_rmdir(struct user_namespace *mnt_userns, struct inode *dir, +int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry) { - int error = may_delete(mnt_userns, dir, dentry, 1); + int error = may_delete(idmap, dir, dentry, 1); if (error) return error; @@ -4138,7 +4138,6 @@ EXPORT_SYMBOL(vfs_rmdir); int do_rmdir(int dfd, struct filename *name) { - struct user_namespace *mnt_userns; int error; struct dentry *dentry; struct path path; @@ -4178,8 +4177,7 @@ retry: error = security_path_rmdir(&path, dentry); if (error) goto exit4; - mnt_userns = mnt_user_ns(path.mnt); - error = vfs_rmdir(mnt_userns, path.dentry->d_inode, dentry); + error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry); exit4: dput(dentry); exit3: @@ -4203,7 +4201,7 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname) /** * vfs_unlink - unlink a filesystem object - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: parent directory * @dentry: victim * @delegated_inode: returns victim inode, if the inode is delegated. @@ -4220,17 +4218,17 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname) * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. + * raw inode simply passs @nop_mnt_idmap. */ -int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir, +int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) { struct inode *target = dentry->d_inode; - int error = may_delete(mnt_userns, dir, dentry, 0); + int error = may_delete(idmap, dir, dentry, 0); if (error) return error; @@ -4304,7 +4302,6 @@ retry_deleg: dentry = __lookup_hash(&last, path.dentry, lookup_flags); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { - struct user_namespace *mnt_userns; /* Why not before? Because we want correct error value */ if (last.name[last.len]) @@ -4316,9 +4313,8 @@ retry_deleg: error = security_path_unlink(&path, dentry); if (error) goto exit3; - mnt_userns = mnt_user_ns(path.mnt); - error = vfs_unlink(mnt_userns, path.dentry->d_inode, dentry, - &delegated_inode); + error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode, + dentry, &delegated_inode); exit3: dput(dentry); } @@ -4370,24 +4366,25 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname) /** * vfs_symlink - create symlink - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: inode of @dentry * @dentry: pointer to dentry of the base directory * @oldname: name of the file to link to * * Create a symlink. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. + * raw inode simply passs @nop_mnt_idmap. */ -int vfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *oldname) { - int error = may_create(mnt_userns, dir, dentry); + int error; + error = may_create(idmap, dir, dentry); if (error) return error; @@ -4398,7 +4395,7 @@ int vfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, if (error) return error; - error = dir->i_op->symlink(mnt_userns, dir, dentry, oldname); + error = dir->i_op->symlink(idmap, dir, dentry, oldname); if (!error) fsnotify_create(dir, dentry); return error; @@ -4423,13 +4420,9 @@ retry: goto out_putnames; error = security_path_symlink(&path, dentry, from->name); - if (!error) { - struct user_namespace *mnt_userns; - - mnt_userns = mnt_user_ns(path.mnt); - error = vfs_symlink(mnt_userns, path.dentry->d_inode, dentry, - from->name); - } + if (!error) + error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode, + dentry, from->name); done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; @@ -4455,7 +4448,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn /** * vfs_link - create a new link * @old_dentry: object to be linked - * @mnt_userns: the user namespace of the mount + * @idmap: idmap of the mount * @dir: new parent * @new_dentry: where to create the new link * @delegated_inode: returns inode needing a delegation break @@ -4472,13 +4465,13 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. + * raw inode simply passs @nop_mnt_idmap. */ -int vfs_link(struct dentry *old_dentry, struct user_namespace *mnt_userns, +int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) { @@ -4489,7 +4482,7 @@ int vfs_link(struct dentry *old_dentry, struct user_namespace *mnt_userns, if (!inode) return -ENOENT; - error = may_create(mnt_userns, dir, new_dentry); + error = may_create(idmap, dir, new_dentry); if (error) return error; @@ -4506,7 +4499,7 @@ int vfs_link(struct dentry *old_dentry, struct user_namespace *mnt_userns, * be writen back improperly if their true value is unknown to * the vfs. */ - if (HAS_UNMAPPED_ID(mnt_userns, inode)) + if (HAS_UNMAPPED_ID(idmap, inode)) return -EPERM; if (!dir->i_op->link) return -EPERM; @@ -4553,7 +4546,7 @@ EXPORT_SYMBOL(vfs_link); int do_linkat(int olddfd, struct filename *old, int newdfd, struct filename *new, int flags) { - struct user_namespace *mnt_userns; + struct mnt_idmap *idmap; struct dentry *new_dentry; struct path old_path, new_path; struct inode *delegated_inode = NULL; @@ -4590,14 +4583,14 @@ retry: error = -EXDEV; if (old_path.mnt != new_path.mnt) goto out_dput; - mnt_userns = mnt_user_ns(new_path.mnt); - error = may_linkat(mnt_userns, &old_path); + idmap = mnt_idmap(new_path.mnt); + error = may_linkat(idmap, &old_path); if (unlikely(error)) goto out_dput; error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) goto out_dput; - error = vfs_link(old_path.dentry, mnt_userns, new_path.dentry->d_inode, + error = vfs_link(old_path.dentry, idmap, new_path.dentry->d_inode, new_dentry, &delegated_inode); out_dput: done_path_create(&new_path, new_dentry); @@ -4697,20 +4690,20 @@ int vfs_rename(struct renamedata *rd) if (source == target) return 0; - error = may_delete(rd->old_mnt_userns, old_dir, old_dentry, is_dir); + error = may_delete(rd->old_mnt_idmap, old_dir, old_dentry, is_dir); if (error) return error; if (!target) { - error = may_create(rd->new_mnt_userns, new_dir, new_dentry); + error = may_create(rd->new_mnt_idmap, new_dir, new_dentry); } else { new_is_dir = d_is_dir(new_dentry); if (!(flags & RENAME_EXCHANGE)) - error = may_delete(rd->new_mnt_userns, new_dir, + error = may_delete(rd->new_mnt_idmap, new_dir, new_dentry, is_dir); else - error = may_delete(rd->new_mnt_userns, new_dir, + error = may_delete(rd->new_mnt_idmap, new_dir, new_dentry, new_is_dir); } if (error) @@ -4725,13 +4718,13 @@ int vfs_rename(struct renamedata *rd) */ if (new_dir != old_dir) { if (is_dir) { - error = inode_permission(rd->old_mnt_userns, source, + error = inode_permission(rd->old_mnt_idmap, source, MAY_WRITE); if (error) return error; } if ((flags & RENAME_EXCHANGE) && new_is_dir) { - error = inode_permission(rd->new_mnt_userns, target, + error = inode_permission(rd->new_mnt_idmap, target, MAY_WRITE); if (error) return error; @@ -4776,7 +4769,7 @@ int vfs_rename(struct renamedata *rd) if (error) goto out; } - error = old_dir->i_op->rename(rd->new_mnt_userns, old_dir, old_dentry, + error = old_dir->i_op->rename(rd->new_mnt_idmap, old_dir, old_dentry, new_dir, new_dentry, flags); if (error) goto out; @@ -4921,10 +4914,10 @@ retry_deleg: rd.old_dir = old_path.dentry->d_inode; rd.old_dentry = old_dentry; - rd.old_mnt_userns = mnt_user_ns(old_path.mnt); + rd.old_mnt_idmap = mnt_idmap(old_path.mnt); rd.new_dir = new_path.dentry->d_inode; rd.new_dentry = new_dentry; - rd.new_mnt_userns = mnt_user_ns(new_path.mnt); + rd.new_mnt_idmap = mnt_idmap(new_path.mnt); rd.delegated_inode = &delegated_inode; rd.flags = flags; error = vfs_rename(&rd); diff --git a/fs/namespace.c b/fs/namespace.c index ab467ee58341..5927d90e24a0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -75,22 +75,6 @@ static DECLARE_RWSEM(namespace_sem); static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ -struct mnt_idmap { - struct user_namespace *owner; - refcount_t count; -}; - -/* - * Carries the initial idmapping of 0:0:4294967295 which is an identity - * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is - * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...]. - */ -struct mnt_idmap nop_mnt_idmap = { - .owner = &init_user_ns, - .count = REFCOUNT_INIT(1), -}; -EXPORT_SYMBOL_GPL(nop_mnt_idmap); - struct mount_kattr { unsigned int attr_set; unsigned int attr_clr; @@ -210,104 +194,6 @@ int mnt_get_count(struct mount *mnt) #endif } -/** - * mnt_idmap_owner - retrieve owner of the mount's idmapping - * @idmap: mount idmapping - * - * This helper will go away once the conversion to use struct mnt_idmap - * everywhere has finished at which point the helper will be unexported. - * - * Only code that needs to perform permission checks based on the owner of the - * idmapping will get access to it. All other code will solely rely on - * idmappings. This will get us type safety so it's impossible to conflate - * filesystems idmappings with mount idmappings. - * - * Return: The owner of the idmapping. - */ -struct user_namespace *mnt_idmap_owner(const struct mnt_idmap *idmap) -{ - return idmap->owner; -} -EXPORT_SYMBOL_GPL(mnt_idmap_owner); - -/** - * mnt_user_ns - retrieve owner of an idmapped mount - * @mnt: the relevant vfsmount - * - * This helper will go away once the conversion to use struct mnt_idmap - * everywhere has finished at which point the helper will be unexported. - * - * Only code that needs to perform permission checks based on the owner of the - * idmapping will get access to it. All other code will solely rely on - * idmappings. This will get us type safety so it's impossible to conflate - * filesystems idmappings with mount idmappings. - * - * Return: The owner of the idmapped. - */ -struct user_namespace *mnt_user_ns(const struct vfsmount *mnt) -{ - struct mnt_idmap *idmap = mnt_idmap(mnt); - - /* Return the actual owner of the filesystem instead of the nop. */ - if (idmap == &nop_mnt_idmap && - !initial_idmapping(mnt->mnt_sb->s_user_ns)) - return mnt->mnt_sb->s_user_ns; - return mnt_idmap_owner(idmap); -} -EXPORT_SYMBOL_GPL(mnt_user_ns); - -/** - * alloc_mnt_idmap - allocate a new idmapping for the mount - * @mnt_userns: owning userns of the idmapping - * - * Allocate a new struct mnt_idmap which carries the idmapping of the mount. - * - * Return: On success a new idmap, on error an error pointer is returned. - */ -static struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns) -{ - struct mnt_idmap *idmap; - - idmap = kzalloc(sizeof(struct mnt_idmap), GFP_KERNEL_ACCOUNT); - if (!idmap) - return ERR_PTR(-ENOMEM); - - idmap->owner = get_user_ns(mnt_userns); - refcount_set(&idmap->count, 1); - return idmap; -} - -/** - * mnt_idmap_get - get a reference to an idmapping - * @idmap: the idmap to bump the reference on - * - * If @idmap is not the @nop_mnt_idmap bump the reference count. - * - * Return: @idmap with reference count bumped if @not_mnt_idmap isn't passed. - */ -static inline struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap) -{ - if (idmap != &nop_mnt_idmap) - refcount_inc(&idmap->count); - - return idmap; -} - -/** - * mnt_idmap_put - put a reference to an idmapping - * @idmap: the idmap to put the reference on - * - * If this is a non-initial idmapping, put the reference count when a mount is - * released and free it if we're the last user. - */ -static inline void mnt_idmap_put(struct mnt_idmap *idmap) -{ - if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) { - put_user_ns(idmap->owner); - kfree(idmap); - } -} - static struct mount *alloc_vfsmnt(const char *name) { struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); @@ -4094,7 +3980,7 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) * Creating an idmapped mount with the filesystem wide idmapping * doesn't make sense so block that. We don't allow mushy semantics. */ - if (mnt_idmap_owner(kattr->mnt_idmap) == fs_userns) + if (!check_fsmapping(kattr->mnt_idmap, m->mnt_sb)) return -EINVAL; /* @@ -4340,7 +4226,7 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, * result. */ mnt_userns = container_of(ns, struct user_namespace, ns); - if (initial_idmapping(mnt_userns)) { + if (mnt_userns == &init_user_ns) { err = -EPERM; goto out_fput; } diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 1ead5bd740c2..14a72224b657 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -209,8 +209,8 @@ config NFS_DISABLE_UDP_SUPPORT config NFS_V4_2_READ_PLUS bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation" depends on NFS_V4_2 - default y + default n help - Choose Y here to enable the use of READ_PLUS over NFS v4.2. READ_PLUS - attempts to improve read performance by compressing out sparse holes - in the file contents. + This is intended for developers only. The READ_PLUS operation has + been shown to have issues under specific conditions and should not + be used in production. diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ea1ceffa1d3a..f8e420464b77 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2296,7 +2296,7 @@ EXPORT_SYMBOL_GPL(nfs_instantiate); * that the operation succeeded on the server, but an error in the * reply path made it appear to have failed. */ -int nfs_create(struct user_namespace *mnt_userns, struct inode *dir, +int nfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct iattr attr; @@ -2325,7 +2325,7 @@ EXPORT_SYMBOL_GPL(nfs_create); * See comments for nfs_proc_create regarding failed operations. */ int -nfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +nfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct iattr attr; @@ -2352,7 +2352,7 @@ EXPORT_SYMBOL_GPL(nfs_mknod); /* * See comments for nfs_proc_create regarding failed operations. */ -int nfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +int nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct iattr attr; @@ -2524,7 +2524,7 @@ EXPORT_SYMBOL_GPL(nfs_unlink); * now have a new file handle and can instantiate an in-core NFS inode * and move the raw page into its mapping. */ -int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { struct page *page; @@ -2642,7 +2642,7 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data) * If these conditions are met, we can drop the dentries before doing * the rename. */ -int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { @@ -2957,12 +2957,14 @@ static u64 nfs_access_login_time(const struct task_struct *task, const struct cred *cred) { const struct task_struct *parent; + const struct cred *pcred; u64 ret; rcu_read_lock(); for (;;) { parent = rcu_dereference(task->real_parent); - if (parent == task || cred_fscmp(parent->cred, cred) != 0) + pcred = rcu_dereference(parent->cred); + if (parent == task || cred_fscmp(pcred, cred) != 0) break; task = parent; } @@ -3023,6 +3025,7 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre * but do it without locking. */ struct nfs_inode *nfsi = NFS_I(inode); + u64 login_time = nfs_access_login_time(current, cred); struct nfs_access_entry *cache; int err = -ECHILD; struct list_head *lh; @@ -3037,6 +3040,8 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre cache = NULL; if (cache == NULL) goto out; + if ((s64)(login_time - cache->timestamp) > 0) + goto out; if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) goto out; *mask = cache->mask; @@ -3257,7 +3262,7 @@ static int nfs_execute_ok(struct inode *inode, int mask) return ret; } -int nfs_permission(struct user_namespace *mnt_userns, +int nfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { @@ -3308,7 +3313,7 @@ out_notsup: res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE | NFS_INO_INVALID_OTHER); if (res == 0) - res = generic_permission(&init_user_ns, inode, mask); + res = generic_permission(&nop_mnt_idmap, inode, mask); goto out; } EXPORT_SYMBOL_GPL(nfs_permission); diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 01596f2d0a1e..1a9d5aa51dfb 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -145,17 +145,10 @@ out: return parent; } -static u64 nfs_fetch_iversion(struct inode *inode) -{ - nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE); - return inode_peek_iversion_raw(inode); -} - const struct export_operations nfs_export_ops = { .encode_fh = nfs_encode_fh, .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, - .fetch_iversion = nfs_fetch_iversion, .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK| EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS| EXPORT_OP_NOATOMIC_ATTR, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d8ec889a4b3f..b0f3c9339e70 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -31,6 +31,7 @@ #include <linux/swap.h> #include <linux/uaccess.h> +#include <linux/filelock.h> #include "delegation.h" #include "internal.h" diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index ad34a33b0737..4974cd18ca46 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -783,6 +783,12 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, return &fl->generic_hdr; } +static bool +filelayout_lseg_is_striped(const struct nfs4_filelayout_segment *flseg) +{ + return flseg->num_fh > 1; +} + /* * filelayout_pg_test(). Called by nfs_can_coalesce_requests() * @@ -803,6 +809,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, size = pnfs_generic_pg_test(pgio, prev, req); if (!size) return 0; + else if (!filelayout_lseg_is_striped(FILELAYOUT_LSEG(pgio->pg_lseg))) + return size; /* see if req and prev are in the same stripe */ if (prev) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e98ee7599eeb..222a28320e1c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -606,7 +606,7 @@ EXPORT_SYMBOL_GPL(nfs_fhget); #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) int -nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -825,10 +825,12 @@ static u32 nfs_get_valid_attrmask(struct inode *inode) reply_mask |= STATX_UID | STATX_GID; if (!(cache_validity & NFS_INO_INVALID_BLOCKS)) reply_mask |= STATX_BLOCKS; + if (!(cache_validity & NFS_INO_INVALID_CHANGE)) + reply_mask |= STATX_CHANGE_COOKIE; return reply_mask; } -int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, +int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); @@ -843,7 +845,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID | STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME | - STATX_INO | STATX_SIZE | STATX_BLOCKS; + STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_BTIME | + STATX_CHANGE_COOKIE; if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) { if (readdirplus_enabled) @@ -851,8 +854,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, goto out_no_revalidate; } - /* Flush out writes to the server in order to update c/mtime. */ - if ((request_mask & (STATX_CTIME | STATX_MTIME)) && + /* Flush out writes to the server in order to update c/mtime/version. */ + if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_CHANGE_COOKIE)) && S_ISREG(inode->i_mode)) filemap_write_and_wait(inode->i_mapping); @@ -872,7 +875,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, /* Is the user requesting attributes that might need revalidation? */ if (!(request_mask & (STATX_MODE|STATX_NLINK|STATX_ATIME|STATX_CTIME| STATX_MTIME|STATX_UID|STATX_GID| - STATX_SIZE|STATX_BLOCKS))) + STATX_SIZE|STATX_BLOCKS| + STATX_CHANGE_COOKIE))) goto out_no_revalidate; /* Check whether the cached attributes are stale */ @@ -908,8 +912,12 @@ out_no_revalidate: /* Only return attributes that were revalidated. */ stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask; - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); + stat->change_cookie = inode_peek_iversion_raw(inode); + stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC; + if (server->change_attr_type != NFS4_CHANGE_TYPE_IS_UNDEFINED) + stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC; if (S_ISDIR(inode->i_mode)) stat->blksize = NFS_SERVER(inode)->dtsize; out: diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ae7d4a8c728c..41468c21291d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -384,18 +384,18 @@ extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc); struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); void nfs_d_prune_case_insensitive_aliases(struct inode *inode); -int nfs_create(struct user_namespace *, struct inode *, struct dentry *, +int nfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); -int nfs_mkdir(struct user_namespace *, struct inode *, struct dentry *, +int nfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t); int nfs_rmdir(struct inode *, struct dentry *); int nfs_unlink(struct inode *, struct dentry *); -int nfs_symlink(struct user_namespace *, struct inode *, struct dentry *, +int nfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *, const char *); int nfs_link(struct dentry *, struct inode *, struct dentry *); -int nfs_mknod(struct user_namespace *, struct inode *, struct dentry *, umode_t, +int nfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); -int nfs_rename(struct user_namespace *, struct inode *, struct dentry *, +int nfs_rename(struct mnt_idmap *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); #ifdef CONFIG_NFS_V4_2 diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index b0ef7e7ddb30..19d51ebf842c 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -208,23 +208,23 @@ out_fc: } static int -nfs_namespace_getattr(struct user_namespace *mnt_userns, +nfs_namespace_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { if (NFS_FH(d_inode(path->dentry))->size != 0) - return nfs_getattr(mnt_userns, path, stat, request_mask, + return nfs_getattr(idmap, path, stat, request_mask, query_flags); - generic_fillattr(&init_user_ns, d_inode(path->dentry), stat); + generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat); return 0; } static int -nfs_namespace_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +nfs_namespace_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { if (NFS_FH(d_inode(dentry))->size != 0) - return nfs_setattr(mnt_userns, dentry, attr); + return nfs_setattr(idmap, dentry, attr); return -EACCES; } diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index df9ca56db347..4fa37dc038b5 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h @@ -12,7 +12,7 @@ */ #ifdef CONFIG_NFS_V3_ACL extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu); -extern int nfs3_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +extern int nfs3_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, struct posix_acl *dfacl); diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 74d11e3c4205..1247f544a440 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -255,7 +255,7 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, } -int nfs3_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int nfs3_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { struct posix_acl *orig = acl, *dfacl = NULL, *alloc; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5edd1704f735..4c9f8bd866ab 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -23,6 +23,7 @@ #define NFS4_MAX_LOOP_ON_RECOVER (10) #include <linux/seqlock.h> +#include <linux/filelock.h> struct idmap; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 40d749f29ed3..d9c332019d06 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7692,7 +7692,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *key, const void *buf, size_t buflen, int flags) @@ -7716,7 +7716,7 @@ static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry) #define XATTR_NAME_NFSV4_DACL "system.nfs4_dacl" static int nfs4_xattr_set_nfs4_dacl(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *key, const void *buf, size_t buflen, int flags) @@ -7739,7 +7739,7 @@ static bool nfs4_xattr_list_nfs4_dacl(struct dentry *dentry) #define XATTR_NAME_NFSV4_SACL "system.nfs4_sacl" static int nfs4_xattr_set_nfs4_sacl(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *key, const void *buf, size_t buflen, int flags) @@ -7764,7 +7764,7 @@ static bool nfs4_xattr_list_nfs4_sacl(struct dentry *dentry) #ifdef CONFIG_NFS_V4_SECURITY_LABEL static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *key, const void *buf, size_t buflen, int flags) @@ -7815,7 +7815,7 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len) #ifdef CONFIG_NFS_V4_2 static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *key, const void *buf, size_t buflen, int flags) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 16be6dae524f..779bfc37233c 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -21,6 +21,7 @@ #include <linux/nfs_page.h> #include <linux/nfs_mount.h> #include <linux/export.h> +#include <linux/filelock.h> #include "internal.h" #include "pnfs.h" diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 80c240e50952..1a80d548253a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -25,6 +25,7 @@ #include <linux/freezer.h> #include <linux/wait.h> #include <linux/iversion.h> +#include <linux/filelock.h> #include <linux/uaccess.h> #include <linux/sched/mm.h> diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index 0a9b72685f98..1479583fbb62 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -9,6 +9,7 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/fs.h> +#include <linux/filelock.h> static unsigned int grace_net_id; static DEFINE_SPINLOCK(grace_lock); diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 45b2c9e3f636..c0950edb26b0 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -662,6 +662,39 @@ static struct shrinker nfsd_file_shrinker = { }; /** + * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file + * @nf: nfsd_file to attempt to queue + * @dispose: private list to queue successfully-put objects + * + * Unhash an nfsd_file, try to get a reference to it, and then put that + * reference. If it's the last reference, queue it to the dispose list. + */ +static void +nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) + __must_hold(RCU) +{ + int decrement = 1; + + /* If we raced with someone else unhashing, ignore it */ + if (!nfsd_file_unhash(nf)) + return; + + /* If we can't get a reference, ignore it */ + if (!nfsd_file_get(nf)) + return; + + /* Extra decrement if we remove from the LRU */ + if (nfsd_file_lru_remove(nf)) + ++decrement; + + /* If refcount goes to 0, then put on the dispose list */ + if (refcount_sub_and_test(decrement, &nf->nf_ref)) { + list_add(&nf->nf_lru, dispose); + trace_nfsd_file_closing(nf); + } +} + +/** * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode * @inode: inode on which to close out nfsd_files * @dispose: list on which to gather nfsd_files to close out @@ -688,30 +721,11 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) rcu_read_lock(); do { - int decrement = 1; - nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, nfsd_file_rhash_params); if (!nf) break; - - /* If we raced with someone else unhashing, ignore it */ - if (!nfsd_file_unhash(nf)) - continue; - - /* If we can't get a reference, ignore it */ - if (!nfsd_file_get(nf)) - continue; - - /* Extra decrement if we remove from the LRU */ - if (nfsd_file_lru_remove(nf)) - ++decrement; - - /* If refcount goes to 0, then put on the dispose list */ - if (refcount_sub_and_test(decrement, &nf->nf_ref)) { - list_add(&nf->nf_lru, dispose); - trace_nfsd_file_closing(nf); - } + nfsd_file_cond_queue(nf, dispose); } while (1); rcu_read_unlock(); } @@ -928,11 +942,8 @@ __nfsd_file_cache_purge(struct net *net) nf = rhashtable_walk_next(&iter); while (!IS_ERR_OR_NULL(nf)) { - if (!net || nf->nf_net == net) { - nfsd_file_unhash(nf); - nfsd_file_lru_remove(nf); - list_add(&nf->nf_lru, &dispose); - } + if (!net || nf->nf_net == net) + nfsd_file_cond_queue(nf, &dispose); nf = rhashtable_walk_next(&iter); } @@ -1071,8 +1082,8 @@ nfsd_file_is_cached(struct inode *inode) static __be32 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf, - bool open, bool want_gc) + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf, bool want_gc) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_FULL, @@ -1147,8 +1158,7 @@ wait_for_construction: status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); out: if (status == nfs_ok) { - if (open) - this_cpu_inc(nfsd_file_acquisitions); + this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { if (refcount_dec_and_test(&nf->nf_ref)) @@ -1158,20 +1168,23 @@ out: out_status: put_cred(key.cred); - if (open) - trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; open_file: trace_nfsd_file_alloc(nf); nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); if (nf->nf_mark) { - if (open) { + if (file) { + get_file(file); + nf->nf_file = file; + status = nfs_ok; + trace_nfsd_file_opened(nf, status); + } else { status = nfsd_open_verified(rqstp, fhp, may_flags, &nf->nf_file); trace_nfsd_file_open(nf, status); - } else - status = nfs_ok; + } } else status = nfserr_jukebox; /* @@ -1207,7 +1220,7 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); } /** @@ -1228,28 +1241,30 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); } /** - * nfsd_file_create - Get a struct nfsd_file, do not open + * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file * @rqstp: the RPC transaction being executed * @fhp: the NFS filehandle of the file just created * @may_flags: NFSD_MAY_ settings for the file + * @file: cached, already-open file (may be NULL) * @pnf: OUT: new or found "struct nfsd_file" object * - * The nfsd_file_object returned by this API is reference-counted - * but not garbage-collected. The object is released immediately - * one RCU grace period after the final nfsd_file_put(). + * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist, + * and @file is non-NULL, use it to instantiate a new nfsd_file instead of + * opening a new one. * * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in * network byte order is returned. */ __be32 -nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf) +nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); } /* diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index b7efb2c3ddb1..41516a4263ea 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -60,7 +60,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); -__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **nfp); +__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **nfp); int nfsd_file_cache_stats_show(struct seq_file *m, void *v); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 8c854ba3285b..ec49b200b797 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -10,6 +10,7 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <linux/filelock.h> #include <linux/percpu_counter.h> #include <linux/siphash.h> @@ -195,7 +196,7 @@ struct nfsd_net { atomic_t nfsd_courtesy_clients; struct shrinker nfsd_client_shrinker; - struct delayed_work nfsd_shrinker_work; + struct work_struct nfsd_shrinker_work; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 1457f59f447a..995cb2c90b1a 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -113,11 +113,11 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) inode_lock(inode); - error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_ACCESS, + error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_ACCESS, argp->acl_access); if (error) goto out_drop_lock; - error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_DEFAULT, + error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_DEFAULT, argp->acl_default); if (error) goto out_drop_lock; diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 647108138e8a..887803735e2a 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -103,11 +103,11 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) inode_lock(inode); - error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_ACCESS, + error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_ACCESS, argp->acl_access); if (error) goto out_drop_lock; - error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_DEFAULT, + error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_DEFAULT, argp->acl_default); out_drop_lock: diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index d01b29aba662..f41992ecd0d7 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -320,7 +320,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, iap->ia_mode &= ~current_umask(); fh_fill_pre_attrs(fhp); - host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true); + host_err = vfs_create(&nop_mnt_idmap, inode, child, iap->ia_mode, true); if (host_err < 0) { status = nfserrno(host_err); goto out; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index bd880d55f565..f189ba7995f5 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -937,7 +937,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * the client wants us to do more in this compound: */ if (!nfsd4_last_compound_op(rqstp)) - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, @@ -1318,6 +1318,7 @@ try_again: /* allow 20secs for mount/unmount for now - revisit */ if (signal_pending(current) || (schedule_timeout(20*HZ) == 0)) { + finish_wait(&nn->nfsd_ssc_waitq, &wait); kfree(work); return nfserr_eagain; } @@ -2607,12 +2608,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) cstate->minorversion = args->minorversion; fh_init(current_fh, NFS4_FHSIZE); fh_init(save_fh, NFS4_FHSIZE); - /* * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. */ - __clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); /* * According to RFC3010, this takes precedence over all other errors. @@ -2734,7 +2734,7 @@ encode_op: out: cstate->status = status; /* Reset deferral mechanism for RPC deferrals */ - __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); return rpc_success; } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 78b8cd9651d5..3509e73abe1f 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -233,7 +233,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) * as well be forgiving and just succeed silently. */ goto out_put; - status = vfs_mkdir(&init_user_ns, d_inode(dir), dentry, S_IRWXU); + status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); out_put: dput(dentry); out_unlock: @@ -353,7 +353,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn) status = -ENOENT; if (d_really_is_negative(dentry)) goto out; - status = vfs_rmdir(&init_user_ns, d_inode(dir), dentry); + status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry); out: dput(dentry); out_unlock: @@ -443,7 +443,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) if (nfs4_has_reclaimed_state(name, nn)) goto out_free; - status = vfs_rmdir(&init_user_ns, d_inode(parent), child); + status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child); if (status) printk("failed to remove client recovery directory %pd\n", child); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7b2ee535ade8..c1684da6c01f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4411,7 +4411,7 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) if (!count) count = atomic_long_read(&num_delegations); if (count) - mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); + queue_work(laundry_wq, &nn->nfsd_shrinker_work); return (unsigned long)count; } @@ -4421,7 +4421,7 @@ nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) return SHRINK_STOP; } -int +void nfsd4_init_leases_net(struct nfsd_net *nn) { struct sysinfo si; @@ -4443,16 +4443,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn) nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); atomic_set(&nn->nfsd_courtesy_clients, 0); - nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; - nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; - nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); -} - -void -nfsd4_leases_net_shutdown(struct nfsd_net *nn) -{ - unregister_shrinker(&nn->nfsd_client_shrinker); } static void init_nfs4_replay(struct nfs4_replay *rp) @@ -5262,18 +5252,10 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, if (!fp->fi_fds[oflag]) { spin_unlock(&fp->fi_lock); - if (!open->op_filp) { - status = nfsd_file_acquire(rqstp, cur_fh, access, &nf); - if (status != nfs_ok) - goto out_put_access; - } else { - status = nfsd_file_create(rqstp, cur_fh, access, &nf); - if (status != nfs_ok) - goto out_put_access; - nf->nf_file = open->op_filp; - open->op_filp = NULL; - trace_nfsd_file_create(rqstp, access, nf); - } + status = nfsd_file_acquire_opened(rqstp, cur_fh, access, + open->op_filp, &nf); + if (status != nfs_ok) + goto out_put_access; spin_lock(&fp->fi_lock); if (!fp->fi_fds[oflag]) { @@ -5374,7 +5356,7 @@ static int nfsd4_check_conflicting_opens(struct nfs4_client *clp, { struct nfs4_ol_stateid *st; struct file *f = fp->fi_deleg_file->nf_file; - struct inode *ino = locks_inode(f); + struct inode *ino = file_inode(f); int writes; writes = atomic_read(&ino->i_writecount); @@ -6243,8 +6225,7 @@ deleg_reaper(struct nfsd_net *nn) static void nfsd4_state_shrinker_worker(struct work_struct *work) { - struct delayed_work *dwork = to_delayed_work(work); - struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + struct nfsd_net *nn = container_of(work, struct nfsd_net, nfsd_shrinker_work); courtesy_client_reaper(nn); @@ -7828,7 +7809,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) return status; } - inode = locks_inode(nf->nf_file); + inode = file_inode(nf->nf_file); flctx = locks_inode_context(inode); if (flctx && !list_empty_careful(&flctx->flc_posix)) { @@ -8074,11 +8055,20 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); + INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); + nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; + nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; + + if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client")) + goto err_shrinker; return 0; +err_shrinker: + put_net(net); + kfree(nn->sessionid_hashtbl); err_sessionid: kfree(nn->unconf_id_hashtbl); err_unconf_id: @@ -8171,6 +8161,8 @@ nfs4_state_shutdown_net(struct net *net) struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unregister_shrinker(&nn->nfsd_client_shrinker); + cancel_work(&nn->nfsd_shrinker_work); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); @@ -8190,7 +8182,6 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); - rhltable_destroy(&nfs4_file_rhltable); #ifdef CONFIG_NFSD_V4_2_INTER_SSC nfsd4_ssc_shutdown_umount(nn); #endif @@ -8200,6 +8191,7 @@ void nfs4_state_shutdown(void) { nfsd4_destroy_callback_queue(); + rhltable_destroy(&nfs4_file_rhltable); } static void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2b4ae858c89b..e12e5a4ad502 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2523,7 +2523,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) - __clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); return true; } @@ -2965,7 +2965,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, goto out; } - err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); + err = vfs_getattr(&path, &stat, + STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE, + AT_STATX_SYNC_AS_STAT); if (err) goto out_nfserr; if (!(stat.result_mask & STATX_BTIME)) @@ -3629,6 +3631,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, case nfserr_noent: xdr_truncate_encode(xdr, start_offset); goto skip_entry; + case nfserr_jukebox: + /* + * The pseudoroot should only display dentries that lead to + * exports. If we get EJUKEBOX here, then we can't tell whether + * this entry should be included. Just fail the whole READDIR + * with NFS4ERR_DELAY in that case, and hope that the situation + * will resolve itself by the client's next attempt. + */ + if (cd->rd_fhp->fh_export->ex_flags & NFSEXP_V4ROOT) + goto fail; + fallthrough; default: /* * If the client requested the RDATTR_ERROR attribute, diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index d1e581a60480..c2577ee7ffb2 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1457,9 +1457,7 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; - retval = nfsd4_init_leases_net(nn); - if (retval) - goto out_drc_error; + nfsd4_init_leases_net(nn); retval = nfsd_reply_cache_init(nn); if (retval) goto out_cache_error; @@ -1469,8 +1467,6 @@ static __net_init int nfsd_init_net(struct net *net) return 0; out_cache_error: - nfsd4_leases_net_shutdown(nn); -out_drc_error: nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); @@ -1486,7 +1482,6 @@ static __net_exit void nfsd_exit_net(struct net *net) nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); - nfsd4_leases_net_shutdown(nn); } static struct pernet_operations nfsd_net_ops = { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 93b42ef9ed91..fa0144a74267 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -504,8 +504,7 @@ extern void unregister_cld_notifier(void); extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); #endif -extern int nfsd4_init_leases_net(struct nfsd_net *nn); -extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn); +extern void nfsd4_init_leases_net(struct nfsd_net *nn); #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) @@ -513,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry) return 0; } -static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; }; -static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {}; +static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { }; #define register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 8c52b6c9d31a..ccd8485fee04 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -40,7 +40,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) /* make sure parents give x permission to user */ int err; parent = dget_parent(tdentry); - err = inode_permission(&init_user_ns, + err = inode_permission(&nop_mnt_idmap, d_inode(parent), MAY_EXEC); if (err < 0) { dput(parent); @@ -628,6 +628,10 @@ void fh_fill_pre_attrs(struct svc_fh *fhp) stat.mtime = inode->i_mtime; stat.ctime = inode->i_ctime; stat.size = inode->i_size; + if (v4 && IS_I_VERSION(inode)) { + stat.change_cookie = inode_query_iversion(inode); + stat.result_mask |= STATX_CHANGE_COOKIE; + } } if (v4) fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); @@ -659,6 +663,10 @@ void fh_fill_post_attrs(struct svc_fh *fhp) if (err) { fhp->fh_post_saved = false; fhp->fh_post_attr.ctime = inode->i_ctime; + if (v4 && IS_I_VERSION(inode)) { + fhp->fh_post_attr.change_cookie = inode_query_iversion(inode); + fhp->fh_post_attr.result_mask |= STATX_CHANGE_COOKIE; + } } else fhp->fh_post_saved = true; if (v4) @@ -748,3 +756,37 @@ enum fsid_source fsid_source(const struct svc_fh *fhp) return FSIDSOURCE_UUID; return FSIDSOURCE_DEV; } + +/* + * We could use i_version alone as the change attribute. However, i_version + * can go backwards on a regular file after an unclean shutdown. On its own + * that doesn't necessarily cause a problem, but if i_version goes backwards + * and then is incremented again it could reuse a value that was previously + * used before boot, and a client who queried the two values might incorrectly + * assume nothing changed. + * + * By using both ctime and the i_version counter we guarantee that as long as + * time doesn't go backwards we never reuse an old value. If the filesystem + * advertises STATX_ATTR_CHANGE_MONOTONIC, then this mitigation is not + * needed. + * + * We only need to do this for regular files as well. For directories, we + * assume that the new change attr is always logged to stable storage in some + * fashion before the results can be seen. + */ +u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode) +{ + u64 chattr; + + if (stat->result_mask & STATX_CHANGE_COOKIE) { + chattr = stat->change_cookie; + if (S_ISREG(inode->i_mode) && + !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) { + chattr += (u64)stat->ctime.tv_sec << 30; + chattr += stat->ctime.tv_nsec; + } + } else { + chattr = time_to_chattr(&stat->ctime); + } + return chattr; +} diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 513e028b0bbe..4e0ecf0ae2cf 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -293,34 +293,7 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp) fhp->fh_pre_saved = false; } -/* - * We could use i_version alone as the change attribute. However, - * i_version can go backwards after a reboot. On its own that doesn't - * necessarily cause a problem, but if i_version goes backwards and then - * is incremented again it could reuse a value that was previously used - * before boot, and a client who queried the two values might - * incorrectly assume nothing changed. - * - * By using both ctime and the i_version counter we guarantee that as - * long as time doesn't go backwards we never reuse an old value. - */ -static inline u64 nfsd4_change_attribute(struct kstat *stat, - struct inode *inode) -{ - if (inode->i_sb->s_export_op->fetch_iversion) - return inode->i_sb->s_export_op->fetch_iversion(inode); - else if (IS_I_VERSION(inode)) { - u64 chattr; - - chattr = stat->ctime.tv_sec; - chattr <<= 30; - chattr += stat->ctime.tv_nsec; - chattr += inode_query_iversion(inode); - return chattr; - } else - return time_to_chattr(&stat->ctime); -} - +u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode); extern void fh_fill_pre_attrs(struct svc_fh *fhp); extern void fh_fill_post_attrs(struct svc_fh *fhp); extern void fh_fill_both_attrs(struct svc_fh *fhp); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index a5570cf75f3f..a82d91afdc9c 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -93,7 +93,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) if (delta < 0) delta = -delta; if (delta < MAX_TOUCH_TIME_ERROR && - setattr_prepare(&init_user_ns, fhp->fh_dentry, iap) != 0) { + setattr_prepare(&nop_mnt_idmap, fhp->fh_dentry, iap) != 0) { /* * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME. * This will cause notify_change to set these times @@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 56fba1cba3af..325d3d3f1211 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -453,8 +453,8 @@ static void nfsd_shutdown_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfsd_file_cache_shutdown_net(net); nfs4_state_shutdown_net(net); + nfsd_file_cache_shutdown_net(net); if (nn->lockd_up) { lockd_down(net); nn->lockd_up = false; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index c852ae8eaf37..8f9c82d9e075 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -981,43 +981,6 @@ TRACE_EVENT(nfsd_file_acquire, ) ); -TRACE_EVENT(nfsd_file_create, - TP_PROTO( - const struct svc_rqst *rqstp, - unsigned int may_flags, - const struct nfsd_file *nf - ), - - TP_ARGS(rqstp, may_flags, nf), - - TP_STRUCT__entry( - __field(const void *, nf_inode) - __field(const void *, nf_file) - __field(unsigned long, may_flags) - __field(unsigned long, nf_flags) - __field(unsigned long, nf_may) - __field(unsigned int, nf_ref) - __field(u32, xid) - ), - - TP_fast_assign( - __entry->nf_inode = nf->nf_inode; - __entry->nf_file = nf->nf_file; - __entry->may_flags = may_flags; - __entry->nf_flags = nf->nf_flags; - __entry->nf_may = nf->nf_may; - __entry->nf_ref = refcount_read(&nf->nf_ref); - __entry->xid = be32_to_cpu(rqstp->rq_xid); - ), - - TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", - __entry->xid, __entry->nf_inode, - show_nfsd_may_flags(__entry->may_flags), - __entry->nf_ref, show_nf_flags(__entry->nf_flags), - show_nfsd_may_flags(__entry->nf_may), __entry->nf_file - ) -); - TRACE_EVENT(nfsd_file_insert_err, TP_PROTO( const struct svc_rqst *rqstp, @@ -1079,8 +1042,8 @@ TRACE_EVENT(nfsd_file_cons_err, ) ); -TRACE_EVENT(nfsd_file_open, - TP_PROTO(struct nfsd_file *nf, __be32 status), +DECLARE_EVENT_CLASS(nfsd_file_open_class, + TP_PROTO(const struct nfsd_file *nf, __be32 status), TP_ARGS(nf, status), TP_STRUCT__entry( __field(void *, nf_inode) /* cannot be dereferenced */ @@ -1104,6 +1067,17 @@ TRACE_EVENT(nfsd_file_open, __entry->nf_file) ) +#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \ +DEFINE_EVENT(nfsd_file_open_class, name, \ + TP_PROTO( \ + const struct nfsd_file *nf, \ + __be32 status \ + ), \ + TP_ARGS(nf, status)) + +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open); +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened); + TRACE_EVENT(nfsd_file_is_cached, TP_PROTO( const struct inode *inode, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 4c3a0d84043c..ab4ee3509ce3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -426,7 +426,7 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) if (iap->ia_size < 0) return -EFBIG; - host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL); + host_err = notify_change(&nop_mnt_idmap, dentry, &size_attr, NULL); if (host_err) return host_err; iap->ia_valid &= ~ATTR_SIZE; @@ -444,7 +444,7 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) return 0; iap->ia_valid |= ATTR_CTIME; - return notify_change(&init_user_ns, dentry, iap, NULL); + return notify_change(&nop_mnt_idmap, dentry, iap, NULL); } /** @@ -542,12 +542,12 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, attr->na_labelerr = security_inode_setsecctx(dentry, attr->na_seclabel->data, attr->na_seclabel->len); if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl) - attr->na_aclerr = set_posix_acl(&init_user_ns, + attr->na_aclerr = set_posix_acl(&nop_mnt_idmap, dentry, ACL_TYPE_ACCESS, attr->na_pacl); if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && !attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode)) - attr->na_aclerr = set_posix_acl(&init_user_ns, + attr->na_aclerr = set_posix_acl(&nop_mnt_idmap, dentry, ACL_TYPE_DEFAULT, attr->na_dpacl); inode_unlock(inode); @@ -583,7 +583,7 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; if (!(inode->i_mode & S_ISVTX)) return 0; - if (vfs_getxattr(&init_user_ns, dentry, NFSD_JUNCTION_XATTR_NAME, + if (vfs_getxattr(&nop_mnt_idmap, dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0) return 0; return 1; @@ -1363,12 +1363,13 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, err = 0; switch (type) { case S_IFREG: - host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true); + host_err = vfs_create(&nop_mnt_idmap, dirp, dchild, + iap->ia_mode, true); if (!host_err) nfsd_check_ignore_resizing(iap); break; case S_IFDIR: - host_err = vfs_mkdir(&init_user_ns, dirp, dchild, iap->ia_mode); + host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); if (!host_err && unlikely(d_unhashed(dchild))) { struct dentry *d; d = lookup_one_len(dchild->d_name.name, @@ -1396,7 +1397,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, case S_IFBLK: case S_IFIFO: case S_IFSOCK: - host_err = vfs_mknod(&init_user_ns, dirp, dchild, + host_err = vfs_mknod(&nop_mnt_idmap, dirp, dchild, iap->ia_mode, rdev); break; default: @@ -1557,7 +1558,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_drop_write; } fh_fill_pre_attrs(fhp); - host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path); + host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path); err = nfserrno(host_err); cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); if (!err) @@ -1625,7 +1626,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, if (d_really_is_negative(dold)) goto out_dput; fh_fill_pre_attrs(ffhp); - host_err = vfs_link(dold, &init_user_ns, dirp, dnew, NULL); + host_err = vfs_link(dold, &nop_mnt_idmap, dirp, dnew, NULL); fh_fill_post_attrs(ffhp); inode_unlock(dirp); if (!host_err) { @@ -1745,10 +1746,10 @@ retry: goto out_dput_old; } else { struct renamedata rd = { - .old_mnt_userns = &init_user_ns, + .old_mnt_idmap = &nop_mnt_idmap, .old_dir = fdir, .old_dentry = odentry, - .new_mnt_userns = &init_user_ns, + .new_mnt_idmap = &nop_mnt_idmap, .new_dir = tdir, .new_dentry = ndentry, }; @@ -1850,14 +1851,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, nfsd_close_cached_files(rdentry); for (retries = 1;;) { - host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL); + host_err = vfs_unlink(&nop_mnt_idmap, dirp, rdentry, NULL); if (host_err != -EAGAIN || !retries--) break; if (!nfsd_wait_for_delegreturn(rqstp, rinode)) break; } } else { - host_err = vfs_rmdir(&init_user_ns, dirp, rdentry); + host_err = vfs_rmdir(&nop_mnt_idmap, dirp, rdentry); } fh_fill_post_attrs(fhp); @@ -2129,7 +2130,7 @@ nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, inode_lock_shared(inode); - len = vfs_getxattr(&init_user_ns, dentry, name, NULL, 0); + len = vfs_getxattr(&nop_mnt_idmap, dentry, name, NULL, 0); /* * Zero-length attribute, just return. @@ -2156,7 +2157,7 @@ nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, goto out; } - len = vfs_getxattr(&init_user_ns, dentry, name, buf, len); + len = vfs_getxattr(&nop_mnt_idmap, dentry, name, buf, len); if (len <= 0) { kvfree(buf); buf = NULL; @@ -2267,7 +2268,7 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) inode_lock(fhp->fh_dentry->d_inode); fh_fill_pre_attrs(fhp); - ret = __vfs_removexattr_locked(&init_user_ns, fhp->fh_dentry, + ret = __vfs_removexattr_locked(&nop_mnt_idmap, fhp->fh_dentry, name, NULL); fh_fill_post_attrs(fhp); @@ -2294,7 +2295,7 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, inode_lock(fhp->fh_dentry->d_inode); fh_fill_pre_attrs(fhp); - ret = __vfs_setxattr_locked(&init_user_ns, fhp->fh_dentry, name, buf, + ret = __vfs_setxattr_locked(&nop_mnt_idmap, fhp->fh_dentry, name, buf, len, flags, NULL); fh_fill_post_attrs(fhp); inode_unlock(fhp->fh_dentry->d_inode); @@ -2378,14 +2379,14 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, return 0; /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ - err = inode_permission(&init_user_ns, inode, + err = inode_permission(&nop_mnt_idmap, inode, acc & (MAY_READ | MAY_WRITE | MAY_EXEC)); /* Allow read access to binaries even when mode 111 */ if (err == -EACCES && S_ISREG(inode->i_mode) && (acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) || acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC))) - err = inode_permission(&init_user_ns, inode, MAY_EXEC); + err = inode_permission(&nop_mnt_idmap, inode, MAY_EXEC); return err? nfserrno(err) : 0; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index dbdfef7ae85b..43fb57a301d3 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -170,9 +170,14 @@ static inline void fh_drop_write(struct svc_fh *fh) static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat) { + u32 request_mask = STATX_BASIC_STATS; struct path p = {.mnt = fh->fh_export->ex_path.mnt, .dentry = fh->fh_dentry}; - return nfserrno(vfs_getattr(&p, stat, STATX_BASIC_STATS, + + if (fh->fh_maxsize == NFS4_FHSIZE) + request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE); + + return nfserrno(vfs_getattr(&p, stat, request_mask, AT_STATX_SYNC_AS_STAT)); } diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index b9d15c3df3cc..40ce92a332fe 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -480,9 +480,18 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh, &submit_ptr); if (ret) { - if (ret != -EEXIST) - return ret; - goto out_check; + if (likely(ret == -EEXIST)) + goto out_check; + if (ret == -ENOENT) { + /* + * Block address translation failed due to invalid + * value of 'ptr'. In this case, return internal code + * -EINVAL (broken bmap) to notify bmap layer of fatal + * metadata corruption. + */ + ret = -EINVAL; + } + return ret; } if (ra) { diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 232dd7b6cca1..1310d2d5feb3 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -364,7 +364,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) ii->i_bh = bh; atomic64_inc(&root->inodes_count); - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = ino; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); @@ -949,7 +949,7 @@ void nilfs_evict_inode(struct inode *inode) */ } -int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct nilfs_transaction_info ti; @@ -957,7 +957,7 @@ int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct super_block *sb = inode->i_sb; int err; - err = setattr_prepare(&init_user_ns, dentry, iattr); + err = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (err) return err; @@ -972,7 +972,7 @@ int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, nilfs_truncate(inode); } - setattr_copy(&init_user_ns, inode, iattr); + setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); if (iattr->ia_valid & ATTR_MODE) { @@ -988,7 +988,7 @@ out_err: return err; } -int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode, +int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct nilfs_root *root = NILFS_I(inode)->i_root; @@ -997,7 +997,7 @@ int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode, root->cno != NILFS_CPTREE_CURRENT_CNO) return -EROFS; /* snapshot is not writable */ - return generic_permission(&init_user_ns, inode, mask); + return generic_permission(&nop_mnt_idmap, inode, mask); } int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 87e1004b606d..5ccc638ae92f 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -128,7 +128,7 @@ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) /** * nilfs_fileattr_set - ioctl to support chattr */ -int nilfs_fileattr_set(struct user_namespace *mnt_userns, +int nilfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); @@ -1114,7 +1114,14 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) minseg = range[0] + segbytes - 1; do_div(minseg, segbytes); + + if (range[1] < 4096) + goto out; + maxseg = NILFS_SB2_OFFSET_BYTES(range[1]); + if (maxseg < segbytes) + goto out; + do_div(maxseg, segbytes); maxseg--; diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 23899e0ae850..c7024da8f1e2 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -72,7 +72,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int nilfs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int nilfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct inode *inode; @@ -100,7 +100,7 @@ static int nilfs_create(struct user_namespace *mnt_userns, struct inode *dir, } static int -nilfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +nilfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; @@ -125,7 +125,7 @@ nilfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, return err; } -static int nilfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int nilfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { struct nilfs_transaction_info ti; @@ -202,7 +202,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, return err; } -static int nilfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int nilfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; @@ -340,7 +340,7 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) return err; } -static int nilfs_rename(struct user_namespace *mnt_userns, +static int nilfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index aecda4fc95f5..8046490cd7fe 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -242,7 +242,7 @@ extern int nilfs_sync_file(struct file *, loff_t, loff_t, int); /* ioctl.c */ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *m); -int nilfs_fileattr_set(struct user_namespace *mnt_userns, +int nilfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); long nilfs_ioctl(struct file *, unsigned int, unsigned long); long nilfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); @@ -271,10 +271,10 @@ struct inode *nilfs_iget_for_shadow(struct inode *inode); extern void nilfs_update_inode(struct inode *, struct buffer_head *, int); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); -extern int nilfs_setattr(struct user_namespace *, struct dentry *, +extern int nilfs_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern void nilfs_write_failed(struct address_space *mapping, loff_t to); -int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode, +int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); extern int nilfs_inode_dirty(struct inode *); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 6edb6e0dd61f..1422b8ba24ed 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -409,6 +409,15 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) goto out; /* + * Prevent underflow in second superblock position calculation. + * The exact minimum size check is done in nilfs_sufile_resize(). + */ + if (newsize < 4096) { + ret = -ENOSPC; + goto out; + } + + /* * Write lock is required to protect some functions depending * on the number of segments, the number of reserved segments, * and so forth. diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 2064e6473d30..3a4c9c150cbf 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -544,9 +544,15 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, { struct nilfs_super_block **sbp = nilfs->ns_sbp; struct buffer_head **sbh = nilfs->ns_sbh; - u64 sb2off = NILFS_SB2_OFFSET_BYTES(bdev_nr_bytes(nilfs->ns_bdev)); + u64 sb2off, devsize = bdev_nr_bytes(nilfs->ns_bdev); int valid[2], swp = 0; + if (devsize < NILFS_SEG_MIN_BLOCKS * NILFS_MIN_BLOCK_SIZE + 4096) { + nilfs_err(sb, "device size too small"); + return -EINVAL; + } + sb2off = NILFS_SB2_OFFSET_BYTES(devsize); + sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize, &sbh[0]); sbp[1] = nilfs_read_super_block(sb, sb2off, blocksize, &sbh[1]); diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 08c659332e26..e6fc5f7cb1d7 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2865,7 +2865,7 @@ void ntfs_truncate_vfs(struct inode *vi) { /** * ntfs_setattr - called from notify_change() when an attribute is being changed - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dentry: dentry whose attributes to change * @attr: structure describing the attributes and the changes * @@ -2878,14 +2878,14 @@ void ntfs_truncate_vfs(struct inode *vi) { * * Called with ->i_mutex held. */ -int ntfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *vi = d_inode(dentry); int err; unsigned int ia_valid = attr->ia_valid; - err = setattr_prepare(&init_user_ns, dentry, attr); + err = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (err) goto out; /* We do not support NTFS ACLs yet. */ diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 6f78ee00f57f..147ef4ddb691 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -289,7 +289,7 @@ extern int ntfs_show_options(struct seq_file *sf, struct dentry *root); extern int ntfs_truncate(struct inode *vi); extern void ntfs_truncate_vfs(struct inode *vi); -extern int ntfs_setattr(struct user_namespace *mnt_userns, +extern int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); extern int __ntfs_write_inode(struct inode *vi, int sync); diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index e5399ebc3a2b..e9bdc1ff08c9 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -70,7 +70,7 @@ static long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg) /* * ntfs_getattr - inode_operations::getattr */ -int ntfs_getattr(struct user_namespace *mnt_userns, const struct path *path, +int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, u32 flags) { struct inode *inode = d_inode(path->dentry); @@ -84,7 +84,7 @@ int ntfs_getattr(struct user_namespace *mnt_userns, const struct path *path, stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED; - generic_fillattr(mnt_userns, inode, stat); + generic_fillattr(idmap, inode, stat); stat->result_mask |= STATX_BTIME; stat->btime = ni->i_crtime; @@ -390,10 +390,10 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size)); - ni_lock(ni); - truncate_setsize(inode, new_size); + ni_lock(ni); + down_write(&ni->file.run_lock); err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, &new_valid, ni->mi.sbi->options->prealloc, NULL); @@ -657,7 +657,7 @@ out: /* * ntfs3_setattr - inode_operations::setattr */ -int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct super_block *sb = dentry->d_sb; @@ -676,7 +676,7 @@ int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, ia_valid = attr->ia_valid; } - err = setattr_prepare(mnt_userns, dentry, attr); + err = setattr_prepare(idmap, dentry, attr); if (err) goto out; @@ -704,10 +704,10 @@ int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, inode->i_size = newsize; } - setattr_copy(mnt_userns, inode, attr); + setattr_copy(idmap, inode, attr); if (mode != inode->i_mode) { - err = ntfs_acl_chmod(mnt_userns, dentry); + err = ntfs_acl_chmod(idmap, dentry); if (err) goto out; diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 20b953871574..8ce2616b087f 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -1185,7 +1185,7 @@ out: * * NOTE: if fnd != NULL (ntfs_atomic_open) then @dir is locked */ -struct inode *ntfs_create_inode(struct user_namespace *mnt_userns, +struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const struct cpu_str *uni, umode_t mode, dev_t dev, const char *symname, u32 size, @@ -1307,7 +1307,7 @@ struct inode *ntfs_create_inode(struct user_namespace *mnt_userns, goto out3; } inode = &ni->vfs_inode; - inode_init_owner(mnt_userns, inode, dir, mode); + inode_init_owner(idmap, inode, dir, mode); mode = inode->i_mode; inode->i_atime = inode->i_mtime = inode->i_ctime = ni->i_crtime = @@ -1614,7 +1614,7 @@ struct inode *ntfs_create_inode(struct user_namespace *mnt_userns, #ifdef CONFIG_NTFS3_FS_POSIX_ACL if (!S_ISLNK(mode) && (sb->s_flags & SB_POSIXACL)) { - err = ntfs_init_acl(mnt_userns, inode, dir); + err = ntfs_init_acl(idmap, inode, dir); if (err) goto out7; } else diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index c8db35e2ae17..407fe92394e2 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -94,12 +94,12 @@ static struct dentry *ntfs_lookup(struct inode *dir, struct dentry *dentry, /* * ntfs_create - inode_operations::create */ -static int ntfs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int ntfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct inode *inode; - inode = ntfs_create_inode(mnt_userns, dir, dentry, NULL, S_IFREG | mode, + inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, NULL); return IS_ERR(inode) ? PTR_ERR(inode) : 0; @@ -110,12 +110,12 @@ static int ntfs_create(struct user_namespace *mnt_userns, struct inode *dir, * * inode_operations::mknod */ -static int ntfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int ntfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; - inode = ntfs_create_inode(mnt_userns, dir, dentry, NULL, mode, rdev, + inode = ntfs_create_inode(idmap, dir, dentry, NULL, mode, rdev, NULL, 0, NULL); return IS_ERR(inode) ? PTR_ERR(inode) : 0; @@ -183,13 +183,13 @@ static int ntfs_unlink(struct inode *dir, struct dentry *dentry) /* * ntfs_symlink - inode_operations::symlink */ -static int ntfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int ntfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { u32 size = strlen(symname); struct inode *inode; - inode = ntfs_create_inode(mnt_userns, dir, dentry, NULL, S_IFLNK | 0777, + inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFLNK | 0777, 0, symname, size, NULL); return IS_ERR(inode) ? PTR_ERR(inode) : 0; @@ -198,12 +198,12 @@ static int ntfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, /* * ntfs_mkdir- inode_operations::mkdir */ -static int ntfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int ntfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; - inode = ntfs_create_inode(mnt_userns, dir, dentry, NULL, S_IFDIR | mode, + inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFDIR | mode, 0, NULL, 0, NULL); return IS_ERR(inode) ? PTR_ERR(inode) : 0; @@ -229,7 +229,7 @@ static int ntfs_rmdir(struct inode *dir, struct dentry *dentry) /* * ntfs_rename - inode_operations::rename */ -static int ntfs_rename(struct user_namespace *mnt_userns, struct inode *dir, +static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, struct inode *new_dir, struct dentry *new_dentry, u32 flags) { @@ -415,13 +415,13 @@ static int ntfs_atomic_open(struct inode *dir, struct dentry *dentry, /* * Unfortunately I don't know how to get here correct 'struct nameidata *nd' - * or 'struct user_namespace *mnt_userns'. + * or 'struct mnt_idmap *idmap'. * See atomic_open in fs/namei.c. * This is why xfstest/633 failed. - * Looks like ntfs_atomic_open must accept 'struct user_namespace *mnt_userns' as argument. + * Looks like ntfs_atomic_open must accept 'struct mnt_idmap *idmap' as argument. */ - inode = ntfs_create_inode(&init_user_ns, dir, dentry, uni, mode, 0, + inode = ntfs_create_inode(&nop_mnt_idmap, dir, dentry, uni, mode, 0, NULL, 0, fnd); err = IS_ERR(inode) ? PTR_ERR(inode) : finish_open(file, dentry, ntfs_file_open); diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 0e051c5595a2..80072e5f96f7 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -492,10 +492,12 @@ bool dir_is_empty(struct inode *dir); extern const struct file_operations ntfs_dir_operations; /* Globals from file.c */ -int ntfs_getattr(struct user_namespace *mnt_userns, const struct path *path, +int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, u32 flags); -int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); +void ntfs_sparse_cluster(struct inode *inode, struct page *page0, CLST vcn, + CLST len); int ntfs_file_open(struct inode *inode, struct file *file); int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); @@ -706,7 +708,7 @@ int ntfs_sync_inode(struct inode *inode); int ntfs_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2); int inode_write_data(struct inode *inode, const void *data, size_t bytes); -struct inode *ntfs_create_inode(struct user_namespace *mnt_userns, +struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const struct cpu_str *uni, umode_t mode, dev_t dev, const char *symname, u32 size, @@ -857,17 +859,17 @@ unsigned long ntfs_names_hash(const u16 *name, size_t len, const u16 *upcase, /* globals from xattr.c */ #ifdef CONFIG_NTFS3_FS_POSIX_ACL struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu); -int ntfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int ntfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); -int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, +int ntfs_init_acl(struct mnt_idmap *idmap, struct inode *inode, struct inode *dir); #else #define ntfs_get_acl NULL #define ntfs_set_acl NULL #endif -int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct dentry *dentry); -int ntfs_permission(struct user_namespace *mnt_userns, struct inode *inode, +int ntfs_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry); +int ntfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size); extern const struct xattr_handler *ntfs_xattr_handlers[]; diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 616df209feea..ff64302e87e5 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -578,7 +578,7 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu) return ntfs_get_acl_ex(inode, type, 0); } -static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, +static noinline int ntfs_set_acl_ex(struct mnt_idmap *idmap, struct inode *inode, struct posix_acl *acl, int type, bool init_acl) { @@ -597,7 +597,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, case ACL_TYPE_ACCESS: /* Do not change i_mode if we are in init_acl */ if (acl && !init_acl) { - err = posix_acl_update_mode(mnt_userns, inode, &mode, + err = posix_acl_update_mode(idmap, inode, &mode, &acl); if (err) return err; @@ -652,10 +652,10 @@ out: /* * ntfs_set_acl - inode_operations::set_acl */ -int ntfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int ntfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { - return ntfs_set_acl_ex(mnt_userns, d_inode(dentry), acl, type, false); + return ntfs_set_acl_ex(idmap, d_inode(dentry), acl, type, false); } /* @@ -663,7 +663,7 @@ int ntfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, * * Called from ntfs_create_inode(). */ -int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, +int ntfs_init_acl(struct mnt_idmap *idmap, struct inode *inode, struct inode *dir) { struct posix_acl *default_acl, *acl; @@ -674,7 +674,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, return err; if (default_acl) { - err = ntfs_set_acl_ex(mnt_userns, inode, default_acl, + err = ntfs_set_acl_ex(idmap, inode, default_acl, ACL_TYPE_DEFAULT, true); posix_acl_release(default_acl); } else { @@ -683,7 +683,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, if (acl) { if (!err) - err = ntfs_set_acl_ex(mnt_userns, inode, acl, + err = ntfs_set_acl_ex(idmap, inode, acl, ACL_TYPE_ACCESS, true); posix_acl_release(acl); } else { @@ -697,7 +697,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, /* * ntfs_acl_chmod - Helper for ntfs3_setattr(). */ -int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct dentry *dentry) +int ntfs_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry) { struct inode *inode = d_inode(dentry); struct super_block *sb = inode->i_sb; @@ -708,13 +708,13 @@ int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct dentry *dentry) if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; - return posix_acl_chmod(mnt_userns, dentry, inode->i_mode); + return posix_acl_chmod(idmap, dentry, inode->i_mode); } /* * ntfs_permission - inode_operations::permission */ -int ntfs_permission(struct user_namespace *mnt_userns, struct inode *inode, +int ntfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { if (ntfs_sb(inode->i_sb)->options->noacsrules) { @@ -722,7 +722,7 @@ int ntfs_permission(struct user_namespace *mnt_userns, struct inode *inode, return 0; } - return generic_permission(mnt_userns, inode, mask); + return generic_permission(idmap, inode, mask); } /* @@ -835,7 +835,7 @@ out: * ntfs_setxattr - inode_operations::setxattr */ static noinline int ntfs_setxattr(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *de, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 9f19cf9a5a9f..9fd03eaf15f8 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -260,7 +260,7 @@ static int ocfs2_set_acl(handle_t *handle, return ret; } -int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int ocfs2_iop_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { struct buffer_head *bh = NULL; @@ -274,7 +274,7 @@ int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, if (type == ACL_TYPE_ACCESS && acl) { umode_t mode; - status = posix_acl_update_mode(&init_user_ns, inode, &mode, + status = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode, &acl); if (status) goto unlock; diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index a897c4e41b26..667c6f03fa60 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -17,7 +17,7 @@ struct ocfs2_acl_entry { }; struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type, bool rcu); -int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int ocfs2_iop_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *); extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 8b2020f92b5f..ba26c5567cff 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -188,18 +188,18 @@ static int dlmfs_file_release(struct inode *inode, * We do ->setattr() just to override size changes. Our size is the size * of the LVB and nothing else. */ -static int dlmfs_file_setattr(struct user_namespace *mnt_userns, +static int dlmfs_file_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { int error; struct inode *inode = d_inode(dentry); attr->ia_valid &= ~ATTR_SIZE; - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } @@ -336,7 +336,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) if (inode) { inode->i_ino = get_next_ino(); - inode_init_owner(&init_user_ns, inode, NULL, mode); + inode_init_owner(&nop_mnt_idmap, inode, NULL, mode); inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); inc_nlink(inode); @@ -359,7 +359,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent, return NULL; inode->i_ino = get_next_ino(); - inode_init_owner(&init_user_ns, inode, parent, mode); + inode_init_owner(&nop_mnt_idmap, inode, parent, mode); inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); ip = DLMFS_I(inode); @@ -402,7 +402,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent, * File creation. Allocate an inode, and we're done.. */ /* SMP-safe */ -static int dlmfs_mkdir(struct user_namespace * mnt_userns, +static int dlmfs_mkdir(struct mnt_idmap * idmap, struct inode * dir, struct dentry * dentry, umode_t mode) @@ -451,7 +451,7 @@ bail: return status; } -static int dlmfs_create(struct user_namespace *mnt_userns, +static int dlmfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 5c60b6bc85bf..efb09de4343d 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1111,7 +1111,7 @@ out: return ret; } -int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int ocfs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { int status = 0, size_change; @@ -1142,11 +1142,11 @@ int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) return 0; - status = setattr_prepare(&init_user_ns, dentry, attr); + status = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (status) return status; - if (is_quota_modification(mnt_userns, inode, attr)) { + if (is_quota_modification(&nop_mnt_idmap, inode, attr)) { status = dquot_initialize(inode); if (status) return status; @@ -1265,7 +1265,7 @@ int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, } } - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); status = ocfs2_mark_inode_dirty(handle, inode, bh); @@ -1302,7 +1302,7 @@ bail: return status; } -int ocfs2_getattr(struct user_namespace *mnt_userns, const struct path *path, +int ocfs2_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct inode *inode = d_inode(path->dentry); @@ -1317,7 +1317,7 @@ int ocfs2_getattr(struct user_namespace *mnt_userns, const struct path *path, goto bail; } - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); /* * If there is inline data in the inode, the inode will normally not * have data blocks allocated (it may have an external xattr block). @@ -1334,7 +1334,7 @@ bail: return err; } -int ocfs2_permission(struct user_namespace *mnt_userns, struct inode *inode, +int ocfs2_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { int ret, had_lock; @@ -1360,7 +1360,7 @@ int ocfs2_permission(struct user_namespace *mnt_userns, struct inode *inode, dump_stack(); } - ret = generic_permission(&init_user_ns, inode, mask); + ret = generic_permission(&nop_mnt_idmap, inode, mask); ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); out: @@ -1991,7 +1991,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, } } - if (file && setattr_should_drop_suidgid(&init_user_ns, file_inode(file))) { + if (file && setattr_should_drop_suidgid(&nop_mnt_idmap, file_inode(file))) { ret = __ocfs2_write_remove_suid(inode, di_bh); if (ret) { mlog_errno(ret); @@ -2279,7 +2279,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file, * inode. There's also the dinode i_size state which * can be lost via setattr during extending writes (we * set inode->i_size at the end of a write. */ - if (setattr_should_drop_suidgid(&init_user_ns, inode)) { + if (setattr_should_drop_suidgid(&nop_mnt_idmap, inode)) { if (meta_level == 0) { ocfs2_inode_unlock_for_extent_tree(inode, &di_bh, diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 71db8f3aa027..8e53e4ac1120 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -49,11 +49,11 @@ int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, u64 new_i_size, u64 zero_to); int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, loff_t zero_to); -int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int ocfs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); -int ocfs2_getattr(struct user_namespace *mnt_userns, const struct path *path, +int ocfs2_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); -int ocfs2_permission(struct user_namespace *mnt_userns, +int ocfs2_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index afd54ec66103..811a6ea374bb 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -82,7 +82,7 @@ int ocfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa) return status; } -int ocfs2_fileattr_set(struct user_namespace *mnt_userns, +int ocfs2_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h index 0297c8846945..48a5fdfe87a1 100644 --- a/fs/ocfs2/ioctl.h +++ b/fs/ocfs2/ioctl.h @@ -12,7 +12,7 @@ #define OCFS2_IOCTL_PROTO_H int ocfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa); -int ocfs2_fileattr_set(struct user_namespace *mnt_userns, +int ocfs2_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg); diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index 73a3854b2afb..f37174e79fad 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c @@ -8,6 +8,7 @@ */ #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/fcntl.h> #include <cluster/masklog.h> diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index a8fd51afb794..9175dbc47201 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -197,8 +197,8 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode) * callers. */ if (S_ISDIR(mode)) set_nlink(inode, 2); - mode = mode_strip_sgid(&init_user_ns, dir, mode); - inode_init_owner(&init_user_ns, inode, dir, mode); + mode = mode_strip_sgid(&nop_mnt_idmap, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); status = dquot_initialize(inode); if (status) return ERR_PTR(status); @@ -221,7 +221,7 @@ static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb, iput(inode); } -static int ocfs2_mknod(struct user_namespace *mnt_userns, +static int ocfs2_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, @@ -642,7 +642,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, fe_blkno, suballoc_loc, suballoc_bit); } -static int ocfs2_mkdir(struct user_namespace *mnt_userns, +static int ocfs2_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) @@ -651,14 +651,14 @@ static int ocfs2_mkdir(struct user_namespace *mnt_userns, trace_ocfs2_mkdir(dir, dentry, dentry->d_name.len, dentry->d_name.name, OCFS2_I(dir)->ip_blkno, mode); - ret = ocfs2_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0); + ret = ocfs2_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFDIR, 0); if (ret) mlog_errno(ret); return ret; } -static int ocfs2_create(struct user_namespace *mnt_userns, +static int ocfs2_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, @@ -668,7 +668,7 @@ static int ocfs2_create(struct user_namespace *mnt_userns, trace_ocfs2_create(dir, dentry, dentry->d_name.len, dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno, mode); - ret = ocfs2_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0); + ret = ocfs2_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFREG, 0); if (ret) mlog_errno(ret); @@ -1194,7 +1194,7 @@ static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2) ocfs2_inode_unlock(inode2, 1); } -static int ocfs2_rename(struct user_namespace *mnt_userns, +static int ocfs2_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, @@ -1784,7 +1784,7 @@ bail: return status; } -static int ocfs2_symlink(struct user_namespace *mnt_userns, +static int ocfs2_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 623db358b1ef..5a656dc683f1 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4316,7 +4316,7 @@ static inline int ocfs2_may_create(struct inode *dir, struct dentry *child) return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; - return inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC); + return inode_permission(&nop_mnt_idmap, dir, MAY_WRITE | MAY_EXEC); } /** @@ -4370,7 +4370,7 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir, * file. */ if (!preserve) { - error = inode_permission(&init_user_ns, inode, MAY_READ); + error = inode_permission(&nop_mnt_idmap, inode, MAY_READ); if (error) return error; } diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 64e6ddcfe329..05d4414d0c33 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/miscdevice.h> #include <linux/mutex.h> #include <linux/slab.h> diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 95d0611c5fc7..389308efe854 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -7247,7 +7247,7 @@ static int ocfs2_xattr_security_get(const struct xattr_handler *handler, } static int ocfs2_xattr_security_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -7320,7 +7320,7 @@ static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, } static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -7351,7 +7351,7 @@ static int ocfs2_xattr_user_get(const struct xattr_handler *handler, } static int ocfs2_xattr_user_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index c219f91f44e9..82cf7e9a665f 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -279,13 +279,13 @@ out_free_inode: return err; } -static int omfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int omfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { return omfs_add_node(dir, dentry, mode | S_IFDIR); } -static int omfs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int omfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return omfs_add_node(dir, dentry, mode | S_IFREG); @@ -370,7 +370,7 @@ static bool omfs_fill_chain(struct inode *dir, struct dir_context *ctx, return true; } -static int omfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +static int omfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 3a5b4b88a583..0101f1f87b56 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -337,13 +337,13 @@ const struct file_operations omfs_file_operations = { .splice_read = generic_file_splice_read, }; -static int omfs_setattr(struct user_namespace *mnt_userns, +static int omfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); int error; - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; @@ -356,7 +356,7 @@ static int omfs_setattr(struct user_namespace *mnt_userns, omfs_truncate(inode); } - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 2a0e83236c01..c4c79e07efc7 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -48,7 +48,7 @@ struct inode *omfs_new_inode(struct inode *dir, umode_t mode) goto fail; inode->i_ino = new_block; - inode_init_owner(&init_user_ns, inode, NULL, mode); + inode_init_owner(&nop_mnt_idmap, inode, NULL, mode); inode->i_mapping->a_ops = &omfs_aops; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); diff --git a/fs/open.c b/fs/open.c index ceb88ac0ca3b..8038cf652583 100644 --- a/fs/open.c +++ b/fs/open.c @@ -33,10 +33,11 @@ #include <linux/dnotify.h> #include <linux/compat.h> #include <linux/mnt_idmapping.h> +#include <linux/filelock.h> #include "internal.h" -int do_truncate(struct user_namespace *mnt_userns, struct dentry *dentry, +int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry, loff_t length, unsigned int time_attrs, struct file *filp) { int ret; @@ -54,7 +55,7 @@ int do_truncate(struct user_namespace *mnt_userns, struct dentry *dentry, } /* Remove suid, sgid, and file capabilities on truncate too */ - ret = dentry_needs_remove_privs(mnt_userns, dentry); + ret = dentry_needs_remove_privs(idmap, dentry); if (ret < 0) return ret; if (ret) @@ -62,14 +63,14 @@ int do_truncate(struct user_namespace *mnt_userns, struct dentry *dentry, inode_lock(dentry->d_inode); /* Note any delegations or leases have already been broken: */ - ret = notify_change(mnt_userns, dentry, &newattrs, NULL); + ret = notify_change(idmap, dentry, &newattrs, NULL); inode_unlock(dentry->d_inode); return ret; } long vfs_truncate(const struct path *path, loff_t length) { - struct user_namespace *mnt_userns; + struct mnt_idmap *idmap; struct inode *inode; long error; @@ -85,8 +86,8 @@ long vfs_truncate(const struct path *path, loff_t length) if (error) goto out; - mnt_userns = mnt_user_ns(path->mnt); - error = inode_permission(mnt_userns, inode, MAY_WRITE); + idmap = mnt_idmap(path->mnt); + error = inode_permission(idmap, inode, MAY_WRITE); if (error) goto mnt_drop_write_and_out; @@ -108,7 +109,7 @@ long vfs_truncate(const struct path *path, loff_t length) error = security_path_truncate(path); if (!error) - error = do_truncate(mnt_userns, path->dentry, length, 0, NULL); + error = do_truncate(idmap, path->dentry, length, 0, NULL); put_write_and_out: put_write_access(inode); @@ -190,7 +191,7 @@ long do_sys_ftruncate(unsigned int fd, loff_t length, int small) sb_start_write(inode->i_sb); error = security_file_truncate(f.file); if (!error) - error = do_truncate(file_mnt_user_ns(f.file), dentry, length, + error = do_truncate(file_mnt_idmap(f.file), dentry, length, ATTR_MTIME | ATTR_CTIME, f.file); sb_end_write(inode->i_sb); out_putf: @@ -459,7 +460,7 @@ retry: goto out_path_release; } - res = inode_permission(mnt_user_ns(path.mnt), inode, mode | MAY_ACCESS); + res = inode_permission(mnt_idmap(path.mnt), inode, mode | MAY_ACCESS); /* SuS v2 requires we report a read only fs too */ if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) goto out_path_release; @@ -603,7 +604,7 @@ retry_deleg: goto out_unlock; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = notify_change(mnt_user_ns(path->mnt), path->dentry, + error = notify_change(mnt_idmap(path->mnt), path->dentry, &newattrs, &delegated_inode); out_unlock: inode_unlock(inode); @@ -701,7 +702,8 @@ static inline bool setattr_vfsgid(struct iattr *attr, kgid_t kgid) int chown_common(const struct path *path, uid_t user, gid_t group) { - struct user_namespace *mnt_userns, *fs_userns; + struct mnt_idmap *idmap; + struct user_namespace *fs_userns; struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; int error; @@ -712,7 +714,7 @@ int chown_common(const struct path *path, uid_t user, gid_t group) uid = make_kuid(current_user_ns(), user); gid = make_kgid(current_user_ns(), group); - mnt_userns = mnt_user_ns(path->mnt); + idmap = mnt_idmap(path->mnt); fs_userns = i_user_ns(inode); retry_deleg: @@ -726,14 +728,14 @@ retry_deleg: inode_lock(inode); if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV | - setattr_should_drop_sgid(mnt_userns, inode); + setattr_should_drop_sgid(idmap, inode); /* Continue to send actual fs values, not the mount values. */ error = security_path_chown( path, - from_vfsuid(mnt_userns, fs_userns, newattrs.ia_vfsuid), - from_vfsgid(mnt_userns, fs_userns, newattrs.ia_vfsgid)); + from_vfsuid(idmap, fs_userns, newattrs.ia_vfsuid), + from_vfsgid(idmap, fs_userns, newattrs.ia_vfsgid)); if (!error) - error = notify_change(mnt_userns, path->dentry, &newattrs, + error = notify_change(idmap, path->dentry, &newattrs, &delegated_inode); inode_unlock(inode); if (delegated_inode) { @@ -870,7 +872,7 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; - error = break_lease(locks_inode(f), f->f_flags); + error = break_lease(file_inode(f), f->f_flags); if (error) goto cleanup_all; @@ -1064,7 +1066,7 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode, if (IS_ERR(f)) return f; - error = vfs_create(mnt_user_ns(path->mnt), + error = vfs_create(mnt_idmap(path->mnt), d_inode(path->dentry->d_parent), path->dentry, mode, true); if (!error) diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c index c5da2091cefb..5aefb705bcc8 100644 --- a/fs/orangefs/acl.c +++ b/fs/orangefs/acl.c @@ -118,7 +118,7 @@ out: return error; } -int orangefs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int orangefs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int error; @@ -136,7 +136,7 @@ int orangefs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, * and "mode" to the new desired value. It is up to * us to propagate the new mode back to the server... */ - error = posix_acl_update_mode(&init_user_ns, inode, + error = posix_acl_update_mode(&nop_mnt_idmap, inode, &iattr.ia_mode, &acl); if (error) { gossip_err("%s: posix_acl_update_mode err: %d\n", diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 167fa43b24f9..4ecb91a9bbeb 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -14,6 +14,7 @@ #include "orangefs-kernel.h" #include "orangefs-bufmap.h" #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/pagemap.h> static int flush_racache(struct inode *inode) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 4df560894386..11e21a0e65ce 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -822,7 +822,7 @@ again: ORANGEFS_I(inode)->attr_uid = current_fsuid(); ORANGEFS_I(inode)->attr_gid = current_fsgid(); } - setattr_copy(&init_user_ns, inode, iattr); + setattr_copy(&nop_mnt_idmap, inode, iattr); spin_unlock(&inode->i_lock); mark_inode_dirty(inode); @@ -839,20 +839,20 @@ int __orangefs_setattr_mode(struct dentry *dentry, struct iattr *iattr) ret = __orangefs_setattr(inode, iattr); /* change mode on a file that has ACLs */ if (!ret && (iattr->ia_valid & ATTR_MODE)) - ret = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode); + ret = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode); return ret; } /* * Change attributes of an object referenced by dentry. */ -int orangefs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int orangefs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { int ret; gossip_debug(GOSSIP_INODE_DEBUG, "__orangefs_setattr: called on %pd\n", dentry); - ret = setattr_prepare(&init_user_ns, dentry, iattr); + ret = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (ret) goto out; ret = __orangefs_setattr_mode(dentry, iattr); @@ -866,7 +866,7 @@ out: /* * Obtain attributes of an object given a dentry */ -int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path, +int orangefs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { int ret; @@ -879,7 +879,7 @@ int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path, ret = orangefs_inode_getattr(inode, request_mask & STATX_SIZE ? ORANGEFS_GETATTR_SIZE : 0); if (ret == 0) { - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); /* override block size reported to stat */ if (!(request_mask & STATX_SIZE)) @@ -890,7 +890,7 @@ int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path, return ret; } -int orangefs_permission(struct user_namespace *mnt_userns, +int orangefs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { int ret; @@ -905,7 +905,7 @@ int orangefs_permission(struct user_namespace *mnt_userns, if (ret < 0) return ret; - return generic_permission(&init_user_ns, inode, mask); + return generic_permission(&nop_mnt_idmap, inode, mask); } int orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags) @@ -944,7 +944,7 @@ static int orangefs_fileattr_get(struct dentry *dentry, struct fileattr *fa) return 0; } -static int orangefs_fileattr_set(struct user_namespace *mnt_userns, +static int orangefs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { u64 val = 0; diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 75c1a3dcf68c..77518e248cf7 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -15,7 +15,7 @@ /* * Get a newly allocated inode to go with a negative dentry. */ -static int orangefs_create(struct user_namespace *mnt_userns, +static int orangefs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, @@ -216,7 +216,7 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry) return ret; } -static int orangefs_symlink(struct user_namespace *mnt_userns, +static int orangefs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) @@ -305,7 +305,7 @@ out: return ret; } -static int orangefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int orangefs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct orangefs_inode_s *parent = ORANGEFS_I(dir); @@ -375,7 +375,7 @@ out: return ret; } -static int orangefs_rename(struct user_namespace *mnt_userns, +static int orangefs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 6e0cc01b3a14..ce20d3443869 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -106,7 +106,7 @@ enum orangefs_vfs_op_states { extern const struct xattr_handler *orangefs_xattr_handlers[]; extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu); -extern int orangefs_set_acl(struct user_namespace *mnt_userns, +extern int orangefs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); int __orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type); @@ -362,12 +362,12 @@ struct inode *orangefs_new_inode(struct super_block *sb, int __orangefs_setattr(struct inode *, struct iattr *); int __orangefs_setattr_mode(struct dentry *dentry, struct iattr *iattr); -int orangefs_setattr(struct user_namespace *, struct dentry *, struct iattr *); +int orangefs_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); -int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path, +int orangefs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); -int orangefs_permission(struct user_namespace *mnt_userns, +int orangefs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); int orangefs_update_time(struct inode *, struct timespec64 *, int); diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index 9a5b757fbd2f..6ecad4f94ae6 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -526,7 +526,7 @@ out_unlock: } static int orangefs_xattr_set_default(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 6e4e65ee050d..c14e90764e35 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -792,7 +792,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) if (!c->metacopy && c->stat.size) { err = ovl_copy_up_file(ofs, c->dentry, tmpfile, c->stat.size); if (err) - return err; + goto out_fput; } err = ovl_copy_up_metadata(c, temp); @@ -1011,6 +1011,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, if (err) return err; + if (!kuid_has_mapping(current_user_ns(), ctx.stat.uid) || + !kgid_has_mapping(current_user_ns(), ctx.stat.gid)) + return -EOVERFLOW; + ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags); if (parent) { diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index f61e37f4c8ff..fc25fb95d5fc 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -641,7 +641,7 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, inode->i_state |= I_CREATING; spin_unlock(&inode->i_lock); - inode_init_owner(&init_user_ns, inode, dentry->d_parent->d_inode, mode); + inode_init_owner(&nop_mnt_idmap, inode, dentry->d_parent->d_inode, mode); attr.mode = inode->i_mode; err = ovl_create_or_link(dentry, inode, &attr, false); @@ -655,19 +655,19 @@ out: return err; } -static int ovl_create(struct user_namespace *mnt_userns, struct inode *dir, +static int ovl_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL); } -static int ovl_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int ovl_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL); } -static int ovl_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int ovl_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { /* Don't allow creation of "whiteout" on overlay */ @@ -677,7 +677,7 @@ static int ovl_mknod(struct user_namespace *mnt_userns, struct inode *dir, return ovl_create_object(dentry, mode, rdev, NULL); } -static int ovl_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int ovl_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *link) { return ovl_create_object(dentry, S_IFLNK, 0, link); @@ -1075,7 +1075,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) return err; } -static int ovl_rename(struct user_namespace *mnt_userns, struct inode *olddir, +static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *old, struct inode *newdir, struct dentry *new, unsigned int flags) { diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index a25bb3453dde..defd4e231ad2 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -392,8 +392,8 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, */ take_dentry_name_snapshot(&name, real); /* - * No mnt_userns handling here: it's an internal lookup. Could skip - * permission checking altogether, but for now just use non-mnt_userns + * No idmap handling here: it's an internal lookup. Could skip + * permission checking altogether, but for now just use non-idmap * transformed ids. */ this = lookup_one_len(name.name.name, connected, name.name.len); diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index c9d0c362c7ef..7c04f033aadd 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -42,7 +42,7 @@ static struct file *ovl_open_realfile(const struct file *file, { struct inode *realinode = d_inode(realpath->dentry); struct inode *inode = file_inode(file); - struct user_namespace *real_mnt_userns; + struct mnt_idmap *real_idmap; struct file *realfile; const struct cred *old_cred; int flags = file->f_flags | OVL_OPEN_FLAGS; @@ -53,12 +53,12 @@ static struct file *ovl_open_realfile(const struct file *file, acc_mode |= MAY_APPEND; old_cred = ovl_override_creds(inode->i_sb); - real_mnt_userns = mnt_user_ns(realpath->mnt); - err = inode_permission(real_mnt_userns, realinode, MAY_OPEN | acc_mode); + real_idmap = mnt_idmap(realpath->mnt); + err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode); if (err) { realfile = ERR_PTR(err); } else { - if (!inode_owner_or_capable(real_mnt_userns, realinode)) + if (!inode_owner_or_capable(real_idmap, realinode)) flags &= ~O_NOATIME; realfile = open_with_fake_path(&file->f_path, flags, realinode, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index ee6dfa577c93..541cf3717fc2 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -19,7 +19,7 @@ #include "overlayfs.h" -int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { int err; @@ -28,7 +28,7 @@ int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct dentry *upperdentry; const struct cred *old_cred; - err = setattr_prepare(&init_user_ns, dentry, attr); + err = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (err) return err; @@ -153,7 +153,7 @@ static void ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) } } -int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path, +int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; @@ -278,7 +278,7 @@ out: return err; } -int ovl_permission(struct user_namespace *mnt_userns, +int ovl_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct inode *upperinode = ovl_inode_upper(inode); @@ -298,7 +298,7 @@ int ovl_permission(struct user_namespace *mnt_userns, * Check overlay inode with the creds of task and underlying inode * with creds of mounter */ - err = generic_permission(&init_user_ns, inode, mask); + err = generic_permission(&nop_mnt_idmap, inode, mask); if (err) return err; @@ -310,7 +310,7 @@ int ovl_permission(struct user_namespace *mnt_userns, /* Make sure mounter can read file for copy up later */ mask |= MAY_READ; } - err = inode_permission(mnt_user_ns(realpath.mnt), realinode, mask); + err = inode_permission(mnt_idmap(realpath.mnt), realinode, mask); revert_creds(old_cred); return err; @@ -361,7 +361,7 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, if (!value && !upperdentry) { ovl_path_lower(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getxattr(mnt_user_ns(realpath.mnt), realdentry, name, NULL, 0); + err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0); revert_creds(old_cred); if (err < 0) goto out_drop_write; @@ -403,7 +403,7 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, ovl_i_path_real(inode, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - res = vfs_getxattr(mnt_user_ns(realpath.mnt), realpath.dentry, name, value, size); + res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size); revert_creds(old_cred); return res; } @@ -463,7 +463,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) * alter the POSIX ACLs for the underlying filesystem. */ static void ovl_idmap_posix_acl(const struct inode *realinode, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct posix_acl *acl) { struct user_namespace *fs_userns = i_user_ns(realinode); @@ -475,11 +475,11 @@ static void ovl_idmap_posix_acl(const struct inode *realinode, struct posix_acl_entry *e = &acl->a_entries[i]; switch (e->e_tag) { case ACL_USER: - vfsuid = make_vfsuid(mnt_userns, fs_userns, e->e_uid); + vfsuid = make_vfsuid(idmap, fs_userns, e->e_uid); e->e_uid = vfsuid_into_kuid(vfsuid); break; case ACL_GROUP: - vfsgid = make_vfsgid(mnt_userns, fs_userns, e->e_gid); + vfsgid = make_vfsgid(idmap, fs_userns, e->e_gid); e->e_gid = vfsgid_into_kgid(vfsgid); break; } @@ -514,15 +514,15 @@ struct posix_acl *ovl_get_acl_path(const struct path *path, const char *acl_name, bool noperm) { struct posix_acl *real_acl, *clone; - struct user_namespace *mnt_userns; + struct mnt_idmap *idmap; struct inode *realinode = d_inode(path->dentry); - mnt_userns = mnt_user_ns(path->mnt); + idmap = mnt_idmap(path->mnt); if (noperm) real_acl = get_inode_acl(realinode, posix_acl_type(acl_name)); else - real_acl = vfs_get_acl(mnt_userns, path->dentry, acl_name); + real_acl = vfs_get_acl(idmap, path->dentry, acl_name); if (IS_ERR_OR_NULL(real_acl)) return real_acl; @@ -540,7 +540,7 @@ struct posix_acl *ovl_get_acl_path(const struct path *path, if (!clone) return ERR_PTR(-ENOMEM); - ovl_idmap_posix_acl(realinode, mnt_userns, clone); + ovl_idmap_posix_acl(realinode, idmap, clone); return clone; } @@ -555,7 +555,7 @@ struct posix_acl *ovl_get_acl_path(const struct path *path, * * This is obviously only relevant when idmapped layers are used. */ -struct posix_acl *do_ovl_get_acl(struct user_namespace *mnt_userns, +struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap, struct inode *inode, int type, bool rcu, bool noperm) { @@ -618,7 +618,7 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, ovl_path_lower(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - real_acl = vfs_get_acl(mnt_user_ns(realpath.mnt), realdentry, + real_acl = vfs_get_acl(mnt_idmap(realpath.mnt), realdentry, acl_name); revert_creds(old_cred); if (IS_ERR(real_acl)) { @@ -651,7 +651,7 @@ out_drop_write: return err; } -int ovl_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int err; @@ -665,7 +665,7 @@ int ovl_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, return -EOPNOTSUPP; if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) return -EPERM; /* @@ -674,10 +674,10 @@ int ovl_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, */ if (unlikely(inode->i_mode & S_ISGID) && type == ACL_TYPE_ACCESS && !in_group_p(inode->i_gid) && - !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) { + !capable_wrt_inode_uidgid(&nop_mnt_idmap, inode, CAP_FSETID)) { struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; - err = ovl_setattr(&init_user_ns, dentry, &iattr); + err = ovl_setattr(&nop_mnt_idmap, dentry, &iattr); if (err) return err; } @@ -755,10 +755,10 @@ int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa) if (err) return err; - return vfs_fileattr_set(mnt_user_ns(realpath->mnt), realpath->dentry, fa); + return vfs_fileattr_set(mnt_idmap(realpath->mnt), realpath->dentry, fa); } -int ovl_fileattr_set(struct user_namespace *mnt_userns, +int ovl_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 46753134533a..cfb3420b7df0 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -204,7 +204,7 @@ static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d, struct dentry *base, int len, bool drop_negative) { - struct dentry *ret = lookup_one_unlocked(mnt_user_ns(d->mnt), name, base, len); + struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->mnt), name, base, len); if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { if (drop_negative && ret->d_lockref.count == 1) { @@ -711,7 +711,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, if (err) return ERR_PTR(err); - index = lookup_one_positive_unlocked(ovl_upper_mnt_userns(ofs), name.name, + index = lookup_one_positive_unlocked(ovl_upper_mnt_idmap(ofs), name.name, ofs->indexdir, name.len); if (IS_ERR(index)) { err = PTR_ERR(index); @@ -1182,7 +1182,7 @@ bool ovl_lower_positive(struct dentry *dentry) struct dentry *this; struct dentry *lowerdir = poe->lowerstack[i].dentry; - this = lookup_one_positive_unlocked(mnt_user_ns(poe->lowerstack[i].layer->mnt), + this = lookup_one_positive_unlocked(mnt_idmap(poe->lowerstack[i].layer->mnt), name->name, lowerdir, name->len); if (IS_ERR(this)) { switch (PTR_ERR(this)) { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 1df7f850ff3b..4d0b278f5630 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -141,13 +141,13 @@ static inline int ovl_do_notify_change(struct ovl_fs *ofs, struct dentry *upperdentry, struct iattr *attr) { - return notify_change(ovl_upper_mnt_userns(ofs), upperdentry, attr, NULL); + return notify_change(ovl_upper_mnt_idmap(ofs), upperdentry, attr, NULL); } static inline int ovl_do_rmdir(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry) { - int err = vfs_rmdir(ovl_upper_mnt_userns(ofs), dir, dentry); + int err = vfs_rmdir(ovl_upper_mnt_idmap(ofs), dir, dentry); pr_debug("rmdir(%pd2) = %i\n", dentry, err); return err; @@ -156,7 +156,7 @@ static inline int ovl_do_rmdir(struct ovl_fs *ofs, static inline int ovl_do_unlink(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry) { - int err = vfs_unlink(ovl_upper_mnt_userns(ofs), dir, dentry, NULL); + int err = vfs_unlink(ovl_upper_mnt_idmap(ofs), dir, dentry, NULL); pr_debug("unlink(%pd2) = %i\n", dentry, err); return err; @@ -165,7 +165,8 @@ static inline int ovl_do_unlink(struct ovl_fs *ofs, struct inode *dir, static inline int ovl_do_link(struct ovl_fs *ofs, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) { - int err = vfs_link(old_dentry, ovl_upper_mnt_userns(ofs), dir, new_dentry, NULL); + int err = vfs_link(old_dentry, ovl_upper_mnt_idmap(ofs), dir, + new_dentry, NULL); pr_debug("link(%pd2, %pd2) = %i\n", old_dentry, new_dentry, err); return err; @@ -175,7 +176,7 @@ static inline int ovl_do_create(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode) { - int err = vfs_create(ovl_upper_mnt_userns(ofs), dir, dentry, mode, true); + int err = vfs_create(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, true); pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err); return err; @@ -185,7 +186,7 @@ static inline int ovl_do_mkdir(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode) { - int err = vfs_mkdir(ovl_upper_mnt_userns(ofs), dir, dentry, mode); + int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); return err; } @@ -194,7 +195,7 @@ static inline int ovl_do_mknod(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { - int err = vfs_mknod(ovl_upper_mnt_userns(ofs), dir, dentry, mode, dev); + int err = vfs_mknod(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, dev); pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err); return err; @@ -204,7 +205,7 @@ static inline int ovl_do_symlink(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, const char *oldname) { - int err = vfs_symlink(ovl_upper_mnt_userns(ofs), dir, dentry, oldname); + int err = vfs_symlink(ovl_upper_mnt_idmap(ofs), dir, dentry, oldname); pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err); return err; @@ -217,7 +218,7 @@ static inline ssize_t ovl_do_getxattr(const struct path *path, const char *name, WARN_ON(path->dentry->d_sb != path->mnt->mnt_sb); - err = vfs_getxattr(mnt_user_ns(path->mnt), path->dentry, + err = vfs_getxattr(mnt_idmap(path->mnt), path->dentry, name, value, size); len = (value && err > 0) ? err : 0; @@ -251,7 +252,7 @@ static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { - int err = vfs_setxattr(ovl_upper_mnt_userns(ofs), dentry, name, + int err = vfs_setxattr(ovl_upper_mnt_idmap(ofs), dentry, name, value, size, flags); pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, %d) = %i\n", @@ -269,7 +270,7 @@ static inline int ovl_setxattr(struct ovl_fs *ofs, struct dentry *dentry, static inline int ovl_do_removexattr(struct ovl_fs *ofs, struct dentry *dentry, const char *name) { - int err = vfs_removexattr(ovl_upper_mnt_userns(ofs), dentry, name); + int err = vfs_removexattr(ovl_upper_mnt_idmap(ofs), dentry, name); pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err); return err; } @@ -283,13 +284,13 @@ static inline int ovl_removexattr(struct ovl_fs *ofs, struct dentry *dentry, static inline int ovl_do_set_acl(struct ovl_fs *ofs, struct dentry *dentry, const char *acl_name, struct posix_acl *acl) { - return vfs_set_acl(ovl_upper_mnt_userns(ofs), dentry, acl_name, acl); + return vfs_set_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name, acl); } static inline int ovl_do_remove_acl(struct ovl_fs *ofs, struct dentry *dentry, const char *acl_name) { - return vfs_remove_acl(ovl_upper_mnt_userns(ofs), dentry, acl_name); + return vfs_remove_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name); } static inline int ovl_do_rename(struct ovl_fs *ofs, struct inode *olddir, @@ -298,10 +299,10 @@ static inline int ovl_do_rename(struct ovl_fs *ofs, struct inode *olddir, { int err; struct renamedata rd = { - .old_mnt_userns = ovl_upper_mnt_userns(ofs), + .old_mnt_idmap = ovl_upper_mnt_idmap(ofs), .old_dir = olddir, .old_dentry = olddentry, - .new_mnt_userns = ovl_upper_mnt_userns(ofs), + .new_mnt_idmap = ovl_upper_mnt_idmap(ofs), .new_dir = newdir, .new_dentry = newdentry, .flags = flags, @@ -319,7 +320,7 @@ static inline int ovl_do_rename(struct ovl_fs *ofs, struct inode *olddir, static inline int ovl_do_whiteout(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry) { - int err = vfs_whiteout(ovl_upper_mnt_userns(ofs), dir, dentry); + int err = vfs_whiteout(ovl_upper_mnt_idmap(ofs), dir, dentry); pr_debug("whiteout(%pd2) = %i\n", dentry, err); return err; } @@ -328,7 +329,7 @@ static inline struct file *ovl_do_tmpfile(struct ovl_fs *ofs, struct dentry *dentry, umode_t mode) { struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = dentry }; - struct file *file = vfs_tmpfile_open(ovl_upper_mnt_userns(ofs), &path, mode, + struct file *file = vfs_tmpfile_open(ovl_upper_mnt_idmap(ofs), &path, mode, O_LARGEFILE | O_WRONLY, current_cred()); int err = PTR_ERR_OR_ZERO(file); @@ -340,7 +341,7 @@ static inline struct dentry *ovl_lookup_upper(struct ovl_fs *ofs, const char *name, struct dentry *base, int len) { - return lookup_one(ovl_upper_mnt_userns(ofs), name, base, len); + return lookup_one(ovl_upper_mnt_idmap(ofs), name, base, len); } static inline bool ovl_open_flags_need_copy_up(int flags) @@ -596,11 +597,11 @@ int ovl_set_nlink_lower(struct dentry *dentry); unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry, struct dentry *upperdentry, unsigned int fallback); -int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); -int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path, +int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); -int ovl_permission(struct user_namespace *mnt_userns, struct inode *inode, +int ovl_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags); @@ -609,20 +610,20 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); #ifdef CONFIG_FS_POSIX_ACL -struct posix_acl *do_ovl_get_acl(struct user_namespace *mnt_userns, +struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap, struct inode *inode, int type, bool rcu, bool noperm); static inline struct posix_acl *ovl_get_inode_acl(struct inode *inode, int type, bool rcu) { - return do_ovl_get_acl(&init_user_ns, inode, type, rcu, true); + return do_ovl_get_acl(&nop_mnt_idmap, inode, type, rcu, true); } -static inline struct posix_acl *ovl_get_acl(struct user_namespace *mnt_userns, +static inline struct posix_acl *ovl_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type) { - return do_ovl_get_acl(mnt_userns, d_inode(dentry), type, false, false); + return do_ovl_get_acl(idmap, d_inode(dentry), type, false, false); } -int ovl_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); struct posix_acl *ovl_get_acl_path(const struct path *path, const char *acl_name, bool noperm); @@ -717,7 +718,7 @@ void ovl_aio_request_cache_destroy(void); int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa); int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa); int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa); -int ovl_fileattr_set(struct user_namespace *mnt_userns, +int ovl_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); /* copy_up.c */ diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index e1af8f660698..fd11fe6d6d45 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -90,9 +90,9 @@ static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs) return ofs->layers[0].mnt; } -static inline struct user_namespace *ovl_upper_mnt_userns(struct ovl_fs *ofs) +static inline struct mnt_idmap *ovl_upper_mnt_idmap(struct ovl_fs *ofs) { - return mnt_user_ns(ovl_upper_mnt(ofs)); + return mnt_idmap(ovl_upper_mnt(ofs)); } static inline struct ovl_fs *OVL_FS(struct super_block *sb) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 8cd2b9947de1..b6952b21a7ee 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -278,7 +278,7 @@ static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data while (rdd->first_maybe_whiteout) { p = rdd->first_maybe_whiteout; rdd->first_maybe_whiteout = p->next_maybe_whiteout; - dentry = lookup_one(mnt_user_ns(path->mnt), p->name, dir, p->len); + dentry = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len); if (!IS_ERR(dentry)) { p->is_whiteout = ovl_is_whiteout(dentry); dput(dentry); @@ -480,7 +480,7 @@ static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry goto get; } } - this = lookup_one(mnt_user_ns(path->mnt), p->name, dir, p->len); + this = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len); if (IS_ERR_OR_NULL(this) || !this->d_inode) { /* Mark a stale entry */ p->is_whiteout = true; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 85b891152a2c..f1d9f75f8786 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1012,7 +1012,7 @@ static int ovl_own_xattr_get(const struct xattr_handler *handler, } static int ovl_own_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -1028,7 +1028,7 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler, } static int ovl_other_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index bde291623c8c..923d66d131c1 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -491,7 +491,7 @@ bool ovl_is_whiteout(struct dentry *dentry) struct file *ovl_path_open(const struct path *path, int flags) { struct inode *inode = d_inode(path->dentry); - struct user_namespace *real_mnt_userns = mnt_user_ns(path->mnt); + struct mnt_idmap *real_idmap = mnt_idmap(path->mnt); int err, acc_mode; if (flags & ~(O_ACCMODE | O_LARGEFILE)) @@ -508,12 +508,12 @@ struct file *ovl_path_open(const struct path *path, int flags) BUG(); } - err = inode_permission(real_mnt_userns, inode, acc_mode | MAY_OPEN); + err = inode_permission(real_idmap, inode, acc_mode | MAY_OPEN); if (err) return ERR_PTR(err); /* O_NOATIME is an optimization, don't fail if not permitted */ - if (inode_owner_or_capable(real_mnt_userns, inode)) + if (inode_owner_or_capable(real_idmap, inode)) flags |= O_NOATIME; return dentry_open(path, flags, current_cred()); @@ -1101,16 +1101,16 @@ void ovl_copyattr(struct inode *inode) { struct path realpath; struct inode *realinode; - struct user_namespace *real_mnt_userns; + struct mnt_idmap *real_idmap; vfsuid_t vfsuid; vfsgid_t vfsgid; ovl_i_path_real(inode, &realpath); realinode = d_inode(realpath.dentry); - real_mnt_userns = mnt_user_ns(realpath.mnt); + real_idmap = mnt_idmap(realpath.mnt); - vfsuid = i_uid_into_vfsuid(real_mnt_userns, realinode); - vfsgid = i_gid_into_vfsgid(real_mnt_userns, realinode); + vfsuid = i_uid_into_vfsuid(real_idmap, realinode); + vfsgid = i_gid_into_vfsgid(real_idmap, realinode); inode->i_uid = vfsuid_into_kuid(vfsuid); inode->i_gid = vfsgid_into_kgid(vfsgid); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index d7bc81fc0840..fda167069617 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -28,6 +28,7 @@ #include <linux/security.h> #include <linux/evm.h> #include <linux/fsnotify.h> +#include <linux/filelock.h> #include "internal.h" @@ -111,7 +112,7 @@ void forget_all_cached_acls(struct inode *inode) } EXPORT_SYMBOL(forget_all_cached_acls); -static struct posix_acl *__get_acl(struct user_namespace *mnt_userns, +static struct posix_acl *__get_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, int type) { @@ -154,7 +155,7 @@ static struct posix_acl *__get_acl(struct user_namespace *mnt_userns, * we'll just create the negative cache entry. */ if (dentry && inode->i_op->get_acl) { - acl = inode->i_op->get_acl(mnt_userns, dentry, type); + acl = inode->i_op->get_acl(idmap, dentry, type); } else if (inode->i_op->get_inode_acl) { acl = inode->i_op->get_inode_acl(inode, type, false); } else { @@ -181,7 +182,7 @@ static struct posix_acl *__get_acl(struct user_namespace *mnt_userns, struct posix_acl *get_inode_acl(struct inode *inode, int type) { - return __get_acl(&init_user_ns, NULL, inode, type); + return __get_acl(&nop_mnt_idmap, NULL, inode, type); } EXPORT_SYMBOL(get_inode_acl); @@ -372,7 +373,7 @@ EXPORT_SYMBOL(posix_acl_from_mode); * by the acl. Returns -E... otherwise. */ int -posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, +posix_acl_permission(struct mnt_idmap *idmap, struct inode *inode, const struct posix_acl *acl, int want) { const struct posix_acl_entry *pa, *pe, *mask_obj; @@ -387,18 +388,18 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, switch(pa->e_tag) { case ACL_USER_OBJ: /* (May have been checked already) */ - vfsuid = i_uid_into_vfsuid(mnt_userns, inode); + vfsuid = i_uid_into_vfsuid(idmap, inode); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) goto check_perm; break; case ACL_USER: - vfsuid = make_vfsuid(mnt_userns, fs_userns, + vfsuid = make_vfsuid(idmap, fs_userns, pa->e_uid); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) goto mask; break; case ACL_GROUP_OBJ: - vfsgid = i_gid_into_vfsgid(mnt_userns, inode); + vfsgid = i_gid_into_vfsgid(idmap, inode); if (vfsgid_in_group_p(vfsgid)) { found = 1; if ((pa->e_perm & want) == want) @@ -406,7 +407,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, } break; case ACL_GROUP: - vfsgid = make_vfsgid(mnt_userns, fs_userns, + vfsgid = make_vfsgid(idmap, fs_userns, pa->e_gid); if (vfsgid_in_group_p(vfsgid)) { found = 1; @@ -591,18 +592,18 @@ EXPORT_SYMBOL(__posix_acl_chmod); /** * posix_acl_chmod - chmod a posix acl * - * @mnt_userns: user namespace of the mount @inode was found from + * @idmap: idmap of the mount @inode was found from * @dentry: dentry to check permissions on * @mode: the new mode of @inode * - * If the dentry has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then - * take care to map the inode according to @mnt_userns before checking + * If the dentry has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then + * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be - * performed on the raw inode simply passs init_user_ns. + * performed on the raw inode simply passs @nop_mnt_idmap. */ int - posix_acl_chmod(struct user_namespace *mnt_userns, struct dentry *dentry, + posix_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode) { struct inode *inode = d_inode(dentry); @@ -624,7 +625,7 @@ int ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode); if (ret) return ret; - ret = inode->i_op->set_acl(mnt_userns, dentry, acl, ACL_TYPE_ACCESS); + ret = inode->i_op->set_acl(idmap, dentry, acl, ACL_TYPE_ACCESS); posix_acl_release(acl); return ret; } @@ -683,7 +684,7 @@ EXPORT_SYMBOL_GPL(posix_acl_create); /** * posix_acl_update_mode - update mode in set_acl - * @mnt_userns: user namespace of the mount @inode was found from + * @idmap: idmap of the mount @inode was found from * @inode: target inode * @mode_p: mode (pointer) for update * @acl: acl pointer @@ -695,15 +696,15 @@ EXPORT_SYMBOL_GPL(posix_acl_create); * As with chmod, clear the setgid bit if the caller is not in the owning group * or capable of CAP_FSETID (see inode_change_ok). * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then - * take care to map the inode according to @mnt_userns before checking + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then + * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be - * performed on the raw inode simply passs init_user_ns. + * performed on the raw inode simply passs @nop_mnt_idmap. * * Called from set_acl inode operations. */ -int posix_acl_update_mode(struct user_namespace *mnt_userns, +int posix_acl_update_mode(struct mnt_idmap *idmap, struct inode *inode, umode_t *mode_p, struct posix_acl **acl) { @@ -715,8 +716,8 @@ int posix_acl_update_mode(struct user_namespace *mnt_userns, return error; if (error == 0) *acl = NULL; - if (!vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode)) && - !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID)) + if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) && + !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) mode &= ~S_ISGID; *mode_p = mode; return 0; @@ -893,7 +894,6 @@ static ssize_t vfs_posix_acl_to_xattr(struct mnt_idmap *idmap, struct posix_acl_xattr_header *ext_acl = buffer; struct posix_acl_xattr_entry *ext_entry; struct user_namespace *fs_userns, *caller_userns; - struct user_namespace *mnt_userns; ssize_t real_size, n; vfsuid_t vfsuid; vfsgid_t vfsgid; @@ -909,19 +909,18 @@ static ssize_t vfs_posix_acl_to_xattr(struct mnt_idmap *idmap, fs_userns = i_user_ns(inode); caller_userns = current_user_ns(); - mnt_userns = mnt_idmap_owner(idmap); for (n=0; n < acl->a_count; n++, ext_entry++) { const struct posix_acl_entry *acl_e = &acl->a_entries[n]; ext_entry->e_tag = cpu_to_le16(acl_e->e_tag); ext_entry->e_perm = cpu_to_le16(acl_e->e_perm); switch(acl_e->e_tag) { case ACL_USER: - vfsuid = make_vfsuid(mnt_userns, fs_userns, acl_e->e_uid); + vfsuid = make_vfsuid(idmap, fs_userns, acl_e->e_uid); ext_entry->e_id = cpu_to_le32(from_kuid( caller_userns, vfsuid_into_kuid(vfsuid))); break; case ACL_GROUP: - vfsgid = make_vfsgid(mnt_userns, fs_userns, acl_e->e_gid); + vfsgid = make_vfsgid(idmap, fs_userns, acl_e->e_gid); ext_entry->e_id = cpu_to_le32(from_kgid( caller_userns, vfsgid_into_kgid(vfsgid))); break; @@ -934,7 +933,7 @@ static ssize_t vfs_posix_acl_to_xattr(struct mnt_idmap *idmap, } int -set_posix_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +set_posix_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type, struct posix_acl *acl) { struct inode *inode = d_inode(dentry); @@ -946,7 +945,7 @@ set_posix_acl(struct user_namespace *mnt_userns, struct dentry *dentry, if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; - if (!inode_owner_or_capable(mnt_userns, inode)) + if (!inode_owner_or_capable(idmap, inode)) return -EPERM; if (acl) { @@ -954,7 +953,7 @@ set_posix_acl(struct user_namespace *mnt_userns, struct dentry *dentry, if (ret) return ret; } - return inode->i_op->set_acl(mnt_userns, dentry, acl, type); + return inode->i_op->set_acl(idmap, dentry, acl, type); } EXPORT_SYMBOL(set_posix_acl); @@ -978,14 +977,14 @@ const struct xattr_handler posix_acl_default_xattr_handler = { }; EXPORT_SYMBOL_GPL(posix_acl_default_xattr_handler); -int simple_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int simple_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int error; struct inode *inode = d_inode(dentry); if (type == ACL_TYPE_ACCESS) { - error = posix_acl_update_mode(mnt_userns, inode, + error = posix_acl_update_mode(idmap, inode, &inode->i_mode, &acl); if (error) return error; @@ -1017,7 +1016,7 @@ int simple_acl_create(struct inode *dir, struct inode *inode) return 0; } -static int vfs_set_acl_idmapped_mnt(struct user_namespace *mnt_userns, +static int vfs_set_acl_idmapped_mnt(struct mnt_idmap *idmap, struct user_namespace *fs_userns, struct posix_acl *acl) { @@ -1026,11 +1025,11 @@ static int vfs_set_acl_idmapped_mnt(struct user_namespace *mnt_userns, switch (acl_e->e_tag) { case ACL_USER: - acl_e->e_uid = from_vfsuid(mnt_userns, fs_userns, + acl_e->e_uid = from_vfsuid(idmap, fs_userns, VFSUIDT_INIT(acl_e->e_uid)); break; case ACL_GROUP: - acl_e->e_gid = from_vfsgid(mnt_userns, fs_userns, + acl_e->e_gid = from_vfsgid(idmap, fs_userns, VFSGIDT_INIT(acl_e->e_gid)); break; } @@ -1041,7 +1040,7 @@ static int vfs_set_acl_idmapped_mnt(struct user_namespace *mnt_userns, /** * vfs_set_acl - set posix acls - * @mnt_userns: user namespace of the mount + * @idmap: idmap of the mount * @dentry: the dentry based on which to set the posix acls * @acl_name: the name of the posix acl * @kacl: the posix acls in the appropriate VFS format @@ -1051,7 +1050,7 @@ static int vfs_set_acl_idmapped_mnt(struct user_namespace *mnt_userns, * * Return: On success 0, on error negative errno. */ -int vfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) { int acl_type; @@ -1071,7 +1070,7 @@ int vfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, * if this is a filesystem with a backing store - ultimately * translate them to backing store values. */ - error = vfs_set_acl_idmapped_mnt(mnt_userns, i_user_ns(inode), kacl); + error = vfs_set_acl_idmapped_mnt(idmap, i_user_ns(inode), kacl); if (error) return error; } @@ -1083,11 +1082,11 @@ retry_deleg: * We only care about restrictions the inode struct itself places upon * us otherwise POSIX ACLs aren't subject to any VFS restrictions. */ - error = may_write_xattr(mnt_userns, inode); + error = may_write_xattr(idmap, inode); if (error) goto out_inode_unlock; - error = security_inode_set_acl(mnt_userns, dentry, acl_name, kacl); + error = security_inode_set_acl(idmap, dentry, acl_name, kacl); if (error) goto out_inode_unlock; @@ -1096,7 +1095,7 @@ retry_deleg: goto out_inode_unlock; if (inode->i_opflags & IOP_XATTR) - error = set_posix_acl(mnt_userns, dentry, acl_type, kacl); + error = set_posix_acl(idmap, dentry, acl_type, kacl); else if (unlikely(is_bad_inode(inode))) error = -EIO; else @@ -1121,7 +1120,7 @@ EXPORT_SYMBOL_GPL(vfs_set_acl); /** * vfs_get_acl - get posix acls - * @mnt_userns: user namespace of the mount + * @idmap: idmap of the mount * @dentry: the dentry based on which to retrieve the posix acls * @acl_name: the name of the posix acl * @@ -1130,7 +1129,7 @@ EXPORT_SYMBOL_GPL(vfs_set_acl); * * Return: On success POSIX ACLs in VFS format, on error negative errno. */ -struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns, +struct posix_acl *vfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { struct inode *inode = d_inode(dentry); @@ -1145,7 +1144,7 @@ struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns, * The VFS has no restrictions on reading POSIX ACLs so calling * something like xattr_permission() isn't needed. Only LSMs get a say. */ - error = security_inode_get_acl(mnt_userns, dentry, acl_name); + error = security_inode_get_acl(idmap, dentry, acl_name); if (error) return ERR_PTR(error); @@ -1154,7 +1153,7 @@ struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns, if (S_ISLNK(inode->i_mode)) return ERR_PTR(-EOPNOTSUPP); - acl = __get_acl(mnt_userns, dentry, inode, acl_type); + acl = __get_acl(idmap, dentry, inode, acl_type); if (IS_ERR(acl)) return acl; if (!acl) @@ -1166,7 +1165,7 @@ EXPORT_SYMBOL_GPL(vfs_get_acl); /** * vfs_remove_acl - remove posix acls - * @mnt_userns: user namespace of the mount + * @idmap: idmap of the mount * @dentry: the dentry based on which to retrieve the posix acls * @acl_name: the name of the posix acl * @@ -1174,7 +1173,7 @@ EXPORT_SYMBOL_GPL(vfs_get_acl); * * Return: On success 0, on error negative errno. */ -int vfs_remove_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { int acl_type; @@ -1193,11 +1192,11 @@ retry_deleg: * We only care about restrictions the inode struct itself places upon * us otherwise POSIX ACLs aren't subject to any VFS restrictions. */ - error = may_write_xattr(mnt_userns, inode); + error = may_write_xattr(idmap, inode); if (error) goto out_inode_unlock; - error = security_inode_remove_acl(mnt_userns, dentry, acl_name); + error = security_inode_remove_acl(idmap, dentry, acl_name); if (error) goto out_inode_unlock; @@ -1206,14 +1205,14 @@ retry_deleg: goto out_inode_unlock; if (inode->i_opflags & IOP_XATTR) - error = set_posix_acl(mnt_userns, dentry, acl_type, NULL); + error = set_posix_acl(idmap, dentry, acl_type, NULL); else if (unlikely(is_bad_inode(inode))) error = -EIO; else error = -EOPNOTSUPP; if (!error) { fsnotify_xattr(dentry); - evm_inode_post_remove_acl(mnt_userns, dentry, acl_name); + evm_inode_post_remove_acl(idmap, dentry, acl_name); } out_inode_unlock: @@ -1245,7 +1244,7 @@ int do_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, return PTR_ERR(acl); } - error = vfs_set_acl(mnt_idmap_owner(idmap), dentry, acl_name, acl); + error = vfs_set_acl(idmap, dentry, acl_name, acl); posix_acl_release(acl); return error; } @@ -1256,7 +1255,7 @@ ssize_t do_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, ssize_t error; struct posix_acl *acl; - acl = vfs_get_acl(mnt_idmap_owner(idmap), dentry, acl_name); + acl = vfs_get_acl(idmap, dentry, acl_name); if (IS_ERR(acl)) return PTR_ERR(acl); diff --git a/fs/proc/base.c b/fs/proc/base.c index 9e479d7d202b..5e0e0ccd47aa 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -685,7 +685,7 @@ static bool proc_fd_access_allowed(struct inode *inode) return allowed; } -int proc_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int proc_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { int error; @@ -694,11 +694,11 @@ int proc_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (attr->ia_valid & ATTR_MODE) return -EPERM; - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } @@ -727,7 +727,7 @@ static bool has_pid_permissions(struct proc_fs_info *fs_info, } -static int proc_pid_permission(struct user_namespace *mnt_userns, +static int proc_pid_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); @@ -753,7 +753,7 @@ static int proc_pid_permission(struct user_namespace *mnt_userns, return -EPERM; } - return generic_permission(&init_user_ns, inode, mask); + return generic_permission(&nop_mnt_idmap, inode, mask); } @@ -1959,14 +1959,14 @@ static struct inode *proc_pid_make_base_inode(struct super_block *sb, return inode; } -int pid_getattr(struct user_namespace *mnt_userns, const struct path *path, +int pid_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); struct task_struct *task; - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); stat->uid = GLOBAL_ROOT_UID; stat->gid = GLOBAL_ROOT_GID; @@ -3557,7 +3557,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) * This function makes sure that the node is always accessible for members of * same thread group. */ -static int proc_tid_comm_permission(struct user_namespace *mnt_userns, +static int proc_tid_comm_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { bool is_same_tgroup; @@ -3577,7 +3577,7 @@ static int proc_tid_comm_permission(struct user_namespace *mnt_userns, return 0; } - return generic_permission(&init_user_ns, inode, mask); + return generic_permission(&nop_mnt_idmap, inode, mask); } static const struct inode_operations proc_tid_comm_inode_operations = { @@ -3891,13 +3891,13 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) return 0; } -static int proc_task_getattr(struct user_namespace *mnt_userns, +static int proc_task_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct task_struct *p = get_proc_task(inode); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); if (p) { stat->nlink += get_nr_threads(p); diff --git a/fs/proc/fd.c b/fs/proc/fd.c index fc46d6fe080c..b3140deebbbf 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -12,6 +12,7 @@ #include <linux/file.h> #include <linux/seq_file.h> #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/proc_fs.h> @@ -325,13 +326,13 @@ static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, * /proc/pid/fd needs a special permission handler so that a process can still * access /proc/self/fd after it has executed a setuid(). */ -int proc_fd_permission(struct user_namespace *mnt_userns, +int proc_fd_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct task_struct *p; int rv; - rv = generic_permission(&init_user_ns, inode, mask); + rv = generic_permission(&nop_mnt_idmap, inode, mask); if (rv == 0) return rv; @@ -344,14 +345,14 @@ int proc_fd_permission(struct user_namespace *mnt_userns, return rv; } -static int proc_fd_getattr(struct user_namespace *mnt_userns, +static int proc_fd_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); int rv = 0; - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); /* If it's a directory, put the number of open fds there */ if (S_ISDIR(inode->i_mode)) { diff --git a/fs/proc/fd.h b/fs/proc/fd.h index c5a921a06a0b..7e7265f7e06f 100644 --- a/fs/proc/fd.h +++ b/fs/proc/fd.h @@ -10,7 +10,7 @@ extern const struct inode_operations proc_fd_inode_operations; extern const struct file_operations proc_fdinfo_operations; extern const struct inode_operations proc_fdinfo_inode_operations; -extern int proc_fd_permission(struct user_namespace *mnt_userns, +extern int proc_fd_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); static inline unsigned int proc_fd(struct inode *inode) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 587b91d9d998..8379593fa4bb 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -115,18 +115,18 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir, return true; } -static int proc_notify_change(struct user_namespace *mnt_userns, +static int proc_notify_change(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); struct proc_dir_entry *de = PDE(inode); int error; - error = setattr_prepare(&init_user_ns, dentry, iattr); + error = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (error) return error; - setattr_copy(&init_user_ns, inode, iattr); + setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); proc_set_user(de, inode->i_uid, inode->i_gid); @@ -134,7 +134,7 @@ static int proc_notify_change(struct user_namespace *mnt_userns, return 0; } -static int proc_getattr(struct user_namespace *mnt_userns, +static int proc_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { @@ -147,7 +147,7 @@ static int proc_getattr(struct user_namespace *mnt_userns, } } - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); return 0; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index b701d0207edf..9dda7e54b2d0 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -162,9 +162,9 @@ extern int proc_pid_statm(struct seq_file *, struct pid_namespace *, * base.c */ extern const struct dentry_operations pid_dentry_operations; -extern int pid_getattr(struct user_namespace *, const struct path *, +extern int pid_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); -extern int proc_setattr(struct user_namespace *, struct dentry *, +extern int proc_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern void proc_pid_evict_inode(struct proc_inode *); extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 856839b8ae8b..a0c0419872e3 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -299,7 +299,7 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir, return de; } -static int proc_tgid_net_getattr(struct user_namespace *mnt_userns, +static int proc_tgid_net_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { @@ -308,7 +308,7 @@ static int proc_tgid_net_getattr(struct user_namespace *mnt_userns, net = get_proc_task_net(inode); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); if (net != NULL) { stat->nlink = net->proc_net->nlink; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 48f2d60bd78a..e89bd8f1368b 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -798,7 +798,7 @@ out: return 0; } -static int proc_sys_permission(struct user_namespace *mnt_userns, +static int proc_sys_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { /* @@ -827,7 +827,7 @@ static int proc_sys_permission(struct user_namespace *mnt_userns, return error; } -static int proc_sys_setattr(struct user_namespace *mnt_userns, +static int proc_sys_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -836,16 +836,16 @@ static int proc_sys_setattr(struct user_namespace *mnt_userns, if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) return -EPERM; - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } -static int proc_sys_getattr(struct user_namespace *mnt_userns, +static int proc_sys_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { @@ -856,7 +856,7 @@ static int proc_sys_getattr(struct user_namespace *mnt_userns, if (IS_ERR(head)) return PTR_ERR(head); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); if (table) stat->mode = (stat->mode & S_IFMT) | table->mode; diff --git a/fs/proc/root.c b/fs/proc/root.c index 3c2ee3eb1138..a86e65a608da 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -310,11 +310,11 @@ void __init proc_root_init(void) register_filesystem(&proc_fs_type); } -static int proc_root_getattr(struct user_namespace *mnt_userns, +static int proc_root_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { - generic_fillattr(&init_user_ns, d_inode(path->dentry), stat); + generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat); stat->nlink = proc_root.nlink + nr_processes(); return 0; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e35a0398db63..af1c49ae11b1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -745,9 +745,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, page = pfn_swap_entry_to_page(swpent); } if (page) { - int mapcount = page_mapcount(page); - - if (mapcount >= 2) + if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte)) mss->shared_hugetlb += huge_page_size(hstate_vma(vma)); else mss->private_hugetlb += huge_page_size(hstate_vma(vma)); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index f27faf5db554..a6357f728034 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2085,7 +2085,7 @@ EXPORT_SYMBOL(__dquot_transfer); /* Wrapper for transferring ownership of an inode for uid/gid only * Called from FSXXX_setattr() */ -int dquot_transfer(struct user_namespace *mnt_userns, struct inode *inode, +int dquot_transfer(struct mnt_idmap *idmap, struct inode *inode, struct iattr *iattr) { struct dquot *transfer_to[MAXQUOTAS] = {}; @@ -2096,8 +2096,8 @@ int dquot_transfer(struct user_namespace *mnt_userns, struct inode *inode, if (!dquot_active(inode)) return 0; - if (i_uid_needs_update(mnt_userns, iattr, inode)) { - kuid_t kuid = from_vfsuid(mnt_userns, i_user_ns(inode), + if (i_uid_needs_update(idmap, iattr, inode)) { + kuid_t kuid = from_vfsuid(idmap, i_user_ns(inode), iattr->ia_vfsuid); dquot = dqget(sb, make_kqid_uid(kuid)); @@ -2110,8 +2110,8 @@ int dquot_transfer(struct user_namespace *mnt_userns, struct inode *inode, } transfer_to[USRQUOTA] = dquot; } - if (i_gid_needs_update(mnt_userns, iattr, inode)) { - kgid_t kgid = from_vfsgid(mnt_userns, i_user_ns(inode), + if (i_gid_needs_update(idmap, iattr, inode)) { + kgid_t kgid = from_vfsgid(idmap, i_user_ns(inode), iattr->ia_vfsgid); dquot = dqget(sb, make_kqid_gid(kgid)); diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index cb240eac5036..5bf74c2f6042 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -22,7 +22,7 @@ #include <linux/uaccess.h> #include "internal.h" -static int ramfs_nommu_setattr(struct user_namespace *, struct dentry *, struct iattr *); +static int ramfs_nommu_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); static unsigned long ramfs_nommu_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, @@ -158,7 +158,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) * handle a change of attributes * - we're specifically interested in a change of size */ -static int ramfs_nommu_setattr(struct user_namespace *mnt_userns, +static int ramfs_nommu_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *ia) { struct inode *inode = d_inode(dentry); @@ -166,7 +166,7 @@ static int ramfs_nommu_setattr(struct user_namespace *mnt_userns, int ret = 0; /* POSIX UID/GID verification for setting inode attributes */ - ret = setattr_prepare(&init_user_ns, dentry, ia); + ret = setattr_prepare(&nop_mnt_idmap, dentry, ia); if (ret) return ret; @@ -186,7 +186,7 @@ static int ramfs_nommu_setattr(struct user_namespace *mnt_userns, } } - setattr_copy(&init_user_ns, inode, ia); + setattr_copy(&nop_mnt_idmap, inode, ia); out: ia->ia_valid = old_ia_valid; return ret; diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index b3257e852820..5ba580c78835 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -61,7 +61,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, if (inode) { inode->i_ino = get_next_ino(); - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_mapping->a_ops = &ram_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); mapping_set_unevictable(inode->i_mapping); @@ -95,7 +95,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, */ /* SMP-safe */ static int -ramfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +ramfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev); @@ -110,22 +110,22 @@ ramfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, return error; } -static int ramfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int ramfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { - int retval = ramfs_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0); + int retval = ramfs_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFDIR, 0); if (!retval) inc_nlink(dir); return retval; } -static int ramfs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int ramfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - return ramfs_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0); + return ramfs_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFREG, 0); } -static int ramfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int ramfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { struct inode *inode; @@ -145,7 +145,7 @@ static int ramfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, return error; } -static int ramfs_tmpfile(struct user_namespace *mnt_userns, +static int ramfs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { struct inode *inode; diff --git a/fs/reiserfs/acl.h b/fs/reiserfs/acl.h index 29c503a06db4..2571b1a8be84 100644 --- a/fs/reiserfs/acl.h +++ b/fs/reiserfs/acl.h @@ -49,7 +49,7 @@ static inline int reiserfs_acl_count(size_t size) #ifdef CONFIG_REISERFS_FS_POSIX_ACL struct posix_acl *reiserfs_get_acl(struct inode *inode, int type, bool rcu); -int reiserfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int reiserfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); int reiserfs_acl_chmod(struct dentry *dentry); int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index c7d1fa526dea..d54cab854f60 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3262,21 +3262,21 @@ static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) return ret; } -int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int reiserfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); unsigned int ia_valid; int error; - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; /* must be turned off for recursive notify_change calls */ ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); - if (is_quota_modification(mnt_userns, inode, attr)) { + if (is_quota_modification(&nop_mnt_idmap, inode, attr)) { error = dquot_initialize(inode); if (error) return error; @@ -3359,7 +3359,7 @@ int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, reiserfs_write_unlock(inode->i_sb); if (error) goto out; - error = dquot_transfer(mnt_userns, inode, attr); + error = dquot_transfer(&nop_mnt_idmap, inode, attr); reiserfs_write_lock(inode->i_sb); if (error) { journal_end(&th); @@ -3398,7 +3398,7 @@ int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, } if (!error) { - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); } diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 4b86ecf5817e..6bf9b54e58ca 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -24,7 +24,7 @@ int reiserfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) return 0; } -int reiserfs_fileattr_set(struct user_namespace *mnt_userns, +int reiserfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); @@ -96,7 +96,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) err = put_user(inode->i_generation, (int __user *)arg); break; case REISERFS_IOC_SETVERSION: - if (!inode_owner_or_capable(&init_user_ns, inode)) { + if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) { err = -EPERM; break; } diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 0b8aa99749f1..42d2c20e1345 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -616,11 +616,11 @@ static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode) * the quota init calls have to know who to charge the quota to, so * we have to set uid and gid here */ - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); return dquot_initialize(inode); } -static int reiserfs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int reiserfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { int retval; @@ -700,7 +700,7 @@ out_failed: return retval; } -static int reiserfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int reiserfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { int retval; @@ -784,7 +784,7 @@ out_failed: return retval; } -static int reiserfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int reiserfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { int retval; @@ -1099,7 +1099,7 @@ out_unlink: return retval; } -static int reiserfs_symlink(struct user_namespace *mnt_userns, +static int reiserfs_symlink(struct mnt_idmap *idmap, struct inode *parent_dir, struct dentry *dentry, const char *symname) { @@ -1311,7 +1311,7 @@ static void set_ino_in_dir_entry(struct reiserfs_dir_entry *de, * one path. If it holds 2 or more, it can get into endless waiting in * get_empty_nodes or its clones */ -static int reiserfs_rename(struct user_namespace *mnt_userns, +static int reiserfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 3aa928ec527a..98e6f53c2fe0 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -3100,7 +3100,7 @@ static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th, } void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); -int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int reiserfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len); @@ -3407,7 +3407,7 @@ __u32 r5_hash(const signed char *msg, int len); /* prototypes from ioctl.c */ int reiserfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); -int reiserfs_fileattr_set(struct user_namespace *mnt_userns, +int reiserfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long reiserfs_compat_ioctl(struct file *filp, diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 8b2d52443f41..06d810c72c52 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -66,14 +66,14 @@ static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) { BUG_ON(!inode_is_locked(dir)); - return dir->i_op->create(&init_user_ns, dir, dentry, mode, true); + return dir->i_op->create(&nop_mnt_idmap, dir, dentry, mode, true); } #endif static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { BUG_ON(!inode_is_locked(dir)); - return dir->i_op->mkdir(&init_user_ns, dir, dentry, mode); + return dir->i_op->mkdir(&nop_mnt_idmap, dir, dentry, mode); } /* @@ -352,7 +352,7 @@ static int chown_one_xattr(struct dentry *dentry, void *data) * ATTR_MODE is set. */ attrs->ia_valid &= (ATTR_UID|ATTR_GID); - err = reiserfs_setattr(&init_user_ns, dentry, attrs); + err = reiserfs_setattr(&nop_mnt_idmap, dentry, attrs); attrs->ia_valid = ia_valid; return err; @@ -597,7 +597,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, inode_lock_nested(d_inode(dentry), I_MUTEX_XATTR); inode_dio_wait(d_inode(dentry)); - err = reiserfs_setattr(&init_user_ns, dentry, &newattrs); + err = reiserfs_setattr(&nop_mnt_idmap, dentry, &newattrs); inode_unlock(d_inode(dentry)); } else update_ctime(inode); @@ -941,7 +941,7 @@ static int xattr_mount_check(struct super_block *s) return 0; } -int reiserfs_permission(struct user_namespace *mnt_userns, struct inode *inode, +int reiserfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { /* @@ -951,7 +951,7 @@ int reiserfs_permission(struct user_namespace *mnt_userns, struct inode *inode, if (IS_PRIVATE(inode)) return 0; - return generic_permission(&init_user_ns, inode, mask); + return generic_permission(&nop_mnt_idmap, inode, mask); } static int xattr_hide_revalidate(struct dentry *dentry, unsigned int flags) diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h index e47fde1182de..5868a4e990e3 100644 --- a/fs/reiserfs/xattr.h +++ b/fs/reiserfs/xattr.h @@ -16,7 +16,7 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags); int reiserfs_lookup_privroot(struct super_block *sb); int reiserfs_delete_xattrs(struct inode *inode); int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); -int reiserfs_permission(struct user_namespace *mnt_userns, +int reiserfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); #ifdef CONFIG_REISERFS_FS_XATTR diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 93fe414fed18..138060452678 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -18,7 +18,7 @@ static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th, int -reiserfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +reiserfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int error, error2; @@ -42,7 +42,7 @@ reiserfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, reiserfs_write_unlock(inode->i_sb); if (error == 0) { if (type == ACL_TYPE_ACCESS && acl) { - error = posix_acl_update_mode(&init_user_ns, inode, + error = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode, &acl); if (error) goto unlock; @@ -407,5 +407,5 @@ int reiserfs_acl_chmod(struct dentry *dentry) !reiserfs_posixacl(inode->i_sb)) return 0; - return posix_acl_chmod(&init_user_ns, dentry, inode->i_mode); + return posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode); } diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index 857a65b05726..41c0ea84fbff 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -22,7 +22,7 @@ security_get(const struct xattr_handler *handler, struct dentry *unused, static int security_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, struct dentry *unused, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) { diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c index d853cea2afcd..0c0c74d8db0e 100644 --- a/fs/reiserfs/xattr_trusted.c +++ b/fs/reiserfs/xattr_trusted.c @@ -21,7 +21,7 @@ trusted_get(const struct xattr_handler *handler, struct dentry *unused, static int trusted_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, struct dentry *unused, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) { diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index 65d9cd10a5ea..88195181e1d7 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c @@ -18,7 +18,7 @@ user_get(const struct xattr_handler *handler, struct dentry *unused, } static int -user_set(const struct xattr_handler *handler, struct user_namespace *mnt_userns, +user_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) diff --git a/fs/remap_range.c b/fs/remap_range.c index 41f60477bb41..1331a890f2f2 100644 --- a/fs/remap_range.c +++ b/fs/remap_range.c @@ -419,16 +419,16 @@ EXPORT_SYMBOL(vfs_clone_file_range); /* Check whether we are allowed to dedupe the destination file */ static bool allow_file_dedupe(struct file *file) { - struct user_namespace *mnt_userns = file_mnt_user_ns(file); + struct mnt_idmap *idmap = file_mnt_idmap(file); struct inode *inode = file_inode(file); if (capable(CAP_SYS_ADMIN)) return true; if (file->f_mode & FMODE_WRITE) return true; - if (vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode), current_fsuid())) + if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) return true; - if (!inode_permission(mnt_userns, inode, MAY_WRITE)) + if (!inode_permission(idmap, inode, MAY_WRITE)) return true; return false; } diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index b3fdc8212c5f..95f8e8901768 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -183,7 +183,7 @@ static inline int squashfs_block_size(__le32 raw) #define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\ sizeof(u64)) /* xattr id lookup table defines */ -#define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id)) +#define SQUASHFS_XATTR_BYTES(A) (((u64) (A)) * sizeof(struct squashfs_xattr_id)) #define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \ SQUASHFS_METADATA_SIZE) diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 659082e9e51d..72f6f4b37863 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -63,7 +63,7 @@ struct squashfs_sb_info { long long bytes_used; unsigned int inodes; unsigned int fragments; - int xattr_ids; + unsigned int xattr_ids; unsigned int ids; bool panic_on_errors; const struct squashfs_decompressor_thread_ops *thread_ops; diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h index d8a270d3ac4c..f1a463d8bfa0 100644 --- a/fs/squashfs/xattr.h +++ b/fs/squashfs/xattr.h @@ -10,12 +10,12 @@ #ifdef CONFIG_SQUASHFS_XATTR extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64, - u64 *, int *); + u64 *, unsigned int *); extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, unsigned int *, unsigned long long *); #else static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb, - u64 start, u64 *xattr_table_start, int *xattr_ids) + u64 start, u64 *xattr_table_start, unsigned int *xattr_ids) { struct squashfs_xattr_id_table *id_table; diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c index 087cab8c78f4..c8469c656e0d 100644 --- a/fs/squashfs/xattr_id.c +++ b/fs/squashfs/xattr_id.c @@ -56,7 +56,7 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index, * Read uncompressed xattr id lookup table indexes from disk into memory */ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start, - u64 *xattr_table_start, int *xattr_ids) + u64 *xattr_table_start, unsigned int *xattr_ids) { struct squashfs_sb_info *msblk = sb->s_fs_info; unsigned int len, indexes; diff --git a/fs/stat.c b/fs/stat.c index d6cc74ca8486..7c238da22ef0 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -18,6 +18,7 @@ #include <linux/syscalls.h> #include <linux/pagemap.h> #include <linux/compat.h> +#include <linux/iversion.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -27,7 +28,7 @@ /** * generic_fillattr - Fill in the basic attributes from the inode struct - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @inode: Inode to use as the source * @stat: Where to fill in the attributes * @@ -35,17 +36,17 @@ * found on the VFS inode structure. This is the default if no getattr inode * operation is supplied. * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then - * take care to map the inode according to @mnt_userns before filling in the + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then + * take care to map the inode according to @idmap before filling in the * uid and gid filds. On non-idmapped mounts or if permission checking is to be - * performed on the raw inode simply passs init_user_ns. + * performed on the raw inode simply passs @nop_mnt_idmap. */ -void generic_fillattr(struct user_namespace *mnt_userns, struct inode *inode, +void generic_fillattr(struct mnt_idmap *idmap, struct inode *inode, struct kstat *stat) { - vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode); - vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode); + vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); + vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); stat->dev = inode->i_sb->s_dev; stat->ino = inode->i_ino; @@ -97,7 +98,7 @@ EXPORT_SYMBOL(generic_fill_statx_attr); int vfs_getattr_nosec(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { - struct user_namespace *mnt_userns; + struct mnt_idmap *idmap; struct inode *inode = d_backing_inode(path->dentry); memset(stat, 0, sizeof(*stat)); @@ -122,12 +123,17 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT | STATX_ATTR_DAX); - mnt_userns = mnt_user_ns(path->mnt); + if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) { + stat->result_mask |= STATX_CHANGE_COOKIE; + stat->change_cookie = inode_query_iversion(inode); + } + + idmap = mnt_idmap(path->mnt); if (inode->i_op->getattr) - return inode->i_op->getattr(mnt_userns, path, stat, + return inode->i_op->getattr(idmap, path, stat, request_mask, query_flags); - generic_fillattr(mnt_userns, inode, stat); + generic_fillattr(idmap, inode, stat); return 0; } EXPORT_SYMBOL(vfs_getattr_nosec); @@ -602,9 +608,11 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) memset(&tmp, 0, sizeof(tmp)); - tmp.stx_mask = stat->result_mask; + /* STATX_CHANGE_COOKIE is kernel-only for now */ + tmp.stx_mask = stat->result_mask & ~STATX_CHANGE_COOKIE; tmp.stx_blksize = stat->blksize; - tmp.stx_attributes = stat->attributes; + /* STATX_ATTR_CHANGE_MONOTONIC is kernel-only for now */ + tmp.stx_attributes = stat->attributes & ~STATX_ATTR_CHANGE_MONOTONIC; tmp.stx_nlink = stat->nlink; tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid); tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid); @@ -643,6 +651,11 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags, if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; + /* STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests + * from userland. + */ + mask &= ~STATX_CHANGE_COOKIE; + error = vfs_statx(dfd, filename, flags, &stat, mask); if (error) return error; diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 90e00124ea07..50eb92557a0f 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -29,13 +29,13 @@ const struct file_operations sysv_file_operations = { .splice_read = generic_file_splice_read, }; -static int sysv_setattr(struct user_namespace *mnt_userns, +static int sysv_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); int error; - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; @@ -48,7 +48,7 @@ static int sysv_setattr(struct user_namespace *mnt_userns, sysv_truncate(inode); } - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index 50df794a3c1f..e732879036ab 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c @@ -163,7 +163,7 @@ struct inode * sysv_new_inode(const struct inode * dir, umode_t mode) *sbi->s_sb_fic_count = cpu_to_fs16(sbi, count); fs16_add(sbi, sbi->s_sb_total_free_inodes, -1); dirty_sb(sb); - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = fs16_to_cpu(sbi, ino); inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_blocks = 0; diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 3b8567564e7e..b22764fe669c 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -441,11 +441,11 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size) return res; } -int sysv_getattr(struct user_namespace *mnt_userns, const struct path *path, +int sysv_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct super_block *s = path->dentry->d_sb; - generic_fillattr(&init_user_ns, d_inode(path->dentry), stat); + generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat); stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size); stat->blksize = s->s_blocksize; return 0; diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index b2e6abc06a2d..ecd424461511 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -41,7 +41,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, un return d_splice_alias(inode, dentry); } -static int sysv_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int sysv_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode * inode; @@ -61,13 +61,13 @@ static int sysv_mknod(struct user_namespace *mnt_userns, struct inode *dir, return err; } -static int sysv_create(struct user_namespace *mnt_userns, struct inode *dir, +static int sysv_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - return sysv_mknod(&init_user_ns, dir, dentry, mode, 0); + return sysv_mknod(&nop_mnt_idmap, dir, dentry, mode, 0); } -static int sysv_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int sysv_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { int err = -ENAMETOOLONG; @@ -110,7 +110,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir, return add_nondir(dentry, inode); } -static int sysv_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int sysv_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode * inode; @@ -189,7 +189,7 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry) * Anybody can rename anything with this: the permission checks are left to the * higher-level routines. */ -static int sysv_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 99ddf033da4f..5e122a5673c1 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -141,7 +141,7 @@ extern struct inode *sysv_iget(struct super_block *, unsigned int); extern int sysv_write_inode(struct inode *, struct writeback_control *wbc); extern int sysv_sync_inode(struct inode *); extern void sysv_set_inode(struct inode *, dev_t); -extern int sysv_getattr(struct user_namespace *, const struct path *, +extern int sysv_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); extern int sysv_init_icache(void); extern void sysv_destroy_icache(void); diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index da85b3979195..57ac8aa4a724 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -67,7 +67,7 @@ static char *get_dname(struct dentry *dentry) return name; } -static int tracefs_syscall_mkdir(struct user_namespace *mnt_userns, +static int tracefs_syscall_mkdir(struct mnt_idmap *idmap, struct inode *inode, struct dentry *dentry, umode_t mode) { diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 0f29cf201136..1e92c1730c16 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -95,7 +95,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, */ inode->i_flags |= S_NOCMTIME; - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_mapping->nrpages = 0; @@ -283,7 +283,7 @@ static int ubifs_prepare_create(struct inode *dir, struct dentry *dentry, return fscrypt_setup_filename(dir, &dentry->d_name, 0, nm); } -static int ubifs_create(struct user_namespace *mnt_userns, struct inode *dir, +static int ubifs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct inode *inode; @@ -426,7 +426,7 @@ static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) mutex_unlock(&ubifs_inode(inode1)->ui_mutex); } -static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, +static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { struct dentry *dentry = file->f_path.dentry; @@ -979,7 +979,7 @@ out_fname: return err; } -static int ubifs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; @@ -1052,7 +1052,7 @@ out_budg: return err; } -static int ubifs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int ubifs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; @@ -1141,7 +1141,7 @@ out_budg: return err; } -static int ubifs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { struct inode *inode; @@ -1606,7 +1606,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry, return err; } -static int ubifs_rename(struct user_namespace *mnt_userns, +static int ubifs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) @@ -1631,7 +1631,7 @@ static int ubifs_rename(struct user_namespace *mnt_userns, return do_rename(old_dir, old_dentry, new_dir, new_dentry, flags); } -int ubifs_getattr(struct user_namespace *mnt_userns, const struct path *path, +int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { loff_t size; @@ -1654,7 +1654,7 @@ int ubifs_getattr(struct user_namespace *mnt_userns, const struct path *path, STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); stat->blksize = UBIFS_BLOCK_SIZE; stat->size = ui->ui_size; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index f2353dd676ef..8cb5d76b301c 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1258,7 +1258,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, return err; } -int ubifs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int ubifs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { int err; @@ -1267,7 +1267,7 @@ int ubifs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, dbg_gen("ino %lu, mode %#x, ia_valid %#x", inode->i_ino, inode->i_mode, attr->ia_valid); - err = setattr_prepare(&init_user_ns, dentry, attr); + err = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (err) return err; @@ -1608,11 +1608,11 @@ static const char *ubifs_get_link(struct dentry *dentry, return fscrypt_get_symlink(inode, ui->data, ui->data_len, done); } -static int ubifs_symlink_getattr(struct user_namespace *mnt_userns, +static int ubifs_symlink_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { - ubifs_getattr(mnt_userns, path, stat, request_mask, query_flags); + ubifs_getattr(idmap, path, stat, request_mask, query_flags); if (IS_ENCRYPTED(d_inode(path->dentry))) return fscrypt_symlink_getattr(path, stat); diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 71bcebe45f9c..67c5108abd89 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -144,7 +144,7 @@ int ubifs_fileattr_get(struct dentry *dentry, struct fileattr *fa) return 0; } -int ubifs_fileattr_set(struct user_namespace *mnt_userns, +int ubifs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 478bbbb5382f..9063b73536f8 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -2020,15 +2020,15 @@ int ubifs_calc_dark(const struct ubifs_info *c, int spc); /* file.c */ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync); -int ubifs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int ubifs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags); /* dir.c */ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, umode_t mode, bool is_xattr); -int ubifs_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags); +int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int flags); int ubifs_check_dir_empty(struct inode *dir); /* xattr.c */ @@ -2085,7 +2085,7 @@ void ubifs_destroy_size_tree(struct ubifs_info *c); /* ioctl.c */ int ubifs_fileattr_get(struct dentry *dentry, struct fileattr *fa); -int ubifs_fileattr_set(struct user_namespace *mnt_userns, +int ubifs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); void ubifs_set_inode_flags(struct inode *inode); diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 3db8486e3725..349228dd1191 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -699,7 +699,7 @@ static int xattr_get(const struct xattr_handler *handler, } static int xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/fs/udf/file.c b/fs/udf/file.c index 5c659e23e578..2efbbbaa2da7 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -256,14 +256,14 @@ const struct file_operations udf_file_operations = { .llseek = generic_file_llseek, }; -static int udf_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +static int udf_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); struct super_block *sb = inode->i_sb; int error; - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; @@ -286,7 +286,7 @@ static int udf_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (attr->ia_valid & ATTR_MODE) udf_update_extra_perms(inode, attr->ia_mode); - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index b5d611cee749..e78a859d13e3 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -105,7 +105,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode) mutex_unlock(&sbi->s_alloc_mutex); } - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET)) inode->i_uid = sbi->s_uid; if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET)) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 1d7c2a812fc1..34e416327dd4 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -595,7 +595,7 @@ static void udf_do_extend_final_block(struct inode *inode, */ if (new_elen <= (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) return; - added_bytes = (last_ext->extLength & UDF_EXTENT_LENGTH_MASK) - new_elen; + added_bytes = new_elen - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); last_ext->extLength += added_bytes; UDF_I(inode)->i_lenExtents += added_bytes; @@ -684,7 +684,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, struct kernel_lb_addr eloc, tmpeloc; int c = 1; loff_t lbcount = 0, b_off = 0; - udf_pblk_t newblocknum, newblock; + udf_pblk_t newblocknum, newblock = 0; sector_t offset = 0; int8_t etype; struct udf_inode_info *iinfo = UDF_I(inode); @@ -787,7 +787,6 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, ret = udf_do_extend_file(inode, &prev_epos, laarr, hole_len); if (ret < 0) { *err = ret; - newblock = 0; goto out_free; } c = 0; @@ -852,7 +851,6 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, goal, err); if (!newblocknum) { *err = -ENOSPC; - newblock = 0; goto out_free; } if (isBeyondEOF) diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 7c95c549dd64..bdba2206a678 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -606,7 +606,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode) return 0; } -static int udf_create(struct user_namespace *mnt_userns, struct inode *dir, +static int udf_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct inode *inode = udf_new_inode(dir, mode); @@ -625,7 +625,7 @@ static int udf_create(struct user_namespace *mnt_userns, struct inode *dir, return udf_add_nondir(dentry, inode); } -static int udf_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, +static int udf_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { struct inode *inode = udf_new_inode(dir, mode); @@ -645,7 +645,7 @@ static int udf_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, return finish_open_simple(file, 0); } -static int udf_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int udf_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; @@ -661,7 +661,7 @@ static int udf_mknod(struct user_namespace *mnt_userns, struct inode *dir, return udf_add_nondir(dentry, inode); } -static int udf_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +static int udf_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; @@ -881,7 +881,7 @@ out: return retval; } -static int udf_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int udf_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { struct inode *inode = udf_new_inode(dir, S_IFLNK | 0777); @@ -1073,7 +1073,7 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir, /* Anybody can rename anything with this: the permission checks are left to the * higher-level routines. */ -static int udf_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index f3642f9c23f8..5cf763911aef 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -153,7 +153,7 @@ out_unmap: return err; } -static int udf_symlink_getattr(struct user_namespace *mnt_userns, +static int udf_symlink_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { @@ -161,7 +161,7 @@ static int udf_symlink_getattr(struct user_namespace *mnt_userns, struct inode *inode = d_backing_inode(dentry); struct page *page; - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); page = read_mapping_page(inode->i_mapping, 0, NULL); if (IS_ERR(page)) return PTR_ERR(page); diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 7e3e08c0166f..06bd84d555bd 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -289,7 +289,7 @@ cg_found: ufs_mark_sb_dirty(sb); inode->i_ino = cg * uspi->s_ipg + bit; - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_blocks = 0; inode->i_generation = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index a873de7dec1c..a4246c83a8cd 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1212,14 +1212,14 @@ out: return err; } -int ufs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int ufs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); unsigned int ia_valid = attr->ia_valid; int error; - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; @@ -1229,7 +1229,7 @@ int ufs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, return error; } - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 29d5a0e0c8f0..36154b5aca6d 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -69,7 +69,7 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsi * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int ufs_create (struct user_namespace * mnt_userns, +static int ufs_create (struct mnt_idmap * idmap, struct inode * dir, struct dentry * dentry, umode_t mode, bool excl) { @@ -86,7 +86,7 @@ static int ufs_create (struct user_namespace * mnt_userns, return ufs_add_nondir(dentry, inode); } -static int ufs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int ufs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; @@ -106,7 +106,7 @@ static int ufs_mknod(struct user_namespace *mnt_userns, struct inode *dir, return err; } -static int ufs_symlink (struct user_namespace * mnt_userns, struct inode * dir, +static int ufs_symlink (struct mnt_idmap * idmap, struct inode * dir, struct dentry * dentry, const char * symname) { struct super_block * sb = dir->i_sb; @@ -166,7 +166,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, return error; } -static int ufs_mkdir(struct user_namespace * mnt_userns, struct inode * dir, +static int ufs_mkdir(struct mnt_idmap * idmap, struct inode * dir, struct dentry * dentry, umode_t mode) { struct inode * inode; @@ -243,7 +243,7 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) return err; } -static int ufs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +static int ufs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 550f7c5a3636..6b499180643b 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -123,7 +123,7 @@ extern struct inode *ufs_iget(struct super_block *, unsigned long); extern int ufs_write_inode (struct inode *, struct writeback_control *); extern int ufs_sync_inode (struct inode *); extern void ufs_evict_inode (struct inode *); -extern int ufs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +extern int ufs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); /* namei.c */ diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 98ac37e34e3d..cc694846617a 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -108,6 +108,21 @@ static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx) return ctx->features & UFFD_FEATURE_INITIALIZED; } +static void userfaultfd_set_vm_flags(struct vm_area_struct *vma, + vm_flags_t flags) +{ + const bool uffd_wp_changed = (vma->vm_flags ^ flags) & VM_UFFD_WP; + + vma->vm_flags = flags; + /* + * For shared mappings, we want to enable writenotify while + * userfaultfd-wp is enabled (see vma_wants_writenotify()). We'll simply + * recalculate vma->vm_page_prot whenever userfaultfd-wp changes. + */ + if ((vma->vm_flags & VM_SHARED) && uffd_wp_changed) + vma_set_page_prot(vma); +} + static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, int wake_flags, void *key) { @@ -618,7 +633,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, for_each_vma(vmi, vma) { if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; - vma->vm_flags &= ~__VM_UFFD_FLAGS; + userfaultfd_set_vm_flags(vma, + vma->vm_flags & ~__VM_UFFD_FLAGS); } } mmap_write_unlock(mm); @@ -652,7 +668,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) octx = vma->vm_userfaultfd_ctx.ctx; if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; - vma->vm_flags &= ~__VM_UFFD_FLAGS; + userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS); return 0; } @@ -733,7 +749,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma, } else { /* Drop uffd context if remap feature not enabled */ vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; - vma->vm_flags &= ~__VM_UFFD_FLAGS; + userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS); } } @@ -895,7 +911,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) prev = vma; } - vma->vm_flags = new_flags; + userfaultfd_set_vm_flags(vma, new_flags); vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; } mmap_write_unlock(mm); @@ -1463,7 +1479,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, * the next vma was merged into the current one and * the current one has not been updated yet. */ - vma->vm_flags = new_flags; + userfaultfd_set_vm_flags(vma, new_flags); vma->vm_userfaultfd_ctx.ctx = ctx; if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma)) @@ -1651,7 +1667,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, * the next vma was merged into the current one and * the current one has not been updated yet. */ - vma->vm_flags = new_flags; + userfaultfd_set_vm_flags(vma, new_flags); vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; skip: diff --git a/fs/utimes.c b/fs/utimes.c index 39f356017635..3701b3946f88 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -7,6 +7,7 @@ #include <linux/uaccess.h> #include <linux/compat.h> #include <asm/unistd.h> +#include <linux/filelock.h> static bool nsec_valid(long nsec) { @@ -62,7 +63,7 @@ int vfs_utimes(const struct path *path, struct timespec64 *times) } retry_deleg: inode_lock(inode); - error = notify_change(mnt_user_ns(path->mnt), path->dentry, &newattrs, + error = notify_change(mnt_idmap(path->mnt), path->dentry, &newattrs, &delegated_inode); inode_unlock(inode); if (delegated_inode) { diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c index c4769a9396c5..075f15c43c78 100644 --- a/fs/vboxsf/dir.c +++ b/fs/vboxsf/dir.c @@ -294,14 +294,14 @@ out: return err; } -static int vboxsf_dir_mkfile(struct user_namespace *mnt_userns, +static int vboxsf_dir_mkfile(struct mnt_idmap *idmap, struct inode *parent, struct dentry *dentry, umode_t mode, bool excl) { return vboxsf_dir_create(parent, dentry, mode, false, excl, NULL); } -static int vboxsf_dir_mkdir(struct user_namespace *mnt_userns, +static int vboxsf_dir_mkdir(struct mnt_idmap *idmap, struct inode *parent, struct dentry *dentry, umode_t mode) { @@ -387,7 +387,7 @@ static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry) return 0; } -static int vboxsf_dir_rename(struct user_namespace *mnt_userns, +static int vboxsf_dir_rename(struct mnt_idmap *idmap, struct inode *old_parent, struct dentry *old_dentry, struct inode *new_parent, @@ -430,7 +430,7 @@ err_put_old_path: return err; } -static int vboxsf_dir_symlink(struct user_namespace *mnt_userns, +static int vboxsf_dir_symlink(struct mnt_idmap *idmap, struct inode *parent, struct dentry *dentry, const char *symname) { diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c index e1db0f3f7e5e..dd0ae1188e87 100644 --- a/fs/vboxsf/utils.c +++ b/fs/vboxsf/utils.c @@ -231,7 +231,7 @@ int vboxsf_inode_revalidate(struct dentry *dentry) return 0; } -int vboxsf_getattr(struct user_namespace *mnt_userns, const struct path *path, +int vboxsf_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *kstat, u32 request_mask, unsigned int flags) { int err; @@ -252,11 +252,11 @@ int vboxsf_getattr(struct user_namespace *mnt_userns, const struct path *path, if (err) return err; - generic_fillattr(&init_user_ns, d_inode(dentry), kstat); + generic_fillattr(&nop_mnt_idmap, d_inode(dentry), kstat); return 0; } -int vboxsf_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int vboxsf_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct vboxsf_inode *sf_i = VBOXSF_I(d_inode(dentry)); diff --git a/fs/vboxsf/vfsmod.h b/fs/vboxsf/vfsmod.h index 9047befa66c5..05973eb89d52 100644 --- a/fs/vboxsf/vfsmod.h +++ b/fs/vboxsf/vfsmod.h @@ -97,10 +97,10 @@ int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path, struct shfl_fsobjinfo *info); int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info); int vboxsf_inode_revalidate(struct dentry *dentry); -int vboxsf_getattr(struct user_namespace *mnt_userns, const struct path *path, +int vboxsf_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *kstat, u32 request_mask, unsigned int query_flags); -int vboxsf_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +int vboxsf_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr); struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi, struct dentry *dentry); diff --git a/fs/xattr.c b/fs/xattr.c index adab9a70b536..14a7eb3c8fa8 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -9,6 +9,7 @@ Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> */ #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/xattr.h> @@ -82,7 +83,7 @@ xattr_resolve_name(struct inode *inode, const char **name) /** * may_write_xattr - check whether inode allows writing xattr - * @mnt_userns: User namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @inode: the inode on which to set an xattr * * Check whether the inode allows writing xattrs. Specifically, we can never @@ -94,13 +95,13 @@ xattr_resolve_name(struct inode *inode, const char **name) * * Return: On success zero is returned. On error a negative errno is returned. */ -int may_write_xattr(struct user_namespace *mnt_userns, struct inode *inode) +int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode) { if (IS_IMMUTABLE(inode)) return -EPERM; if (IS_APPEND(inode)) return -EPERM; - if (HAS_UNMAPPED_ID(mnt_userns, inode)) + if (HAS_UNMAPPED_ID(idmap, inode)) return -EPERM; return 0; } @@ -110,13 +111,13 @@ int may_write_xattr(struct user_namespace *mnt_userns, struct inode *inode) * because different namespaces have very different rules. */ static int -xattr_permission(struct user_namespace *mnt_userns, struct inode *inode, +xattr_permission(struct mnt_idmap *idmap, struct inode *inode, const char *name, int mask) { if (mask & MAY_WRITE) { int ret; - ret = may_write_xattr(mnt_userns, inode); + ret = may_write_xattr(idmap, inode); if (ret) return ret; } @@ -148,11 +149,11 @@ xattr_permission(struct user_namespace *mnt_userns, struct inode *inode, return (mask & MAY_WRITE) ? -EPERM : -ENODATA; if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && (mask & MAY_WRITE) && - !inode_owner_or_capable(mnt_userns, inode)) + !inode_owner_or_capable(idmap, inode)) return -EPERM; } - return inode_permission(mnt_userns, inode, mask); + return inode_permission(idmap, inode, mask); } /* @@ -183,7 +184,7 @@ xattr_supported_namespace(struct inode *inode, const char *prefix) EXPORT_SYMBOL(xattr_supported_namespace); int -__vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, +__vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) { @@ -199,7 +200,7 @@ __vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, return -EOPNOTSUPP; if (size == 0) value = ""; /* empty EA, do not remove */ - return handler->set(handler, mnt_userns, dentry, inode, name, value, + return handler->set(handler, idmap, dentry, inode, name, value, size, flags); } EXPORT_SYMBOL(__vfs_setxattr); @@ -208,7 +209,7 @@ EXPORT_SYMBOL(__vfs_setxattr); * __vfs_setxattr_noperm - perform setxattr operation without performing * permission checks. * - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dentry: object to perform setxattr on * @name: xattr name to set * @value: value to set @name to @@ -221,7 +222,7 @@ EXPORT_SYMBOL(__vfs_setxattr); * is executed. It also assumes that the caller will make the appropriate * permission checks. */ -int __vfs_setxattr_noperm(struct user_namespace *mnt_userns, +int __vfs_setxattr_noperm(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { @@ -233,7 +234,7 @@ int __vfs_setxattr_noperm(struct user_namespace *mnt_userns, if (issec) inode->i_flags &= ~S_NOSEC; if (inode->i_opflags & IOP_XATTR) { - error = __vfs_setxattr(mnt_userns, dentry, inode, name, value, + error = __vfs_setxattr(idmap, dentry, inode, name, value, size, flags); if (!error) { fsnotify_xattr(dentry); @@ -264,7 +265,7 @@ int __vfs_setxattr_noperm(struct user_namespace *mnt_userns, * __vfs_setxattr_locked - set an extended attribute while holding the inode * lock * - * @mnt_userns: user namespace of the mount of the target inode + * @idmap: idmap of the mount of the target inode * @dentry: object to perform setxattr on * @name: xattr name to set * @value: value to set @name to @@ -274,18 +275,18 @@ int __vfs_setxattr_noperm(struct user_namespace *mnt_userns, * a delegation was broken on, NULL if none. */ int -__vfs_setxattr_locked(struct user_namespace *mnt_userns, struct dentry *dentry, +__vfs_setxattr_locked(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags, struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; int error; - error = xattr_permission(mnt_userns, inode, name, MAY_WRITE); + error = xattr_permission(idmap, inode, name, MAY_WRITE); if (error) return error; - error = security_inode_setxattr(mnt_userns, dentry, name, value, size, + error = security_inode_setxattr(idmap, dentry, name, value, size, flags); if (error) goto out; @@ -294,7 +295,7 @@ __vfs_setxattr_locked(struct user_namespace *mnt_userns, struct dentry *dentry, if (error) goto out; - error = __vfs_setxattr_noperm(mnt_userns, dentry, name, value, + error = __vfs_setxattr_noperm(idmap, dentry, name, value, size, flags); out: @@ -303,7 +304,7 @@ out: EXPORT_SYMBOL_GPL(__vfs_setxattr_locked); int -vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, +vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; @@ -312,7 +313,7 @@ vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, int error; if (size && strcmp(name, XATTR_NAME_CAPS) == 0) { - error = cap_convert_nscap(mnt_userns, dentry, &value, size); + error = cap_convert_nscap(idmap, dentry, &value, size); if (error < 0) return error; size = error; @@ -320,7 +321,7 @@ vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, retry_deleg: inode_lock(inode); - error = __vfs_setxattr_locked(mnt_userns, dentry, name, value, size, + error = __vfs_setxattr_locked(idmap, dentry, name, value, size, flags, &delegated_inode); inode_unlock(inode); @@ -337,19 +338,19 @@ retry_deleg: EXPORT_SYMBOL_GPL(vfs_setxattr); static ssize_t -xattr_getsecurity(struct user_namespace *mnt_userns, struct inode *inode, +xattr_getsecurity(struct mnt_idmap *idmap, struct inode *inode, const char *name, void *value, size_t size) { void *buffer = NULL; ssize_t len; if (!value || !size) { - len = security_inode_getsecurity(mnt_userns, inode, name, + len = security_inode_getsecurity(idmap, inode, name, &buffer, false); goto out_noalloc; } - len = security_inode_getsecurity(mnt_userns, inode, name, &buffer, + len = security_inode_getsecurity(idmap, inode, name, &buffer, true); if (len < 0) return len; @@ -374,7 +375,7 @@ out_noalloc: * Returns the result of alloc, if failed, or the getxattr operation. */ int -vfs_getxattr_alloc(struct user_namespace *mnt_userns, struct dentry *dentry, +vfs_getxattr_alloc(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, char **xattr_value, size_t xattr_size, gfp_t flags) { @@ -383,7 +384,7 @@ vfs_getxattr_alloc(struct user_namespace *mnt_userns, struct dentry *dentry, char *value = *xattr_value; int error; - error = xattr_permission(mnt_userns, inode, name, MAY_READ); + error = xattr_permission(idmap, inode, name, MAY_READ); if (error) return error; @@ -427,13 +428,13 @@ __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name, EXPORT_SYMBOL(__vfs_getxattr); ssize_t -vfs_getxattr(struct user_namespace *mnt_userns, struct dentry *dentry, +vfs_getxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, void *value, size_t size) { struct inode *inode = dentry->d_inode; int error; - error = xattr_permission(mnt_userns, inode, name, MAY_READ); + error = xattr_permission(idmap, inode, name, MAY_READ); if (error) return error; @@ -444,7 +445,7 @@ vfs_getxattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; - int ret = xattr_getsecurity(mnt_userns, inode, suffix, value, + int ret = xattr_getsecurity(idmap, inode, suffix, value, size); /* * Only overwrite the return value if a security module @@ -480,7 +481,7 @@ vfs_listxattr(struct dentry *dentry, char *list, size_t size) EXPORT_SYMBOL_GPL(vfs_listxattr); int -__vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry, +__vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name) { struct inode *inode = d_inode(dentry); @@ -494,7 +495,7 @@ __vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry, return PTR_ERR(handler); if (!handler->set) return -EOPNOTSUPP; - return handler->set(handler, mnt_userns, dentry, inode, name, NULL, 0, + return handler->set(handler, idmap, dentry, inode, name, NULL, 0, XATTR_REPLACE); } EXPORT_SYMBOL(__vfs_removexattr); @@ -503,25 +504,25 @@ EXPORT_SYMBOL(__vfs_removexattr); * __vfs_removexattr_locked - set an extended attribute while holding the inode * lock * - * @mnt_userns: user namespace of the mount of the target inode + * @idmap: idmap of the mount of the target inode * @dentry: object to perform setxattr on * @name: name of xattr to remove * @delegated_inode: on return, will contain an inode pointer that * a delegation was broken on, NULL if none. */ int -__vfs_removexattr_locked(struct user_namespace *mnt_userns, +__vfs_removexattr_locked(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; int error; - error = xattr_permission(mnt_userns, inode, name, MAY_WRITE); + error = xattr_permission(idmap, inode, name, MAY_WRITE); if (error) return error; - error = security_inode_removexattr(mnt_userns, dentry, name); + error = security_inode_removexattr(idmap, dentry, name); if (error) goto out; @@ -529,7 +530,7 @@ __vfs_removexattr_locked(struct user_namespace *mnt_userns, if (error) goto out; - error = __vfs_removexattr(mnt_userns, dentry, name); + error = __vfs_removexattr(idmap, dentry, name); if (!error) { fsnotify_xattr(dentry); @@ -542,7 +543,7 @@ out: EXPORT_SYMBOL_GPL(__vfs_removexattr_locked); int -vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry, +vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name) { struct inode *inode = dentry->d_inode; @@ -551,7 +552,7 @@ vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry, retry_deleg: inode_lock(inode); - error = __vfs_removexattr_locked(mnt_userns, dentry, + error = __vfs_removexattr_locked(idmap, dentry, name, &delegated_inode); inode_unlock(inode); @@ -605,7 +606,7 @@ int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, return do_set_acl(idmap, dentry, ctx->kname->name, ctx->kvalue, ctx->size); - return vfs_setxattr(mnt_idmap_owner(idmap), dentry, ctx->kname->name, + return vfs_setxattr(idmap, dentry, ctx->kname->name, ctx->kvalue, ctx->size, ctx->flags); } @@ -714,8 +715,7 @@ do_getxattr(struct mnt_idmap *idmap, struct dentry *d, if (is_posix_acl_xattr(ctx->kname->name)) error = do_get_acl(idmap, d, kname, ctx->kvalue, ctx->size); else - error = vfs_getxattr(mnt_idmap_owner(idmap), d, kname, - ctx->kvalue, ctx->size); + error = vfs_getxattr(idmap, d, kname, ctx->kvalue, ctx->size); if (error > 0) { if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error)) error = -EFAULT; @@ -892,9 +892,9 @@ removexattr(struct mnt_idmap *idmap, struct dentry *d, return error; if (is_posix_acl_xattr(kname)) - return vfs_remove_acl(mnt_idmap_owner(idmap), d, kname); + return vfs_remove_acl(idmap, d, kname); - return vfs_removexattr(mnt_idmap_owner(idmap), d, kname); + return vfs_removexattr(idmap, d, kname); } static int path_removexattr(const char __user *pathname, diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 4c16c8c31fcb..35f574421670 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4666,7 +4666,12 @@ xfs_btree_space_to_height( const unsigned int *limits, unsigned long long leaf_blocks) { - unsigned long long node_blocks = limits[1]; + /* + * The root btree block can have fewer than minrecs pointers in it + * because the tree might not be big enough to require that amount of + * fanout. Hence it has a minimum size of 2 pointers, not limits[1]. + */ + unsigned long long node_blocks = 2; unsigned long long blocks_left = leaf_blocks - 1; unsigned int height = 1; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index a05f44eb8178..791db7d9c849 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -242,7 +242,7 @@ xfs_acl_set_mode( } int -xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +xfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { umode_t mode; @@ -258,7 +258,7 @@ xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, return error; if (type == ACL_TYPE_ACCESS) { - error = posix_acl_update_mode(mnt_userns, inode, &mode, &acl); + error = posix_acl_update_mode(idmap, inode, &mode, &acl); if (error) return error; set_mode = true; diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index dcd176149c7a..bf7f960997d3 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -11,7 +11,7 @@ struct posix_acl; #ifdef CONFIG_XFS_POSIX_ACL extern struct posix_acl *xfs_get_acl(struct inode *inode, int type, bool rcu); -extern int xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +extern int xfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); void xfs_forget_acl(struct inode *inode, const char *name); diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index ad22a003f959..f3d328e4a440 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -236,6 +236,7 @@ xfs_extent_busy_update_extent( * */ busyp->bno = fend; + busyp->length = bend - fend; } else if (bbno < fbno) { /* * Case 8: diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 595a5bcf46b9..d06c0cc62f61 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1047,7 +1047,7 @@ xfs_file_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = xfs_vn_setattr_size(file_mnt_user_ns(file), + error = xfs_vn_setattr_size(file_mnt_idmap(file), file_dentry(file), &iattr); if (error) goto out_unlock; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index f35e2cee5265..ddeaccc04aec 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1853,12 +1853,20 @@ xfs_inodegc_worker( struct xfs_inodegc, work); struct llist_node *node = llist_del_all(&gc->list); struct xfs_inode *ip, *n; + unsigned int nofs_flag; WRITE_ONCE(gc->items, 0); if (!node) return; + /* + * We can allocate memory here while doing writeback on behalf of + * memory reclaim. To avoid memory allocation deadlocks set the + * task-wide nofs context for the following operations. + */ + nofs_flag = memalloc_nofs_save(); + ip = llist_entry(node, struct xfs_inode, i_gclist); trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits)); @@ -1867,6 +1875,8 @@ xfs_inodegc_worker( xfs_iflags_set(ip, XFS_INACTIVATING); xfs_inodegc_inactivate(ip); } + + memalloc_nofs_restore(nofs_flag); } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d354ea2b74f9..7f1d715faab5 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -777,7 +777,7 @@ xfs_inode_inherit_flags2( */ int xfs_init_new_inode( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct xfs_trans *tp, struct xfs_inode *pip, xfs_ino_t ino, @@ -823,11 +823,11 @@ xfs_init_new_inode( ip->i_projid = prid; if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) { - inode_fsuid_set(inode, mnt_userns); + inode_fsuid_set(inode, idmap); inode->i_gid = dir->i_gid; inode->i_mode = mode; } else { - inode_init_owner(mnt_userns, inode, dir, mode); + inode_init_owner(idmap, inode, dir, mode); } /* @@ -836,7 +836,7 @@ xfs_init_new_inode( * (and only if the irix_sgid_inherit compatibility variable is set). */ if (irix_sgid_inherit && (inode->i_mode & S_ISGID) && - !vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode))) + !vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode))) inode->i_mode &= ~S_ISGID; ip->i_disk_size = 0; @@ -946,7 +946,7 @@ xfs_bumplink( int xfs_create( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, xfs_inode_t *dp, struct xfs_name *name, umode_t mode, @@ -978,8 +978,8 @@ xfs_create( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), - mapped_fsgid(mnt_userns, &init_user_ns), prid, + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), + mapped_fsgid(idmap, &init_user_ns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) @@ -1020,7 +1020,7 @@ xfs_create( */ error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); if (!error) - error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode, + error = xfs_init_new_inode(idmap, tp, dp, ino, mode, is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip); if (error) goto out_trans_cancel; @@ -1102,7 +1102,7 @@ xfs_create( int xfs_create_tmpfile( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct xfs_inode *dp, umode_t mode, struct xfs_inode **ipp) @@ -1127,8 +1127,8 @@ xfs_create_tmpfile( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), - mapped_fsgid(mnt_userns, &init_user_ns), prid, + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), + mapped_fsgid(idmap, &init_user_ns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) @@ -1144,7 +1144,7 @@ xfs_create_tmpfile( error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); if (!error) - error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode, + error = xfs_init_new_inode(idmap, tp, dp, ino, mode, 0, 0, prid, false, &ip); if (error) goto out_trans_cancel; @@ -2709,7 +2709,7 @@ out_trans_abort: */ static int xfs_rename_alloc_whiteout( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct xfs_name *src_name, struct xfs_inode *dp, struct xfs_inode **wip) @@ -2718,7 +2718,7 @@ xfs_rename_alloc_whiteout( struct qstr name; int error; - error = xfs_create_tmpfile(mnt_userns, dp, S_IFCHR | WHITEOUT_MODE, + error = xfs_create_tmpfile(idmap, dp, S_IFCHR | WHITEOUT_MODE, &tmpfile); if (error) return error; @@ -2750,7 +2750,7 @@ xfs_rename_alloc_whiteout( */ int xfs_rename( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, @@ -2782,7 +2782,7 @@ xfs_rename( * appropriately. */ if (flags & RENAME_WHITEOUT) { - error = xfs_rename_alloc_whiteout(mnt_userns, src_name, + error = xfs_rename_alloc_whiteout(idmap, src_name, target_dp, &wip); if (error) return error; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index fa780f08dc89..69d21e42c10a 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -473,18 +473,18 @@ int xfs_release(struct xfs_inode *ip); void xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); -int xfs_create(struct user_namespace *mnt_userns, +int xfs_create(struct mnt_idmap *idmap, struct xfs_inode *dp, struct xfs_name *name, umode_t mode, dev_t rdev, bool need_xattr, struct xfs_inode **ipp); -int xfs_create_tmpfile(struct user_namespace *mnt_userns, +int xfs_create_tmpfile(struct mnt_idmap *idmap, struct xfs_inode *dp, umode_t mode, struct xfs_inode **ipp); int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode *ip); int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, struct xfs_name *target_name); -int xfs_rename(struct user_namespace *mnt_userns, +int xfs_rename(struct mnt_idmap *idmap, struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, struct xfs_name *target_name, @@ -515,7 +515,7 @@ void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode, xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); -int xfs_init_new_inode(struct user_namespace *mnt_userns, struct xfs_trans *tp, +int xfs_init_new_inode(struct mnt_idmap *idmap, struct xfs_trans *tp, struct xfs_inode *pip, xfs_ino_t ino, umode_t mode, xfs_nlink_t nlink, dev_t rdev, prid_t prid, bool init_xattrs, struct xfs_inode **ipp); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 13f1b2add390..55bb01173cde 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -665,7 +665,7 @@ xfs_ioc_fsbulkstat( struct xfs_fsop_bulkreq bulkreq; struct xfs_ibulk breq = { .mp = mp, - .mnt_userns = file_mnt_user_ns(file), + .idmap = file_mnt_idmap(file), .ocount = 0, }; xfs_ino_t lastino; @@ -754,7 +754,7 @@ xfs_bulkstat_fmt( static int xfs_bulk_ireq_setup( struct xfs_mount *mp, - struct xfs_bulk_ireq *hdr, + const struct xfs_bulk_ireq *hdr, struct xfs_ibulk *breq, void __user *ubuffer) { @@ -780,7 +780,7 @@ xfs_bulk_ireq_setup( switch (hdr->ino) { case XFS_BULK_IREQ_SPECIAL_ROOT: - hdr->ino = mp->m_sb.sb_rootino; + breq->startino = mp->m_sb.sb_rootino; break; default: return -EINVAL; @@ -844,7 +844,7 @@ xfs_ioc_bulkstat( struct xfs_bulk_ireq hdr; struct xfs_ibulk breq = { .mp = mp, - .mnt_userns = file_mnt_user_ns(file), + .idmap = file_mnt_idmap(file), }; int error; @@ -1297,7 +1297,7 @@ xfs_ioctl_setattr_check_projid( int xfs_fileattr_set( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { @@ -1371,7 +1371,7 @@ xfs_fileattr_set( */ if ((VFS_I(ip)->i_mode & (S_ISUID|S_ISGID)) && - !capable_wrt_inode_uidgid(mnt_userns, VFS_I(ip), CAP_FSETID)) + !capable_wrt_inode_uidgid(idmap, VFS_I(ip), CAP_FSETID)) VFS_I(ip)->i_mode &= ~(S_ISUID|S_ISGID); /* Change the ownerships and register project quota modifications */ diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index d4abba2c13c1..38be600b5e1e 100644 --- a/fs/xfs/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h @@ -49,7 +49,7 @@ xfs_fileattr_get( extern int xfs_fileattr_set( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 2f54b701eead..ee35eea1ecce 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -204,7 +204,7 @@ xfs_compat_ioc_fsbulkstat( struct xfs_fsop_bulkreq bulkreq; struct xfs_ibulk breq = { .mp = mp, - .mnt_userns = file_mnt_user_ns(file), + .idmap = file_mnt_idmap(file), .ocount = 0, }; xfs_ino_t lastino; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 669c1bc5c3a7..fc1946f80a4a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -83,7 +83,7 @@ xfs_iomap_valid( return true; } -const struct iomap_page_ops xfs_iomap_page_ops = { +static const struct iomap_page_ops xfs_iomap_page_ops = { .iomap_valid = xfs_iomap_valid, }; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 515318dfbc38..24718adb3c16 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -162,12 +162,12 @@ xfs_create_need_xattr( STATIC int xfs_generic_create( - struct user_namespace *mnt_userns, - struct inode *dir, - struct dentry *dentry, - umode_t mode, - dev_t rdev, - struct file *tmpfile) /* unnamed file */ + struct mnt_idmap *idmap, + struct inode *dir, + struct dentry *dentry, + umode_t mode, + dev_t rdev, + struct file *tmpfile) /* unnamed file */ { struct inode *inode; struct xfs_inode *ip = NULL; @@ -196,11 +196,11 @@ xfs_generic_create( goto out_free_acl; if (!tmpfile) { - error = xfs_create(mnt_userns, XFS_I(dir), &name, mode, rdev, + error = xfs_create(idmap, XFS_I(dir), &name, mode, rdev, xfs_create_need_xattr(dir, default_acl, acl), &ip); } else { - error = xfs_create_tmpfile(mnt_userns, XFS_I(dir), mode, &ip); + error = xfs_create_tmpfile(idmap, XFS_I(dir), mode, &ip); } if (unlikely(error)) goto out_free_acl; @@ -255,35 +255,34 @@ xfs_generic_create( STATIC int xfs_vn_mknod( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { - return xfs_generic_create(mnt_userns, dir, dentry, mode, rdev, NULL); + return xfs_generic_create(idmap, dir, dentry, mode, rdev, NULL); } STATIC int xfs_vn_create( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool flags) { - return xfs_generic_create(mnt_userns, dir, dentry, mode, 0, NULL); + return xfs_generic_create(idmap, dir, dentry, mode, 0, NULL); } STATIC int xfs_vn_mkdir( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { - return xfs_generic_create(mnt_userns, dir, dentry, mode | S_IFDIR, 0, - NULL); + return xfs_generic_create(idmap, dir, dentry, mode | S_IFDIR, 0, NULL); } STATIC struct dentry * @@ -400,7 +399,7 @@ xfs_vn_unlink( STATIC int xfs_vn_symlink( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) @@ -417,7 +416,7 @@ xfs_vn_symlink( if (unlikely(error)) goto out; - error = xfs_symlink(mnt_userns, XFS_I(dir), &name, symname, mode, &cip); + error = xfs_symlink(idmap, XFS_I(dir), &name, symname, mode, &cip); if (unlikely(error)) goto out; @@ -443,7 +442,7 @@ xfs_vn_symlink( STATIC int xfs_vn_rename( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct inode *odir, struct dentry *odentry, struct inode *ndir, @@ -472,7 +471,7 @@ xfs_vn_rename( if (unlikely(error)) return error; - return xfs_rename(mnt_userns, XFS_I(odir), &oname, + return xfs_rename(idmap, XFS_I(odir), &oname, XFS_I(d_inode(odentry)), XFS_I(ndir), &nname, new_inode ? XFS_I(new_inode) : NULL, flags); } @@ -549,7 +548,7 @@ xfs_stat_blksize( STATIC int xfs_vn_getattr( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, @@ -558,8 +557,8 @@ xfs_vn_getattr( struct inode *inode = d_inode(path->dentry); struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode); - vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode); + vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); + vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); trace_xfs_getattr(ip); @@ -627,7 +626,7 @@ xfs_vn_getattr( static int xfs_vn_change_ok( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { @@ -639,7 +638,7 @@ xfs_vn_change_ok( if (xfs_is_shutdown(mp)) return -EIO; - return setattr_prepare(mnt_userns, dentry, iattr); + return setattr_prepare(idmap, dentry, iattr); } /* @@ -650,7 +649,7 @@ xfs_vn_change_ok( */ static int xfs_setattr_nonsize( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct xfs_inode *ip, struct iattr *iattr) @@ -679,14 +678,14 @@ xfs_setattr_nonsize( uint qflags = 0; if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { - uid = from_vfsuid(mnt_userns, i_user_ns(inode), + uid = from_vfsuid(idmap, i_user_ns(inode), iattr->ia_vfsuid); qflags |= XFS_QMOPT_UQUOTA; } else { uid = inode->i_uid; } if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { - gid = from_vfsgid(mnt_userns, i_user_ns(inode), + gid = from_vfsgid(idmap, i_user_ns(inode), iattr->ia_vfsgid); qflags |= XFS_QMOPT_GQUOTA; } else { @@ -719,18 +718,18 @@ xfs_setattr_nonsize( * also. */ if (XFS_IS_UQUOTA_ON(mp) && - i_uid_needs_update(mnt_userns, iattr, inode)) { + i_uid_needs_update(idmap, iattr, inode)) { ASSERT(udqp); old_udqp = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp); } if (XFS_IS_GQUOTA_ON(mp) && - i_gid_needs_update(mnt_userns, iattr, inode)) { + i_gid_needs_update(idmap, iattr, inode)) { ASSERT(xfs_has_pquotino(mp) || !XFS_IS_PQUOTA_ON(mp)); ASSERT(gdqp); old_gdqp = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } - setattr_copy(mnt_userns, inode, iattr); + setattr_copy(idmap, inode, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); @@ -758,7 +757,7 @@ xfs_setattr_nonsize( * Posix ACL code seems to care about this issue either. */ if (mask & ATTR_MODE) { - error = posix_acl_chmod(mnt_userns, dentry, inode->i_mode); + error = posix_acl_chmod(idmap, dentry, inode->i_mode); if (error) return error; } @@ -779,7 +778,7 @@ out_dqrele: */ STATIC int xfs_setattr_size( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct xfs_inode *ip, struct iattr *iattr) @@ -812,7 +811,7 @@ xfs_setattr_size( * Use the regular setattr path to update the timestamps. */ iattr->ia_valid &= ~ATTR_SIZE; - return xfs_setattr_nonsize(mnt_userns, dentry, ip, iattr); + return xfs_setattr_nonsize(idmap, dentry, ip, iattr); } /* @@ -956,7 +955,7 @@ xfs_setattr_size( } ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID))); - setattr_copy(mnt_userns, inode, iattr); + setattr_copy(idmap, inode, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); @@ -977,7 +976,7 @@ out_trans_cancel: int xfs_vn_setattr_size( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { @@ -986,15 +985,15 @@ xfs_vn_setattr_size( trace_xfs_setattr(ip); - error = xfs_vn_change_ok(mnt_userns, dentry, iattr); + error = xfs_vn_change_ok(idmap, dentry, iattr); if (error) return error; - return xfs_setattr_size(mnt_userns, dentry, ip, iattr); + return xfs_setattr_size(idmap, dentry, ip, iattr); } STATIC int xfs_vn_setattr( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { @@ -1014,14 +1013,14 @@ xfs_vn_setattr( return error; } - error = xfs_vn_setattr_size(mnt_userns, dentry, iattr); + error = xfs_vn_setattr_size(idmap, dentry, iattr); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); } else { trace_xfs_setattr(ip); - error = xfs_vn_change_ok(mnt_userns, dentry, iattr); + error = xfs_vn_change_ok(idmap, dentry, iattr); if (!error) - error = xfs_setattr_nonsize(mnt_userns, dentry, ip, iattr); + error = xfs_setattr_nonsize(idmap, dentry, ip, iattr); } return error; @@ -1092,12 +1091,12 @@ xfs_vn_fiemap( STATIC int xfs_vn_tmpfile( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { - int err = xfs_generic_create(mnt_userns, dir, file->f_path.dentry, mode, 0, file); + int err = xfs_generic_create(idmap, dir, file->f_path.dentry, mode, 0, file); return finish_open_simple(file, err); } diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index e570dcb5df8d..7f84a0843b24 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h @@ -13,7 +13,7 @@ extern const struct file_operations xfs_dir_file_operations; extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); -int xfs_vn_setattr_size(struct user_namespace *mnt_userns, +int xfs_vn_setattr_size(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *vap); int xfs_inode_init_security(struct inode *inode, struct inode *dir, diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index a1c2bcf65d37..f225413a993c 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -55,7 +55,7 @@ struct xfs_bstat_chunk { STATIC int xfs_bulkstat_one_int( struct xfs_mount *mp, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct xfs_trans *tp, xfs_ino_t ino, struct xfs_bstat_chunk *bc) @@ -83,8 +83,8 @@ xfs_bulkstat_one_int( ASSERT(ip != NULL); ASSERT(ip->i_imap.im_blkno != 0); inode = VFS_I(ip); - vfsuid = i_uid_into_vfsuid(mnt_userns, inode); - vfsgid = i_gid_into_vfsgid(mnt_userns, inode); + vfsuid = i_uid_into_vfsuid(idmap, inode); + vfsgid = i_gid_into_vfsgid(idmap, inode); /* xfs_iget returns the following without needing * further change. @@ -178,7 +178,7 @@ xfs_bulkstat_one( struct xfs_trans *tp; int error; - if (breq->mnt_userns != &init_user_ns) { + if (breq->idmap != &nop_mnt_idmap) { xfs_warn_ratelimited(breq->mp, "bulkstat not supported inside of idmapped mounts."); return -EINVAL; @@ -199,7 +199,7 @@ xfs_bulkstat_one( if (error) goto out; - error = xfs_bulkstat_one_int(breq->mp, breq->mnt_userns, tp, + error = xfs_bulkstat_one_int(breq->mp, breq->idmap, tp, breq->startino, &bc); xfs_trans_cancel(tp); out: @@ -225,7 +225,7 @@ xfs_bulkstat_iwalk( struct xfs_bstat_chunk *bc = data; int error; - error = xfs_bulkstat_one_int(mp, bc->breq->mnt_userns, tp, ino, data); + error = xfs_bulkstat_one_int(mp, bc->breq->idmap, tp, ino, data); /* bulkstat just skips over missing inodes */ if (error == -ENOENT || error == -EINVAL) return 0; @@ -270,7 +270,7 @@ xfs_bulkstat( unsigned int iwalk_flags = 0; int error; - if (breq->mnt_userns != &init_user_ns) { + if (breq->idmap != &nop_mnt_idmap) { xfs_warn_ratelimited(breq->mp, "bulkstat not supported inside of idmapped mounts."); return -EINVAL; diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index e2d0eba43f35..1659f13f17a8 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -8,7 +8,7 @@ /* In-memory representation of a userspace request for batch inode data. */ struct xfs_ibulk { struct xfs_mount *mp; - struct user_namespace *mnt_userns; + struct mnt_idmap *idmap; void __user *ubuffer; /* user output buffer */ xfs_ino_t startino; /* start with this inode */ unsigned int icount; /* number of elements in ubuffer */ diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index f9878021e7d0..e88f18f85e4b 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -34,6 +34,7 @@ typedef __u32 xfs_nlink_t; #include <linux/module.h> #include <linux/mutex.h> #include <linux/file.h> +#include <linux/filelock.h> #include <linux/swap.h> #include <linux/errno.h> #include <linux/sched/signal.h> diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 38d23f0e703a..23d16186e1a3 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -322,7 +322,7 @@ xfs_fs_commit_blocks( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID))); - setattr_copy(&init_user_ns, inode, iattr); + setattr_copy(&nop_mnt_idmap, inode, iattr); if (update_isize) { i_size_write(inode, iattr->ia_size); ip->i_disk_size = iattr->ia_size; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index ff53d40a2dae..7dc0db7f5a76 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -68,7 +68,7 @@ restart: while (1) { struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH]; - int error = 0; + int error; int i; mutex_lock(&qi->qi_tree_lock); @@ -787,7 +787,7 @@ xfs_qm_qino_alloc( error = xfs_dialloc(&tp, 0, S_IFREG, &ino); if (!error) - error = xfs_init_new_inode(&init_user_ns, tp, NULL, ino, + error = xfs_init_new_inode(&nop_mnt_idmap, tp, NULL, ino, S_IFREG, 1, 0, 0, false, ipp); if (error) { xfs_trans_cancel(tp); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index fe46bce8cae6..5535778a98f9 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -416,8 +416,6 @@ xfs_reflink_fill_cow_hole( goto convert; } - ASSERT(cmap->br_startoff > imap->br_startoff); - /* Allocate the entire reservation as unwritten blocks. */ nimaps = 1; error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 8389f3ef88ef..85e433df6a3f 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -144,7 +144,7 @@ xfs_readlink( int xfs_symlink( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, @@ -193,8 +193,8 @@ xfs_symlink( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), - mapped_fsgid(mnt_userns, &init_user_ns), prid, + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), + mapped_fsgid(idmap, &init_user_ns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) @@ -231,7 +231,7 @@ xfs_symlink( */ error = xfs_dialloc(&tp, dp->i_ino, S_IFLNK, &ino); if (!error) - error = xfs_init_new_inode(mnt_userns, tp, dp, ino, + error = xfs_init_new_inode(idmap, tp, dp, ino, S_IFLNK | (mode & ~S_IFMT), 1, 0, prid, false, &ip); if (error) diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h index 2586b7e393f3..d1ca1ce62a93 100644 --- a/fs/xfs/xfs_symlink.h +++ b/fs/xfs/xfs_symlink.h @@ -7,7 +7,7 @@ /* Kernel only symlink definitions */ -int xfs_symlink(struct user_namespace *mnt_userns, struct xfs_inode *dp, +int xfs_symlink(struct mnt_idmap *idmap, struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, umode_t mode, struct xfs_inode **ipp); int xfs_readlink_bmap_ilocked(struct xfs_inode *ip, char *link); diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 10aa1fd39d2b..7b9a0ed1b11f 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -133,7 +133,7 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, static int xfs_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, struct dentry *unused, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) { diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 2c53fbb8d918..72ef97320b99 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -442,6 +442,10 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, data_size = zonefs_check_zone_condition(inode, zone, false, false); } + } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO && + data_size > isize) { + /* Do not expose garbage data */ + data_size = isize; } /* @@ -600,7 +604,7 @@ unlock: return ret; } -static int zonefs_inode_setattr(struct user_namespace *mnt_userns, +static int zonefs_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); @@ -609,7 +613,7 @@ static int zonefs_inode_setattr(struct user_namespace *mnt_userns, if (unlikely(IS_IMMUTABLE(inode))) return -EPERM; - ret = setattr_prepare(&init_user_ns, dentry, iattr); + ret = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (ret) return ret; @@ -626,7 +630,7 @@ static int zonefs_inode_setattr(struct user_namespace *mnt_userns, !uid_eq(iattr->ia_uid, inode->i_uid)) || ((iattr->ia_valid & ATTR_GID) && !gid_eq(iattr->ia_gid, inode->i_gid))) { - ret = dquot_transfer(mnt_userns, inode, iattr); + ret = dquot_transfer(&nop_mnt_idmap, inode, iattr); if (ret) return ret; } @@ -637,7 +641,7 @@ static int zonefs_inode_setattr(struct user_namespace *mnt_userns, return ret; } - setattr_copy(&init_user_ns, inode, iattr); + setattr_copy(&nop_mnt_idmap, inode, iattr); return 0; } @@ -805,6 +809,24 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) ret = submit_bio_wait(bio); + /* + * If the file zone was written underneath the file system, the zone + * write pointer may not be where we expect it to be, but the zone + * append write can still succeed. So check manually that we wrote where + * we intended to, that is, at zi->i_wpoffset. + */ + if (!ret) { + sector_t wpsector = + zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT); + + if (bio->bi_iter.bi_sector != wpsector) { + zonefs_warn(inode->i_sb, + "Corrupted write pointer %llu for zone at %llu\n", + wpsector, zi->i_zsector); + ret = -EIO; + } + } + zonefs_file_write_dio_end_io(iocb, size, ret, 0); trace_zonefs_file_dio_append(inode, size, ret); @@ -1405,7 +1427,7 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode, struct super_block *sb = parent->i_sb; inode->i_ino = bdev_nr_zones(sb->s_bdev) + type + 1; - inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555); + inode_init_owner(&nop_mnt_idmap, inode, parent, S_IFDIR | 0555); inode->i_op = &zonefs_dir_inode_operations; inode->i_fop = &simple_dir_operations; set_nlink(inode, 2); |