From b236017acffa73d52eac9427f42d8993067d20fb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 31 May 2016 12:26:41 -0500 Subject: ipc: Initialize ipc_namespace->user_ns early. Allow the ipc namespace initialization code to depend on ns->user_ns being set during initialization. In particular this allows mq_init_ns to use ns->user_ns for permission checks and initializating s_user_ns while the the mq filesystem is being mounted. Acked-by: Seth Forshee Suggested-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- ipc/namespace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'ipc') diff --git a/ipc/namespace.c b/ipc/namespace.c index 068caf18d565..04cb07eb81f1 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -34,8 +34,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, ns->ns.ops = &ipcns_operations; atomic_set(&ns->count, 1); + ns->user_ns = get_user_ns(user_ns); + err = mq_init_ns(ns); if (err) { + put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); kfree(ns); return ERR_PTR(err); @@ -46,8 +49,6 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, msg_init_ns(ns); shm_init_ns(ns); - ns->user_ns = get_user_ns(user_ns); - return ns; } -- cgit v1.2.3 From d91ee87d8d85a0808c01787e8b4a6b48f2ba487b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 23 May 2016 14:51:59 -0500 Subject: vfs: Pass data, ns, and ns->userns to mount_ns Today what is normally called data (the mount options) is not passed to fill_super through mount_ns. Pass the mount options and the namespace separately to mount_ns so that filesystems such as proc that have mount options, can use mount_ns. Pass the user namespace to mount_ns so that the standard permission check that verifies the mounter has permissions over the namespace can be performed in mount_ns instead of in each filesystems .mount method. Thus removing the duplication between mqueuefs and proc in terms of permission checks. The extra permission check does not currently affect the rpc_pipefs filesystem and the nfsd filesystem as those filesystems do not currently allow unprivileged mounts. Without unpvileged mounts it is guaranteed that the caller has already passed capable(CAP_SYS_ADMIN) which guarantees extra permission check will pass. Update rpc_pipefs and the nfsd filesystem to ensure that the network namespace reference is always taken in fill_super and always put in kill_sb so that the logic is simpler and so that errors originating inside of fill_super do not cause a network namespace leak. Acked-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- fs/nfsd/nfsctl.c | 13 ++++--------- fs/super.c | 13 ++++++++++--- include/linux/fs.h | 5 +++-- ipc/mqueue.c | 19 ++++++++----------- net/sunrpc/rpc_pipe.c | 8 ++++---- 5 files changed, 29 insertions(+), 29 deletions(-) (limited to 'ipc') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 9690cb4dd588..5a6ae2522266 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1154,20 +1154,15 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) #endif /* last one */ {""} }; - struct net *net = data; - int ret; - - ret = simple_fill_super(sb, 0x6e667364, nfsd_files); - if (ret) - return ret; - sb->s_fs_info = get_net(net); - return 0; + get_net(sb->s_fs_info); + return simple_fill_super(sb, 0x6e667364, nfsd_files); } static struct dentry *nfsd_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_ns(fs_type, flags, current->nsproxy->net_ns, nfsd_fill_super); + struct net *net = current->nsproxy->net_ns; + return mount_ns(fs_type, flags, data, net, net->user_ns, nfsd_fill_super); } static void nfsd_umount(struct super_block *sb) diff --git a/fs/super.c b/fs/super.c index d78b9847e6cb..fd65667832e5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -918,12 +918,19 @@ static int ns_set_super(struct super_block *sb, void *data) return set_anon_super(sb, NULL); } -struct dentry *mount_ns(struct file_system_type *fs_type, int flags, - void *data, int (*fill_super)(struct super_block *, void *, int)) +struct dentry *mount_ns(struct file_system_type *fs_type, + int flags, void *data, void *ns, struct user_namespace *user_ns, + int (*fill_super)(struct super_block *, void *, int)) { struct super_block *sb; - sb = sget(fs_type, ns_test_super, ns_set_super, flags, data); + /* Don't allow mounting unless the caller has CAP_SYS_ADMIN + * over the namespace. + */ + if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + sb = sget(fs_type, ns_test_super, ns_set_super, flags, ns); if (IS_ERR(sb)) return ERR_CAST(sb); diff --git a/include/linux/fs.h b/include/linux/fs.h index 71988dd3af95..1ce006a24f49 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2034,8 +2034,9 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) -extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, - void *data, int (*fill_super)(struct super_block *, void *, int)); +extern struct dentry *mount_ns(struct file_system_type *fs_type, + int flags, void *data, void *ns, struct user_namespace *user_ns, + int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index ade739f67f1d..60d97082f4dc 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -305,7 +305,7 @@ err: static int mqueue_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; - struct ipc_namespace *ns = data; + struct ipc_namespace *ns = sb->s_fs_info; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; @@ -326,17 +326,14 @@ static struct dentry *mqueue_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - if (!(flags & MS_KERNMOUNT)) { - struct ipc_namespace *ns = current->nsproxy->ipc_ns; - /* Don't allow mounting unless the caller has CAP_SYS_ADMIN - * over the ipc namespace. - */ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) - return ERR_PTR(-EPERM); - - data = ns; + struct ipc_namespace *ns; + if (flags & MS_KERNMOUNT) { + ns = data; + data = NULL; + } else { + ns = current->nsproxy->ipc_ns; } - return mount_ns(fs_type, flags, data, mqueue_fill_super); + return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super); } static void init_once(void *foo) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index fc48eca21fd2..84f98cbe31c3 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1386,7 +1386,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; struct dentry *root, *gssd_dentry; - struct net *net = data; + struct net *net = get_net(sb->s_fs_info); struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int err; @@ -1419,7 +1419,6 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) sb); if (err) goto err_depopulate; - sb->s_fs_info = get_net(net); mutex_unlock(&sn->pipefs_sb_lock); return 0; @@ -1448,7 +1447,8 @@ static struct dentry * rpc_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_ns(fs_type, flags, current->nsproxy->net_ns, rpc_fill_super); + struct net *net = current->nsproxy->net_ns; + return mount_ns(fs_type, flags, data, net, net->user_ns, rpc_fill_super); } static void rpc_kill_sb(struct super_block *sb) @@ -1468,9 +1468,9 @@ static void rpc_kill_sb(struct super_block *sb) RPC_PIPEFS_UMOUNT, sb); mutex_unlock(&sn->pipefs_sb_lock); - put_net(net); out: kill_litter_super(sb); + put_net(net); } static struct file_system_type rpc_pipe_fs_type = { -- cgit v1.2.3 From 3ee690143c3c99f6c0e83f08ff17556890bc6027 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 May 2016 15:42:21 -0500 Subject: ipc/mqueue: The mqueue filesystem should never contain executables Set SB_I_NOEXEC on mqueuefs to ensure small implementation mistakes do not result in executable on mqueuefs by accident. Acked-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- ipc/mqueue.c | 1 + 1 file changed, 1 insertion(+) (limited to 'ipc') diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 60d97082f4dc..5bdd50de7d05 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -307,6 +307,7 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode; struct ipc_namespace *ns = sb->s_fs_info; + sb->s_iflags |= SB_I_NOEXEC; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = MQUEUE_MAGIC; -- cgit v1.2.3 From a2982cc922c3068783eb9a1f77a5626a1ec36a1f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 9 Jun 2016 15:34:02 -0500 Subject: vfs: Generalize filesystem nodev handling. Introduce a function may_open_dev that tests MNT_NODEV and a new superblock flab SB_I_NODEV. Use this new function in all of the places where MNT_NODEV was previously tested. Add the new SB_I_NODEV s_iflag to proc, sysfs, and mqueuefs as those filesystems should never support device nodes, and a simple superblock flags makes that very hard to get wrong. With SB_I_NODEV set if any device nodes somehow manage to show up on on a filesystem those device nodes will be unopenable. Acked-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- fs/block_dev.c | 2 +- fs/kernfs/mount.c | 4 ++-- fs/namei.c | 8 +++++++- fs/proc/inode.c | 4 ++-- include/linux/fs.h | 2 ++ ipc/mqueue.c | 2 +- 6 files changed, 15 insertions(+), 7 deletions(-) (limited to 'ipc') diff --git a/fs/block_dev.c b/fs/block_dev.c index 71ccab1d22c6..30b8d568203a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1857,7 +1857,7 @@ struct block_device *lookup_bdev(const char *pathname) if (!S_ISBLK(inode->i_mode)) goto fail; error = -EACCES; - if (path.mnt->mnt_flags & MNT_NODEV) + if (!may_open_dev(&path)) goto fail; error = -ENOMEM; bdev = bd_acquire(inode); diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 1443df670260..b3d73ad52b22 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -152,8 +152,8 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic) struct dentry *root; info->sb = sb; - /* Userspace would break if executables appear on sysfs */ - sb->s_iflags |= SB_I_NOEXEC; + /* Userspace would break if executables or devices appear on sysfs */ + sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = magic; diff --git a/fs/namei.c b/fs/namei.c index 6a82fb7e2127..757a32725d92 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2881,6 +2881,12 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, } EXPORT_SYMBOL(vfs_create); +bool may_open_dev(const struct path *path) +{ + return !(path->mnt->mnt_flags & MNT_NODEV) && + !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV); +} + static int may_open(struct path *path, int acc_mode, int flag) { struct dentry *dentry = path->dentry; @@ -2899,7 +2905,7 @@ static int may_open(struct path *path, int acc_mode, int flag) break; case S_IFBLK: case S_IFCHR: - if (path->mnt->mnt_flags & MNT_NODEV) + if (!may_open_dev(path)) return -EACCES; /*FALLTHRU*/ case S_IFIFO: diff --git a/fs/proc/inode.c b/fs/proc/inode.c index f4817efb25a6..a5b2c33745b7 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -466,8 +466,8 @@ int proc_fill_super(struct super_block *s, void *data, int silent) if (!proc_parse_options(data, ns)) return -EINVAL; - /* User space would break if executables appear on proc */ - s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC; + /* User space would break if executables or devices appear on proc */ + s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; s->s_blocksize = 1024; s->s_blocksize_bits = 10; diff --git a/include/linux/fs.h b/include/linux/fs.h index 9eef64f23a75..e05983170d23 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1327,6 +1327,7 @@ struct mm_struct; /* sb->s_iflags */ #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ +#define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ @@ -1602,6 +1603,7 @@ extern int vfs_whiteout(struct inode *, struct dentry *); */ extern void inode_init_owner(struct inode *inode, const struct inode *dir, umode_t mode); +extern bool may_open_dev(const struct path *path); /* * VFS FS_IOC_FIEMAP helper definitions. */ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 5bdd50de7d05..0b13ace266f2 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -307,7 +307,7 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode; struct ipc_namespace *ns = sb->s_fs_info; - sb->s_iflags |= SB_I_NOEXEC; + sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = MQUEUE_MAGIC; -- cgit v1.2.3