From 1b852bceb0d111e510d1a15826ecc4a19358d512 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 23:22:29 -0500 Subject: mnt: Refactor the logic for mounting sysfs and proc in a user namespace Fresh mounts of proc and sysfs are a very special case that works very much like a bind mount. Unfortunately the current structure can not preserve the MNT_LOCK... mount flags. Therefore refactor the logic into a form that can be modified to preserve those lock bits. Add a new filesystem flag FS_USERNS_VISIBLE that requires some mount of the filesystem be fully visible in the current mount namespace, before the filesystem may be mounted. Move the logic for calling fs_fully_visible from proc and sysfs into fs/namespace.c where it has greater access to mount namespace state. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 1b9e11167bae..8e7edaf60fe1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2332,6 +2332,8 @@ unlock: return err; } +static bool fs_fully_visible(struct file_system_type *fs_type); + /* * create a new mount for userspace and request it to be added into the * namespace's tree @@ -2363,6 +2365,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, flags |= MS_NODEV; mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } + if (type->fs_flags & FS_USERNS_VISIBLE) { + if (!fs_fully_visible(type)) + return -EPERM; + } } mnt = vfs_kern_mount(type, flags, name, data); @@ -3164,7 +3170,7 @@ bool current_chrooted(void) return chrooted; } -bool fs_fully_visible(struct file_system_type *type) +static bool fs_fully_visible(struct file_system_type *type) { struct mnt_namespace *ns = current->nsproxy->mnt_ns; struct mount *mnt; -- cgit v1.2.3 From 8c6cf9cc829fcd0b179b59f7fe288941d0e31108 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 23:49:47 -0500 Subject: mnt: Modify fs_fully_visible to deal with locked ro nodev and atime Ignore an existing mount if the locked readonly, nodev or atime attributes are less permissive than the desired attributes of the new mount. On success ensure the new mount locks all of the same readonly, nodev and atime attributes as the old mount. The nosuid and noexec attributes are not checked here as this change is destined for stable and enforcing those attributes causes a regression in lxc and libvirt-lxc where those applications will not start and there are no known executables on sysfs or proc and no known way to create exectuables without code modifications Cc: stable@vger.kernel.org Fixes: e51db73532955 ("userns: Better restrictions on when proc and sysfs can be mounted") Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 8e7edaf60fe1..63b9806235e6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2332,7 +2332,7 @@ unlock: return err; } -static bool fs_fully_visible(struct file_system_type *fs_type); +static bool fs_fully_visible(struct file_system_type *fs_type, int *new_mnt_flags); /* * create a new mount for userspace and request it to be added into the @@ -2366,7 +2366,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } if (type->fs_flags & FS_USERNS_VISIBLE) { - if (!fs_fully_visible(type)) + if (!fs_fully_visible(type, &mnt_flags)) return -EPERM; } } @@ -3170,9 +3170,10 @@ bool current_chrooted(void) return chrooted; } -static bool fs_fully_visible(struct file_system_type *type) +static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) { struct mnt_namespace *ns = current->nsproxy->mnt_ns; + int new_flags = *new_mnt_flags; struct mount *mnt; bool visible = false; @@ -3191,6 +3192,19 @@ static bool fs_fully_visible(struct file_system_type *type) if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) continue; + /* Verify the mount flags are equal to or more permissive + * than the proposed new mount. + */ + if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) && + !(new_flags & MNT_READONLY)) + continue; + if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && + !(new_flags & MNT_NODEV)) + continue; + if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && + ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK))) + continue; + /* This mount is not fully visible if there are any child mounts * that cover anything except for empty directories. */ @@ -3201,6 +3215,10 @@ static bool fs_fully_visible(struct file_system_type *type) if (inode->i_nlink > 2) goto next; } + /* Preserve the locked attributes */ + *new_mnt_flags |= mnt->mnt.mnt_flags & (MNT_LOCK_READONLY | \ + MNT_LOCK_NODEV | \ + MNT_LOCK_ATIME); visible = true; goto found; next: ; -- cgit v1.2.3 From ceeb0e5d39fcdf4dca2c997bf225c7fc49200b37 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 7 Jan 2015 08:10:09 -0600 Subject: vfs: Ignore unlocked mounts in fs_fully_visible Limit the mounts fs_fully_visible considers to locked mounts. Unlocked can always be unmounted so considering them adds hassle but no security benefit. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 63b9806235e6..8c7b8e0941b9 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3205,11 +3205,15 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK))) continue; - /* This mount is not fully visible if there are any child mounts - * that cover anything except for empty directories. + /* This mount is not fully visible if there are any + * locked child mounts that cover anything except for + * empty directories. */ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; + /* Only worry about locked mounts */ + if (!(mnt->mnt.mnt_flags & MNT_LOCKED)) + continue; if (!S_ISDIR(inode->i_mode)) goto next; if (inode->i_nlink > 2) -- cgit v1.2.3 From 7236c85e1be51a9e25ba0f6e087a66ca89605a49 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 20:51:09 -0500 Subject: mnt: Update fs_fully_visible to test for permanently empty directories fs_fully_visible attempts to make fresh mounts of proc and sysfs give the mounter no more access to proc and sysfs than if they could have by creating a bind mount. One aspect of proc and sysfs that makes this particularly tricky is that there are other filesystems that typically mount on top of proc and sysfs. As those filesystems are mounted on empty directories in practice it is safe to ignore them. However testing to ensure filesystems are mounted on empty directories has not been something the in kernel data structures have supported so the current test for an empty directory which checks to see if nlink <= 2 is a bit lacking. proc and sysfs have recently been modified to use the new empty_dir infrastructure to create all of their dedicated mount points. Instead of testing for S_ISDIR(inode->i_mode) && i_nlink <= 2 to see if a directory is empty, test for is_empty_dir_inode(inode). That small change guaranteess mounts found on proc and sysfs really are safe to ignore, because the directories are not only empty but nothing can ever be added to them. This guarantees there is nothing to worry about when mounting proc and sysfs. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 8c7b8e0941b9..02c6875dd945 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3214,9 +3214,8 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) /* Only worry about locked mounts */ if (!(mnt->mnt.mnt_flags & MNT_LOCKED)) continue; - if (!S_ISDIR(inode->i_mode)) - goto next; - if (inode->i_nlink > 2) + /* Is the directory permanetly empty? */ + if (!is_empty_dir_inode(inode)) goto next; } /* Preserve the locked attributes */ -- cgit v1.2.3