From abfcf55d8b07a990589301bc64d82a5d26680956 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 16 Aug 2022 13:35:13 +0200 Subject: acl: handle idmapped mounts for idmapped filesystems Ensure that POSIX ACLs checking, getting, and setting works correctly for filesystems mountable with a filesystem idmapping ("fs_idmapping") that want to support idmapped mounts ("mnt_idmapping"). Note that no filesystems mountable with an fs_idmapping do yet support idmapped mounts. This is required infrastructure work to unblock this. As we explained in detail in [1] the fs_idmapping is irrelevant for getxattr() and setxattr() when mapping the ACL_{GROUP,USER} {g,u}ids stored in the uapi struct posix_acl_xattr_entry in posix_acl_fix_xattr_{from,to}_user(). But for acl_permission_check() and posix_acl_{g,s}etxattr_idmapped_mnt() the fs_idmapping matters. acl_permission_check(): During lookup POSIX ACLs are retrieved directly via i_op->get_acl() and are returned via the kernel internal struct posix_acl which contains e_{g,u}id members of type k{g,u}id_t that already take the fs_idmapping into acccount. For example, a POSIX ACL stored with u4 on the backing store is mapped to k10000004 in the fs_idmapping. The mnt_idmapping remaps the POSIX ACL to k20000004. In order to do that the fs_idmapping needs to be taken into account but that doesn't happen yet (Again, this is a counterfactual currently as fuse doesn't support idmapped mounts currently. It's just used as a convenient example.): fs_idmapping: u0:k10000000:r65536 mnt_idmapping: u0:v20000000:r65536 ACL_USER: k10000004 acl_permission_check() -> check_acl() -> get_acl() -> i_op->get_acl() == fuse_get_acl() -> posix_acl_from_xattr(u0:k10000000:r65536 /* fs_idmapping */, ...) { k10000004 = make_kuid(u0:k10000000:r65536 /* fs_idmapping */, u4 /* ACL_USER */); } -> posix_acl_permission() { -1 = make_vfsuid(u0:v20000000:r65536 /* mnt_idmapping */, &init_user_ns, k10000004); vfsuid_eq_kuid(-1, k10000004 /* caller_fsuid */) } In order to correctly map from the fs_idmapping into mnt_idmapping we require the relevant fs_idmaping to be passed: acl_permission_check() -> check_acl() -> get_acl() -> i_op->get_acl() == fuse_get_acl() -> posix_acl_from_xattr(u0:k10000000:r65536 /* fs_idmapping */, ...) { k10000004 = make_kuid(u0:k10000000:r65536 /* fs_idmapping */, u4 /* ACL_USER */); } -> posix_acl_permission() { v20000004 = make_vfsuid(u0:v20000000:r65536 /* mnt_idmapping */, u0:k10000000:r65536 /* fs_idmapping */, k10000004); vfsuid_eq_kuid(v20000004, k10000004 /* caller_fsuid */) } The initial_idmapping is only correct for the current situation because all filesystems that currently support idmapped mounts do not support being mounted with an fs_idmapping. Note that ovl_get_acl() is used to retrieve the POSIX ACLs from the relevant lower layer and the lower layer's mnt_idmapping needs to be taken into account and so does the fs_idmapping. See 0c5fd887d2bb ("acl: move idmapped mount fixup into vfs_{g,s}etxattr()") for more details. For posix_acl_{g,s}etxattr_idmapped_mnt() it is not as obvious why the fs_idmapping matters as it is for acl_permission_check(). Especially because it doesn't matter for posix_acl_fix_xattr_{from,to}_user() (See [1] for more context.). Because posix_acl_{g,s}etxattr_idmapped_mnt() operate on the uapi struct posix_acl_xattr_entry which contains {g,u}id_t values and thus give the impression that the fs_idmapping is irrelevant as at this point appropriate {g,u}id_t values have seemlingly been generated. As we've stated multiple times this assumption is wrong and in fact the uapi struct posix_acl_xattr_entry is taking idmappings into account depending at what place it is operated on. posix_acl_getxattr_idmapped_mnt() When posix_acl_getxattr_idmapped_mnt() is called the values stored in the uapi struct posix_acl_xattr_entry are mapped according to the fs_idmapping. This happened when they were read from the backing store and then translated from struct posix_acl into the uapi struct posix_acl_xattr_entry during posix_acl_to_xattr(). In other words, the fs_idmapping matters as the values stored as {g,u}id_t in the uapi struct posix_acl_xattr_entry have been generated by it. So we need to take the fs_idmapping into account during make_vfsuid() in posix_acl_getxattr_idmapped_mnt(). posix_acl_setxattr_idmapped_mnt() When posix_acl_setxattr_idmapped_mnt() is called the values stored as {g,u}id_t in uapi struct posix_acl_xattr_entry are intended to be the values that ultimately get turned back into a k{g,u}id_t in posix_acl_from_xattr() (which turns the uapi struct posix_acl_xattr_entry into the kernel internal struct posix_acl). In other words, the fs_idmapping matters as the values stored as {g,u}id_t in the uapi struct posix_acl_xattr_entry are intended to be the values that will be undone in the fs_idmapping when writing to the backing store. So we need to take the fs_idmapping into account during from_vfsuid() in posix_acl_setxattr_idmapped_mnt(). Link: https://lore.kernel.org/all/20220801145520.1532837-1-brauner@kernel.org [1] Fixes: 0c5fd887d2bb ("acl: move idmapped mount fixup into vfs_{g,s}etxattr()") Cc: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) Reviewed-by: Seth Forshee Link: https://lore.kernel.org/r/20220816113514.43304-1-brauner@kernel.org --- fs/overlayfs/inode.c | 11 +++++++---- fs/posix_acl.c | 15 +++++++++------ 2 files changed, 16 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b45fea69fff3..0fbcb590af84 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -460,9 +460,12 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) * of the POSIX ACLs retrieved from the lower layer to this function to not * alter the POSIX ACLs for the underlying filesystem. */ -static void ovl_idmap_posix_acl(struct user_namespace *mnt_userns, +static void ovl_idmap_posix_acl(struct inode *realinode, + struct user_namespace *mnt_userns, struct posix_acl *acl) { + struct user_namespace *fs_userns = i_user_ns(realinode); + for (unsigned int i = 0; i < acl->a_count; i++) { vfsuid_t vfsuid; vfsgid_t vfsgid; @@ -470,11 +473,11 @@ static void ovl_idmap_posix_acl(struct user_namespace *mnt_userns, struct posix_acl_entry *e = &acl->a_entries[i]; switch (e->e_tag) { case ACL_USER: - vfsuid = make_vfsuid(mnt_userns, &init_user_ns, e->e_uid); + vfsuid = make_vfsuid(mnt_userns, fs_userns, e->e_uid); e->e_uid = vfsuid_into_kuid(vfsuid); break; case ACL_GROUP: - vfsgid = make_vfsgid(mnt_userns, &init_user_ns, e->e_gid); + vfsgid = make_vfsgid(mnt_userns, fs_userns, e->e_gid); e->e_gid = vfsgid_into_kgid(vfsgid); break; } @@ -536,7 +539,7 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu) if (!clone) clone = ERR_PTR(-ENOMEM); else - ovl_idmap_posix_acl(mnt_user_ns(realpath.mnt), clone); + ovl_idmap_posix_acl(realinode, mnt_user_ns(realpath.mnt), clone); /* * Since we're not in RCU path walk we always need to release the * original ACLs. diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 1d17d7b13dcd..5af33800743e 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -361,6 +361,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, const struct posix_acl *acl, int want) { const struct posix_acl_entry *pa, *pe, *mask_obj; + struct user_namespace *fs_userns = i_user_ns(inode); int found = 0; vfsuid_t vfsuid; vfsgid_t vfsgid; @@ -376,7 +377,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, goto check_perm; break; case ACL_USER: - vfsuid = make_vfsuid(mnt_userns, &init_user_ns, + vfsuid = make_vfsuid(mnt_userns, fs_userns, pa->e_uid); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) goto mask; @@ -390,7 +391,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, } break; case ACL_GROUP: - vfsgid = make_vfsgid(mnt_userns, &init_user_ns, + vfsgid = make_vfsgid(mnt_userns, fs_userns, pa->e_gid); if (vfsgid_in_group_p(vfsgid)) { found = 1; @@ -736,6 +737,7 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, { struct posix_acl_xattr_header *header = value; struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end; + struct user_namespace *fs_userns = i_user_ns(inode); int count; vfsuid_t vfsuid; vfsgid_t vfsgid; @@ -753,13 +755,13 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, switch (le16_to_cpu(entry->e_tag)) { case ACL_USER: uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id)); - vfsuid = make_vfsuid(mnt_userns, &init_user_ns, uid); + vfsuid = make_vfsuid(mnt_userns, fs_userns, uid); entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, vfsuid_into_kuid(vfsuid))); break; case ACL_GROUP: gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id)); - vfsgid = make_vfsgid(mnt_userns, &init_user_ns, gid); + vfsgid = make_vfsgid(mnt_userns, fs_userns, gid); entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, vfsgid_into_kgid(vfsgid))); break; @@ -775,6 +777,7 @@ void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, { struct posix_acl_xattr_header *header = value; struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end; + struct user_namespace *fs_userns = i_user_ns(inode); int count; vfsuid_t vfsuid; vfsgid_t vfsgid; @@ -793,13 +796,13 @@ void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, case ACL_USER: uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id)); vfsuid = VFSUIDT_INIT(uid); - uid = from_vfsuid(mnt_userns, &init_user_ns, vfsuid); + uid = from_vfsuid(mnt_userns, fs_userns, vfsuid); entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, uid)); break; case ACL_GROUP: gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id)); vfsgid = VFSGIDT_INIT(gid); - gid = from_vfsgid(mnt_userns, &init_user_ns, vfsgid); + gid = from_vfsgid(mnt_userns, fs_userns, vfsgid); entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, gid)); break; default: -- cgit v1.2.3 From bf1ac16edf6770a92bc75cf2373f1f9feea398a4 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Tue, 16 Aug 2022 11:47:52 -0500 Subject: fs: require CAP_SYS_ADMIN in target namespace for idmapped mounts Idmapped mounts should not allow a user to map file ownsership into a range of ids which is not under the control of that user. However, we currently don't check whether the mounter is privileged wrt to the target user namespace. Currently no FS_USERNS_MOUNT filesystems support idmapped mounts, thus this is not a problem as only CAP_SYS_ADMIN in init_user_ns is allowed to set up idmapped mounts. But this could change in the future, so add a check to refuse to create idmapped mounts when the mounter does not have CAP_SYS_ADMIN in the target user namespace. Fixes: bd303368b776 ("fs: support mapped mounts of mapped filesystems") Signed-off-by: Seth Forshee Reviewed-by: Christian Brauner (Microsoft) Link: https://lore.kernel.org/r/20220816164752.2595240-1-sforshee@digitalocean.com Signed-off-by: Christian Brauner (Microsoft) --- fs/namespace.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 68789f896f08..df137ba19d37 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4238,6 +4238,13 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, err = -EPERM; goto out_fput; } + + /* We're not controlling the target namespace. */ + if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) { + err = -EPERM; + goto out_fput; + } + kattr->mnt_userns = get_user_ns(mnt_userns); out_fput: -- cgit v1.2.3 From 0c3bc7899e6dfb52df1c46118a5a670ae619645f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 20 Jul 2022 14:32:52 +0200 Subject: ntfs: fix acl handling While looking at our current POSIX ACL handling in the context of some overlayfs work I went through a range of other filesystems checking how they handle them currently and encountered ntfs3. The posic_acl_{from,to}_xattr() helpers always need to operate on the filesystem idmapping. Since ntfs3 can only be mounted in the initial user namespace the relevant idmapping is init_user_ns. The posix_acl_{from,to}_xattr() helpers are concerned with translating between the kernel internal struct posix_acl{_entry} and the uapi struct posix_acl_xattr_{header,entry} and the kernel internal data structure is cached filesystem wide. Additional idmappings such as the caller's idmapping or the mount's idmapping are handled higher up in the VFS. Individual filesystems usually do not need to concern themselves with these. The posix_acl_valid() helper is concerned with checking whether the values in the kernel internal struct posix_acl can be represented in the filesystem's idmapping. IOW, if they can be written to disk. So this helper too needs to take the filesystem's idmapping. Fixes: be71b5cba2e6 ("fs/ntfs3: Add attrib operations") Cc: Konstantin Komarov Cc: ntfs3@lists.linux.dev Signed-off-by: Christian Brauner (Microsoft) --- fs/ntfs3/xattr.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 5e0e0280e70d..3e9118705174 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -478,8 +478,7 @@ out: } #ifdef CONFIG_NTFS3_FS_POSIX_ACL -static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns, - struct inode *inode, int type, +static struct posix_acl *ntfs_get_acl_ex(struct inode *inode, int type, int locked) { struct ntfs_inode *ni = ntfs_i(inode); @@ -514,7 +513,7 @@ static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns, /* Translate extended attribute to acl. */ if (err >= 0) { - acl = posix_acl_from_xattr(mnt_userns, buf, err); + acl = posix_acl_from_xattr(&init_user_ns, buf, err); } else if (err == -ENODATA) { acl = NULL; } else { @@ -537,8 +536,7 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu) if (rcu) return ERR_PTR(-ECHILD); - /* TODO: init_user_ns? */ - return ntfs_get_acl_ex(&init_user_ns, inode, type, 0); + return ntfs_get_acl_ex(inode, type, 0); } static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, @@ -595,7 +593,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, value = kmalloc(size, GFP_NOFS); if (!value) return -ENOMEM; - err = posix_acl_to_xattr(mnt_userns, acl, value, size); + err = posix_acl_to_xattr(&init_user_ns, acl, value, size); if (err < 0) goto out; flags = 0; @@ -641,7 +639,7 @@ static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns, if (!acl) return -ENODATA; - err = posix_acl_to_xattr(mnt_userns, acl, buffer, size); + err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); return err; @@ -665,12 +663,12 @@ static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns, if (!value) { acl = NULL; } else { - acl = posix_acl_from_xattr(mnt_userns, value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { - err = posix_acl_valid(mnt_userns, acl); + err = posix_acl_valid(&init_user_ns, acl); if (err) goto release_and_out; } -- cgit v1.2.3