From 07fd7c329839cf0b8c7766883d830a1a0d12d1dd Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 18 Feb 2024 14:50:13 +0100 Subject: libfs: add path_from_stashed() Add a helper for both nsfs and pidfs to reuse an already stashed dentry or to add and stash a new dentry. Link: https://lore.kernel.org/r/20240218-neufahrzeuge-brauhaus-fb0eb6459771@brauner Signed-off-by: Christian Brauner --- fs/libfs.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) (limited to 'fs/libfs.c') diff --git a/fs/libfs.c b/fs/libfs.c index eec6031b0155..2a55e87e1439 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1973,3 +1973,97 @@ struct timespec64 simple_inode_init_ts(struct inode *inode) return ts; } EXPORT_SYMBOL(simple_inode_init_ts); + +static inline struct dentry *get_stashed_dentry(struct dentry *stashed) +{ + struct dentry *dentry; + + guard(rcu)(); + dentry = READ_ONCE(stashed); + if (!dentry) + return NULL; + if (!lockref_get_not_dead(&dentry->d_lockref)) + return NULL; + return dentry; +} + +static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, + struct super_block *sb, + const struct file_operations *fops, + void *data) +{ + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_anon(sb); + if (!dentry) + return ERR_PTR(-ENOMEM); + + inode = new_inode_pseudo(sb); + if (!inode) { + dput(dentry); + return ERR_PTR(-ENOMEM); + } + + inode->i_ino = ino; + inode->i_flags |= S_IMMUTABLE; + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_fop = fops; + inode->i_private = data; + simple_inode_init_ts(inode); + + /* @data is now owned by the fs */ + d_instantiate(dentry, inode); + + if (cmpxchg(stashed, NULL, dentry)) { + d_delete(dentry); /* make sure ->d_prune() does nothing */ + dput(dentry); + cpu_relax(); + return ERR_PTR(-EAGAIN); + } + + return dentry; +} + +/** + * path_from_stashed - create path from stashed or new dentry + * @stashed: where to retrieve or stash dentry + * @ino: inode number to use + * @mnt: mnt of the filesystems to use + * @fops: file operations to use + * @data: data to store in inode->i_private + * @path: path to create + * + * The function tries to retrieve a stashed dentry from @stashed. If the dentry + * is still valid then it will be reused. If the dentry isn't able the function + * will allocate a new dentry and inode. It will then try to update @stashed + * with the newly added dentry. If it fails -EAGAIN is returned and the caller + * my retry. + * + * Special-purpose helper for nsfs and pidfs. + * + * Return: If 0 or an error is returned the caller can be sure that @data must + * be cleaned up. If 1 or -EAGAIN is returned @data is owned by the + * filesystem. + */ +int path_from_stashed(struct dentry **stashed, unsigned long ino, + struct vfsmount *mnt, const struct file_operations *fops, + void *data, struct path *path) +{ + struct dentry *dentry; + int ret = 0; + + dentry = get_stashed_dentry(*stashed); + if (dentry) + goto out_path; + + dentry = stash_dentry(stashed, ino, mnt->mnt_sb, fops, data); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + ret = 1; + +out_path: + path->dentry = dentry; + path->mnt = mntget(mnt); + return ret; +} -- cgit v1.2.3 From b28ddcc32d8fa3e20745b3a47dff863fe0376d79 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 19 Feb 2024 16:30:57 +0100 Subject: pidfs: convert to path_from_stashed() helper Moving pidfds from the anonymous inode infrastructure to a separate tiny in-kernel filesystem similar to sockfs, pipefs, and anon_inodefs causes selinux denials and thus various userspace components that make heavy use of pidfds to fail as pidfds used anon_inode_getfile() which aren't subject to any LSM hooks. But dentry_open() is and that would cause regressions. The failures that are seen are selinux denials. But the core failure is dbus-broker. That cascades into other services failing that depend on dbus-broker. For example, when dbus-broker fails to start polkit and all the others won't be able to work because they depend on dbus-broker. The reason for dbus-broker failing is because it doesn't handle failures for SO_PEERPIDFD correctly. Last kernel release we introduced SO_PEERPIDFD (and SCM_PIDFD). SO_PEERPIDFD allows dbus-broker and polkit and others to receive a pidfd for the peer of an AF_UNIX socket. This is the first time in the history of Linux that we can safely authenticate clients in a race-free manner. dbus-broker immediately made use of this but messed up the error checking. It only allowed EINVAL as a valid failure for SO_PEERPIDFD. That's obviously problematic not just because of LSM denials but because of seccomp denials that would prevent SO_PEERPIDFD from working; or any other new error code from there. So this is catching a flawed implementation in dbus-broker as well. It has to fallback to the old pid-based authentication when SO_PEERPIDFD doesn't work no matter the reasons otherwise it'll always risk such failures. So overall that LSM denial should not have caused dbus-broker to fail. It can never assume that a feature released one kernel ago like SO_PEERPIDFD can be assumed to be available. So, the next fix separate from the selinux policy update is to try and fix dbus-broker at [3]. That should make it into Fedora as well. In addition the selinux reference policy should also be updated. See [4] for that. If Selinux is in enforcing mode in userspace and it encounters anything that it doesn't know about it will deny it by default. And the policy is entirely in userspace including declaring new types for stuff like nsfs or pidfs to allow it. For now we continue to raise S_PRIVATE on the inode if it's a pidfs inode which means things behave exactly like before. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2265630 Link: https://github.com/fedora-selinux/selinux-policy/pull/2050 Link: https://github.com/bus1/dbus-broker/pull/343 [3] Link: https://github.com/SELinuxProject/refpolicy/pull/762 [4] Reported-by: Nathan Chancellor Link: https://lore.kernel.org/r/20240222190334.GA412503@dev-arch.thelio-3990X Link: https://lore.kernel.org/r/20240218-neufahrzeuge-brauhaus-fb0eb6459771@brauner Signed-off-by: Christian Brauner --- fs/internal.h | 3 ++- fs/libfs.c | 15 ++++++++++--- fs/nsfs.c | 7 +++--- fs/pidfs.c | 59 +++++++++++++++++++++++++++++++++------------------ include/linux/pid.h | 1 + include/linux/pidfs.h | 1 + kernel/pid.c | 1 + 7 files changed, 59 insertions(+), 28 deletions(-) (limited to 'fs/libfs.c') diff --git a/fs/internal.h b/fs/internal.h index cfddaec6fbf6..a34531bcad6e 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -312,4 +312,5 @@ struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); void mnt_idmap_put(struct mnt_idmap *idmap); int path_from_stashed(struct dentry **stashed, unsigned long ino, struct vfsmount *mnt, const struct file_operations *fops, - void *data, struct path *path); + const struct inode_operations *iops, void *data, + struct path *path); diff --git a/fs/libfs.c b/fs/libfs.c index 2a55e87e1439..2acba9d53756 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -1990,6 +1991,7 @@ static inline struct dentry *get_stashed_dentry(struct dentry *stashed) static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, struct super_block *sb, const struct file_operations *fops, + const struct inode_operations *iops, void *data) { struct dentry *dentry; @@ -2007,8 +2009,13 @@ static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, inode->i_ino = ino; inode->i_flags |= S_IMMUTABLE; + if (is_pidfs_sb(sb)) + inode->i_flags |= S_PRIVATE; inode->i_mode = S_IFREG | S_IRUGO; - inode->i_fop = fops; + if (iops) + inode->i_op = iops; + if (fops) + inode->i_fop = fops; inode->i_private = data; simple_inode_init_ts(inode); @@ -2030,6 +2037,7 @@ static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, * @stashed: where to retrieve or stash dentry * @ino: inode number to use * @mnt: mnt of the filesystems to use + * @iops: inode operations to use * @fops: file operations to use * @data: data to store in inode->i_private * @path: path to create @@ -2048,7 +2056,8 @@ static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, */ int path_from_stashed(struct dentry **stashed, unsigned long ino, struct vfsmount *mnt, const struct file_operations *fops, - void *data, struct path *path) + const struct inode_operations *iops, void *data, + struct path *path) { struct dentry *dentry; int ret = 0; @@ -2057,7 +2066,7 @@ int path_from_stashed(struct dentry **stashed, unsigned long ino, if (dentry) goto out_path; - dentry = stash_dentry(stashed, ino, mnt->mnt_sb, fops, data); + dentry = stash_dentry(stashed, ino, mnt->mnt_sb, fops, iops, data); if (IS_ERR(dentry)) return PTR_ERR(dentry); ret = 1; diff --git a/fs/nsfs.c b/fs/nsfs.c index 39dc2604bec0..e2da645c3d02 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -67,7 +67,7 @@ int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb, if (!ns) return -ENOENT; ret = path_from_stashed(&ns->stashed, ns->inum, nsfs_mnt, - &ns_file_operations, ns, path); + &ns_file_operations, NULL, ns, path); if (ret <= 0 && ret != -EAGAIN) ns->ops->put(ns); } while (ret == -EAGAIN); @@ -122,8 +122,9 @@ int open_related_ns(struct ns_common *ns, return PTR_ERR(relative); } - err = path_from_stashed(&relative->stashed, relative->inum, nsfs_mnt, - &ns_file_operations, relative, &path); + err = path_from_stashed(&relative->stashed, relative->inum, + nsfs_mnt, &ns_file_operations, NULL, + relative, &path); if (err <= 0 && err != -EAGAIN) relative->ops->put(relative); } while (err == -EAGAIN); diff --git a/fs/pidfs.c b/fs/pidfs.c index 6c3f010074af..cf606f15def5 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -14,6 +14,8 @@ #include #include +#include "internal.h" + static int pidfd_release(struct inode *inode, struct file *file) { #ifndef CONFIG_FS_PID @@ -186,9 +188,21 @@ static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen) d_inode(dentry)->i_ino); } +static void pidfs_prune_dentry(struct dentry *dentry) +{ + struct inode *inode; + + inode = d_inode(dentry); + if (inode) { + struct pid *pid = inode->i_private; + WRITE_ONCE(pid->stashed, NULL); + } +} + static const struct dentry_operations pidfs_dentry_operations = { .d_delete = always_delete_dentry, .d_dname = pidfs_dname, + .d_prune = pidfs_prune_dentry, }; static int pidfs_init_fs_context(struct fs_context *fc) @@ -213,34 +227,28 @@ static struct file_system_type pidfs_type = { struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) { - struct inode *inode; struct file *pidfd_file; + struct path path; + int ret; - inode = iget_locked(pidfs_sb, pid->ino); - if (!inode) - return ERR_PTR(-ENOMEM); - - if (inode->i_state & I_NEW) { + do { /* * Inode numbering for pidfs start at RESERVED_PIDS + 1. * This avoids collisions with the root inode which is 1 * for pseudo filesystems. */ - inode->i_ino = pid->ino; - inode->i_mode = S_IFREG | S_IRUGO; - inode->i_op = &pidfs_inode_operations; - inode->i_fop = &pidfs_file_operations; - inode->i_flags |= S_IMMUTABLE; - inode->i_private = get_pid(pid); - simple_inode_init_ts(inode); - unlock_new_inode(inode); - } - - pidfd_file = alloc_file_pseudo(inode, pidfs_mnt, "", flags, - &pidfs_file_operations); - if (IS_ERR(pidfd_file)) - iput(inode); - + ret = path_from_stashed(&pid->stashed, pid->ino, pidfs_mnt, + &pidfs_file_operations, + &pidfs_inode_operations, get_pid(pid), + &path); + if (ret <= 0 && ret != -EAGAIN) + put_pid(pid); + } while (ret == -EAGAIN); + if (ret < 0) + return ERR_PTR(ret); + + pidfd_file = dentry_open(&path, flags, current_cred()); + path_put(&path); return pidfd_file; } @@ -253,6 +261,11 @@ void __init pidfs_init(void) pidfs_sb = pidfs_mnt->mnt_sb; } +bool is_pidfs_sb(const struct super_block *sb) +{ + return sb == pidfs_mnt->mnt_sb; +} + #else /* !CONFIG_FS_PID */ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) @@ -269,4 +282,8 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) } void __init pidfs_init(void) { } +bool is_pidfs_sb(const struct super_block *sb) +{ + return false; +} #endif diff --git a/include/linux/pid.h b/include/linux/pid.h index 956481128e8d..c79a0efd0258 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -56,6 +56,7 @@ struct pid unsigned int level; spinlock_t lock; #ifdef CONFIG_FS_PID + struct dentry *stashed; unsigned long ino; #endif /* lists of tasks that use this pid */ diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 75bdf9807802..40dd325a32a6 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -4,5 +4,6 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); void __init pidfs_init(void); +bool is_pidfs_sb(const struct super_block *sb); #endif /* _LINUX_PID_FS_H */ diff --git a/kernel/pid.c b/kernel/pid.c index 581cc34341fd..99a0c5eb24b8 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -281,6 +281,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, if (!(ns->pid_allocated & PIDNS_ADDING)) goto out_unlock; #ifdef CONFIG_FS_PID + pid->stashed = NULL; pid->ino = ++pidfs_ino; #endif for ( ; upid >= pid->numbers; --upid) { -- cgit v1.2.3 From 159a0d9fd50b92cc48e4c82cde79c4cb34c85953 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 18 Feb 2024 14:52:24 +0100 Subject: libfs: improve path_from_stashed() helper In earlier patches we moved both nsfs and pidfs to path_from_stashed(). The helper currently tries to add and stash a new dentry if a reusable dentry couldn't be found and returns EAGAIN if it lost the race to stash the dentry. The caller can use EAGAIN to retry. The helper and the two filesystems be written in a way that makes returning EAGAIN unnecessary. To do this we need to change the dentry->d_prune() implementation of nsfs and pidfs to not simply replace the stashed dentry with NULL but to use a cmpxchg() and only replace their own dentry. Then path_from_stashed() can then be changed to not just stash a new dentry when no dentry is currently stashed but also when an already dead dentry is stashed. If another task managed to install a dentry in the meantime it can simply be reused. Pack that into a loop and call it a day. Suggested-by: Linus Torvalds Link: https://lore.kernel.org/r/CAHk-=wgtLF5Z5=15-LKAczWm=-tUjHO+Bpf7WjBG+UU3s=fEQw@mail.gmail.com Signed-off-by: Christian Brauner --- fs/libfs.c | 61 ++++++++++++++++++++++++++++++++++++++++--------------------- fs/nsfs.c | 50 ++++++++++++++++++++++---------------------------- fs/pidfs.c | 28 +++++++++++----------------- 3 files changed, 73 insertions(+), 66 deletions(-) (limited to 'fs/libfs.c') diff --git a/fs/libfs.c b/fs/libfs.c index 2acba9d53756..7617e1bc6e5b 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1988,11 +1988,11 @@ static inline struct dentry *get_stashed_dentry(struct dentry *stashed) return dentry; } -static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, - struct super_block *sb, - const struct file_operations *fops, - const struct inode_operations *iops, - void *data) +static struct dentry *prepare_anon_dentry(unsigned long ino, + struct super_block *sb, + const struct file_operations *fops, + const struct inode_operations *iops, + void *data) { struct dentry *dentry; struct inode *inode; @@ -2021,15 +2021,29 @@ static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, /* @data is now owned by the fs */ d_instantiate(dentry, inode); + return dentry; +} - if (cmpxchg(stashed, NULL, dentry)) { - d_delete(dentry); /* make sure ->d_prune() does nothing */ - dput(dentry); - cpu_relax(); - return ERR_PTR(-EAGAIN); - } +static struct dentry *stash_dentry(struct dentry **stashed, + struct dentry *dentry) +{ + guard(rcu)(); + for (;;) { + struct dentry *old; - return dentry; + /* Assume any old dentry was cleared out. */ + old = cmpxchg(stashed, NULL, dentry); + if (likely(!old)) + return dentry; + + /* Check if somebody else installed a reusable dentry. */ + if (lockref_get_not_dead(&old->d_lockref)) + return old; + + /* There's an old dead dentry there, try to take it over. */ + if (likely(try_cmpxchg(stashed, &old, dentry))) + return dentry; + } } /** @@ -2044,15 +2058,14 @@ static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, * * The function tries to retrieve a stashed dentry from @stashed. If the dentry * is still valid then it will be reused. If the dentry isn't able the function - * will allocate a new dentry and inode. It will then try to update @stashed - * with the newly added dentry. If it fails -EAGAIN is returned and the caller - * my retry. + * will allocate a new dentry and inode. It will then check again whether it + * can reuse an existing dentry in case one has been added in the meantime or + * update @stashed with the newly added dentry. * * Special-purpose helper for nsfs and pidfs. * * Return: If 0 or an error is returned the caller can be sure that @data must - * be cleaned up. If 1 or -EAGAIN is returned @data is owned by the - * filesystem. + * be cleaned up. If 1 is returned @data is owned by the filesystem. */ int path_from_stashed(struct dentry **stashed, unsigned long ino, struct vfsmount *mnt, const struct file_operations *fops, @@ -2062,17 +2075,23 @@ int path_from_stashed(struct dentry **stashed, unsigned long ino, struct dentry *dentry; int ret = 0; - dentry = get_stashed_dentry(*stashed); - if (dentry) + /* See if dentry can be reused. */ + path->dentry = get_stashed_dentry(*stashed); + if (path->dentry) goto out_path; - dentry = stash_dentry(stashed, ino, mnt->mnt_sb, fops, iops, data); + /* Allocate a new dentry. */ + dentry = prepare_anon_dentry(ino, mnt->mnt_sb, fops, iops, data); if (IS_ERR(dentry)) return PTR_ERR(dentry); + + /* Added a new dentry. @data is now owned by the filesystem. */ + path->dentry = stash_dentry(stashed, dentry); + if (path->dentry != dentry) + dput(dentry); ret = 1; out_path: - path->dentry = dentry; path->mnt = mntget(mnt); return ret; } diff --git a/fs/nsfs.c b/fs/nsfs.c index e2da645c3d02..3a36bb62353c 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -36,10 +36,12 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) static void ns_prune_dentry(struct dentry *dentry) { - struct inode *inode = d_inode(dentry); + struct inode *inode; + + inode = d_inode(dentry); if (inode) { struct ns_common *ns = inode->i_private; - WRITE_ONCE(ns->stashed, NULL); + cmpxchg(&ns->stashed, dentry, NULL); } } @@ -61,20 +63,17 @@ int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb, void *private_data) { int ret; + struct ns_common *ns; - do { - struct ns_common *ns = ns_get_cb(private_data); - if (!ns) - return -ENOENT; - ret = path_from_stashed(&ns->stashed, ns->inum, nsfs_mnt, - &ns_file_operations, NULL, ns, path); - if (ret <= 0 && ret != -EAGAIN) - ns->ops->put(ns); - } while (ret == -EAGAIN); - + ns = ns_get_cb(private_data); + if (!ns) + return -ENOENT; + ret = path_from_stashed(&ns->stashed, ns->inum, nsfs_mnt, + &ns_file_operations, NULL, ns, path); + if (ret <= 0) + ns->ops->put(ns); if (ret < 0) return ret; - return 0; } @@ -105,6 +104,7 @@ int open_related_ns(struct ns_common *ns, struct ns_common *(*get_ns)(struct ns_common *ns)) { struct path path = {}; + struct ns_common *relative; struct file *f; int err; int fd; @@ -113,22 +113,16 @@ int open_related_ns(struct ns_common *ns, if (fd < 0) return fd; - do { - struct ns_common *relative; - - relative = get_ns(ns); - if (IS_ERR(relative)) { - put_unused_fd(fd); - return PTR_ERR(relative); - } - - err = path_from_stashed(&relative->stashed, relative->inum, - nsfs_mnt, &ns_file_operations, NULL, - relative, &path); - if (err <= 0 && err != -EAGAIN) - relative->ops->put(relative); - } while (err == -EAGAIN); + relative = get_ns(ns); + if (IS_ERR(relative)) { + put_unused_fd(fd); + return PTR_ERR(relative); + } + err = path_from_stashed(&relative->stashed, relative->inum, nsfs_mnt, + &ns_file_operations, NULL, relative, &path); + if (err <= 0) + relative->ops->put(relative); if (err < 0) { put_unused_fd(fd); return err; diff --git a/fs/pidfs.c b/fs/pidfs.c index cf606f15def5..5f33c820b7f8 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -140,7 +140,6 @@ struct pid *pidfd_pid(const struct file *file) #ifdef CONFIG_FS_PID static struct vfsmount *pidfs_mnt __ro_after_init; -static struct super_block *pidfs_sb __ro_after_init; /* * The vfs falls back to simple_setattr() if i_op->setattr() isn't @@ -195,7 +194,7 @@ static void pidfs_prune_dentry(struct dentry *dentry) inode = d_inode(dentry); if (inode) { struct pid *pid = inode->i_private; - WRITE_ONCE(pid->stashed, NULL); + cmpxchg(&pid->stashed, dentry, NULL); } } @@ -231,19 +230,16 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) struct path path; int ret; - do { - /* - * Inode numbering for pidfs start at RESERVED_PIDS + 1. - * This avoids collisions with the root inode which is 1 - * for pseudo filesystems. - */ - ret = path_from_stashed(&pid->stashed, pid->ino, pidfs_mnt, - &pidfs_file_operations, - &pidfs_inode_operations, get_pid(pid), - &path); - if (ret <= 0 && ret != -EAGAIN) - put_pid(pid); - } while (ret == -EAGAIN); + /* + * Inode numbering for pidfs start at RESERVED_PIDS + 1. + * This avoids collisions with the root inode which is 1 + * for pseudo filesystems. + */ + ret = path_from_stashed(&pid->stashed, pid->ino, pidfs_mnt, + &pidfs_file_operations, &pidfs_inode_operations, + get_pid(pid), &path); + if (ret <= 0) + put_pid(pid); if (ret < 0) return ERR_PTR(ret); @@ -257,8 +253,6 @@ void __init pidfs_init(void) pidfs_mnt = kern_mount(&pidfs_type); if (IS_ERR(pidfs_mnt)) panic("Failed to mount pidfs pseudo filesystem"); - - pidfs_sb = pidfs_mnt->mnt_sb; } bool is_pidfs_sb(const struct super_block *sb) -- cgit v1.2.3 From 2558e3b23112adb82a558bab616890a790a38bc6 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 21 Feb 2024 09:59:51 +0100 Subject: libfs: add stashed_dentry_prune() Both pidfs and nsfs use a memory location to stash a dentry for reuse by concurrent openers. Right now two custom dentry->d_prune::{ns,pidfs}_prune_dentry() methods are needed that do the same thing. The only thing that differs is that they need to get to the memory location to store or retrieve the dentry from differently. Fix that by remember the stashing location for the dentry in dentry->d_fsdata which allows us to retrieve it in dentry->d_prune. That in turn makes it possible to add a common helper that pidfs and nsfs can both use. Link: https://lore.kernel.org/r/CAHk-=wg8cHY=i3m6RnXQ2Y2W8psicKWQEZq1=94ivUiviM-0OA@mail.gmail.com Signed-off-by: Christian Brauner --- fs/internal.h | 1 + fs/libfs.c | 29 +++++++++++++++++++++++++++-- fs/nsfs.c | 16 ++-------------- fs/pidfs.c | 13 +------------ 4 files changed, 31 insertions(+), 28 deletions(-) (limited to 'fs/libfs.c') diff --git a/fs/internal.h b/fs/internal.h index a34531bcad6e..b0c843c3fa3c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -314,3 +314,4 @@ int path_from_stashed(struct dentry **stashed, unsigned long ino, struct vfsmount *mnt, const struct file_operations *fops, const struct inode_operations *iops, void *data, struct path *path); +void stashed_dentry_prune(struct dentry *dentry); diff --git a/fs/libfs.c b/fs/libfs.c index 7617e1bc6e5b..472f21bd0325 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1988,7 +1988,8 @@ static inline struct dentry *get_stashed_dentry(struct dentry *stashed) return dentry; } -static struct dentry *prepare_anon_dentry(unsigned long ino, +static struct dentry *prepare_anon_dentry(struct dentry **stashed, + unsigned long ino, struct super_block *sb, const struct file_operations *fops, const struct inode_operations *iops, @@ -2019,6 +2020,9 @@ static struct dentry *prepare_anon_dentry(unsigned long ino, inode->i_private = data; simple_inode_init_ts(inode); + /* Store address of location where dentry's supposed to be stashed. */ + dentry->d_fsdata = stashed; + /* @data is now owned by the fs */ d_instantiate(dentry, inode); return dentry; @@ -2081,7 +2085,7 @@ int path_from_stashed(struct dentry **stashed, unsigned long ino, goto out_path; /* Allocate a new dentry. */ - dentry = prepare_anon_dentry(ino, mnt->mnt_sb, fops, iops, data); + dentry = prepare_anon_dentry(stashed, ino, mnt->mnt_sb, fops, iops, data); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -2092,6 +2096,27 @@ int path_from_stashed(struct dentry **stashed, unsigned long ino, ret = 1; out_path: + WARN_ON_ONCE(path->dentry->d_fsdata != stashed); + WARN_ON_ONCE(d_inode(path->dentry)->i_private != data); path->mnt = mntget(mnt); return ret; } + +void stashed_dentry_prune(struct dentry *dentry) +{ + struct dentry **stashed = dentry->d_fsdata; + struct inode *inode = d_inode(dentry); + + if (WARN_ON_ONCE(!stashed)) + return; + + if (!inode) + return; + + /* + * Only replace our own @dentry as someone else might've + * already cleared out @dentry and stashed their own + * dentry in there. + */ + cmpxchg(stashed, dentry, NULL); +} diff --git a/fs/nsfs.c b/fs/nsfs.c index 3a36bb62353c..2ce229af34e9 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -34,22 +34,10 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) ns_ops->name, inode->i_ino); } -static void ns_prune_dentry(struct dentry *dentry) -{ - struct inode *inode; - - inode = d_inode(dentry); - if (inode) { - struct ns_common *ns = inode->i_private; - cmpxchg(&ns->stashed, dentry, NULL); - } -} - -const struct dentry_operations ns_dentry_operations = -{ - .d_prune = ns_prune_dentry, +const struct dentry_operations ns_dentry_operations = { .d_delete = always_delete_dentry, .d_dname = ns_dname, + .d_prune = stashed_dentry_prune, }; static void nsfs_evict(struct inode *inode) diff --git a/fs/pidfs.c b/fs/pidfs.c index 5f33c820b7f8..d38b7a184994 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -187,21 +187,10 @@ static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen) d_inode(dentry)->i_ino); } -static void pidfs_prune_dentry(struct dentry *dentry) -{ - struct inode *inode; - - inode = d_inode(dentry); - if (inode) { - struct pid *pid = inode->i_private; - cmpxchg(&pid->stashed, dentry, NULL); - } -} - static const struct dentry_operations pidfs_dentry_operations = { .d_delete = always_delete_dentry, .d_dname = pidfs_dname, - .d_prune = pidfs_prune_dentry, + .d_prune = stashed_dentry_prune, }; static int pidfs_init_fs_context(struct fs_context *fc) -- cgit v1.2.3 From e9c5263ce16d96311c118111ac779f004be8b473 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 1 Mar 2024 10:26:03 +0100 Subject: libfs: improve path_from_stashed() Right now we pass a bunch of info that is fs specific which doesn't make a lot of sense and it bleeds fs sepcific details into the generic helper. nsfs and pidfs have slightly different needs when initializing inodes. Add simple operations that are stashed in sb->s_fs_info that both can implement. This also allows us to get rid of cleaning up references in the caller. All in all path_from_stashed() becomes way simpler. Signed-off-by: Christian Brauner --- fs/internal.h | 8 +++++--- fs/libfs.c | 41 ++++++++++++++++++----------------------- fs/nsfs.c | 33 ++++++++++++++++++++++----------- fs/pidfs.c | 24 +++++++++++++++++++++--- 4 files changed, 66 insertions(+), 40 deletions(-) (limited to 'fs/libfs.c') diff --git a/fs/internal.h b/fs/internal.h index b0c843c3fa3c..7d3edcdf59cc 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -310,8 +310,10 @@ ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *po struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns); struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); void mnt_idmap_put(struct mnt_idmap *idmap); +struct stashed_operations { + void (*put_data)(void *data); + void (*init_inode)(struct inode *inode, void *data); +}; int path_from_stashed(struct dentry **stashed, unsigned long ino, - struct vfsmount *mnt, const struct file_operations *fops, - const struct inode_operations *iops, void *data, - struct path *path); + struct vfsmount *mnt, void *data, struct path *path); void stashed_dentry_prune(struct dentry *dentry); diff --git a/fs/libfs.c b/fs/libfs.c index 472f21bd0325..65322e11bcda 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1991,12 +1991,11 @@ static inline struct dentry *get_stashed_dentry(struct dentry *stashed) static struct dentry *prepare_anon_dentry(struct dentry **stashed, unsigned long ino, struct super_block *sb, - const struct file_operations *fops, - const struct inode_operations *iops, void *data) { struct dentry *dentry; struct inode *inode; + const struct stashed_operations *sops = sb->s_fs_info; dentry = d_alloc_anon(sb); if (!dentry) @@ -2010,15 +2009,13 @@ static struct dentry *prepare_anon_dentry(struct dentry **stashed, inode->i_ino = ino; inode->i_flags |= S_IMMUTABLE; - if (is_pidfs_sb(sb)) - inode->i_flags |= S_PRIVATE; - inode->i_mode = S_IFREG | S_IRUGO; - if (iops) - inode->i_op = iops; - if (fops) - inode->i_fop = fops; - inode->i_private = data; + inode->i_mode = S_IFREG; simple_inode_init_ts(inode); + sops->init_inode(inode, data); + + /* Notice when this is changed. */ + WARN_ON_ONCE(!S_ISREG(inode->i_mode)); + WARN_ON_ONCE(!IS_IMMUTABLE(inode)); /* Store address of location where dentry's supposed to be stashed. */ dentry->d_fsdata = stashed; @@ -2055,8 +2052,6 @@ static struct dentry *stash_dentry(struct dentry **stashed, * @stashed: where to retrieve or stash dentry * @ino: inode number to use * @mnt: mnt of the filesystems to use - * @iops: inode operations to use - * @fops: file operations to use * @data: data to store in inode->i_private * @path: path to create * @@ -2068,38 +2063,38 @@ static struct dentry *stash_dentry(struct dentry **stashed, * * Special-purpose helper for nsfs and pidfs. * - * Return: If 0 or an error is returned the caller can be sure that @data must - * be cleaned up. If 1 is returned @data is owned by the filesystem. + * Return: On success zero and on failure a negative error is returned. */ int path_from_stashed(struct dentry **stashed, unsigned long ino, - struct vfsmount *mnt, const struct file_operations *fops, - const struct inode_operations *iops, void *data, - struct path *path) + struct vfsmount *mnt, void *data, struct path *path) { struct dentry *dentry; - int ret = 0; + const struct stashed_operations *sops = mnt->mnt_sb->s_fs_info; /* See if dentry can be reused. */ path->dentry = get_stashed_dentry(*stashed); - if (path->dentry) + if (path->dentry) { + sops->put_data(data); goto out_path; + } /* Allocate a new dentry. */ - dentry = prepare_anon_dentry(stashed, ino, mnt->mnt_sb, fops, iops, data); - if (IS_ERR(dentry)) + dentry = prepare_anon_dentry(stashed, ino, mnt->mnt_sb, data); + if (IS_ERR(dentry)) { + sops->put_data(data); return PTR_ERR(dentry); + } /* Added a new dentry. @data is now owned by the filesystem. */ path->dentry = stash_dentry(stashed, dentry); if (path->dentry != dentry) dput(dentry); - ret = 1; out_path: WARN_ON_ONCE(path->dentry->d_fsdata != stashed); WARN_ON_ONCE(d_inode(path->dentry)->i_private != data); path->mnt = mntget(mnt); - return ret; + return 0; } void stashed_dentry_prune(struct dentry *dentry) diff --git a/fs/nsfs.c b/fs/nsfs.c index 2ce229af34e9..7aaafb5cb9fc 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -50,19 +50,13 @@ static void nsfs_evict(struct inode *inode) int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb, void *private_data) { - int ret; struct ns_common *ns; ns = ns_get_cb(private_data); if (!ns) return -ENOENT; - ret = path_from_stashed(&ns->stashed, ns->inum, nsfs_mnt, - &ns_file_operations, NULL, ns, path); - if (ret <= 0) - ns->ops->put(ns); - if (ret < 0) - return ret; - return 0; + + return path_from_stashed(&ns->stashed, ns->inum, nsfs_mnt, ns, path); } struct ns_get_path_task_args { @@ -108,9 +102,7 @@ int open_related_ns(struct ns_common *ns, } err = path_from_stashed(&relative->stashed, relative->inum, nsfs_mnt, - &ns_file_operations, NULL, relative, &path); - if (err <= 0) - relative->ops->put(relative); + relative, &path); if (err < 0) { put_unused_fd(fd); return err; @@ -207,6 +199,24 @@ static const struct super_operations nsfs_ops = { .show_path = nsfs_show_path, }; +static void nsfs_init_inode(struct inode *inode, void *data) +{ + inode->i_private = data; + inode->i_mode |= S_IRUGO; + inode->i_fop = &ns_file_operations; +} + +static void nsfs_put_data(void *data) +{ + struct ns_common *ns = data; + ns->ops->put(ns); +} + +static const struct stashed_operations nsfs_stashed_ops = { + .init_inode = nsfs_init_inode, + .put_data = nsfs_put_data, +}; + static int nsfs_init_fs_context(struct fs_context *fc) { struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC); @@ -214,6 +224,7 @@ static int nsfs_init_fs_context(struct fs_context *fc) return -ENOMEM; ctx->ops = &nsfs_ops; ctx->dops = &ns_dentry_operations; + fc->s_fs_info = (void *)&nsfs_stashed_ops; return 0; } diff --git a/fs/pidfs.c b/fs/pidfs.c index d38b7a184994..8fd71a00be9c 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -193,6 +193,26 @@ static const struct dentry_operations pidfs_dentry_operations = { .d_prune = stashed_dentry_prune, }; +static void pidfs_init_inode(struct inode *inode, void *data) +{ + inode->i_private = data; + inode->i_flags |= S_PRIVATE; + inode->i_mode |= S_IRWXU; + inode->i_op = &pidfs_inode_operations; + inode->i_fop = &pidfs_file_operations; +} + +static void pidfs_put_data(void *data) +{ + struct pid *pid = data; + put_pid(pid); +} + +static const struct stashed_operations pidfs_stashed_ops = { + .init_inode = pidfs_init_inode, + .put_data = pidfs_put_data, +}; + static int pidfs_init_fs_context(struct fs_context *fc) { struct pseudo_fs_context *ctx; @@ -203,6 +223,7 @@ static int pidfs_init_fs_context(struct fs_context *fc) ctx->ops = &pidfs_sops; ctx->dops = &pidfs_dentry_operations; + fc->s_fs_info = (void *)&pidfs_stashed_ops; return 0; } @@ -225,10 +246,7 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) * for pseudo filesystems. */ ret = path_from_stashed(&pid->stashed, pid->ino, pidfs_mnt, - &pidfs_file_operations, &pidfs_inode_operations, get_pid(pid), &path); - if (ret <= 0) - put_pid(pid); if (ret < 0) return ERR_PTR(ret); -- cgit v1.2.3