From 14e43bf435612639cab01541fce7cc41bf7e370b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 22 Sep 2020 09:44:18 -0700 Subject: vfs: don't unnecessarily clone write access for writable fds There's no need for mnt_want_write_file() to increment mnt_writers when the file is already open for writing, provided that mnt_drop_write_file() is changed to conditionally decrement it. We seem to have ended up in the current situation because mnt_want_write_file() used to be paired with mnt_drop_write(), due to mnt_drop_write_file() not having been added yet. So originally mnt_want_write_file() had to always increment mnt_writers. But later mnt_drop_write_file() was added, and all callers of mnt_want_write_file() were paired with it. This makes the compatibility between mnt_want_write_file() and mnt_drop_write() no longer necessary. Therefore, make __mnt_want_write_file() and __mnt_drop_write_file() skip incrementing mnt_writers on files already open for writing. This removes the only caller of mnt_clone_write(), so remove that too. Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- Documentation/filesystems/porting.rst | 7 +++++ fs/namespace.c | 53 +++++++++++++---------------------- include/linux/mount.h | 1 - 3 files changed, 27 insertions(+), 34 deletions(-) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 867036aa90b8..6a6d3e673b48 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -865,3 +865,10 @@ no matter what. Everything is handled by the caller. clone_private_mount() returns a longterm mount now, so the proper destructor of its result is kern_unmount() or kern_unmount_array(). + +--- + +**mandatory** + +mnt_want_write_file() can now only be paired with mnt_drop_write_file(), +whereas previously it could be paired with mnt_drop_write() as well. diff --git a/fs/namespace.c b/fs/namespace.c index d2db7dfe232b..9f2d94e0f3e0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -359,51 +359,37 @@ int mnt_want_write(struct vfsmount *m) } EXPORT_SYMBOL_GPL(mnt_want_write); -/** - * mnt_clone_write - get write access to a mount - * @mnt: the mount on which to take a write - * - * This is effectively like mnt_want_write, except - * it must only be used to take an extra write reference - * on a mountpoint that we already know has a write reference - * on it. This allows some optimisation. - * - * After finished, mnt_drop_write must be called as usual to - * drop the reference. - */ -int mnt_clone_write(struct vfsmount *mnt) -{ - /* superblock may be r/o */ - if (__mnt_is_readonly(mnt)) - return -EROFS; - preempt_disable(); - mnt_inc_writers(real_mount(mnt)); - preempt_enable(); - return 0; -} -EXPORT_SYMBOL_GPL(mnt_clone_write); - /** * __mnt_want_write_file - get write access to a file's mount * @file: the file who's mount on which to take a write * - * This is like __mnt_want_write, but it takes a file and can - * do some optimisations if the file is open for write already + * This is like __mnt_want_write, but if the file is already open for writing it + * skips incrementing mnt_writers (since the open file already has a reference) + * and instead only does the check for emergency r/o remounts. This must be + * paired with __mnt_drop_write_file. */ int __mnt_want_write_file(struct file *file) { - if (!(file->f_mode & FMODE_WRITER)) - return __mnt_want_write(file->f_path.mnt); - else - return mnt_clone_write(file->f_path.mnt); + if (file->f_mode & FMODE_WRITER) { + /* + * Superblock may have become readonly while there are still + * writable fd's, e.g. due to a fs error with errors=remount-ro + */ + if (__mnt_is_readonly(file->f_path.mnt)) + return -EROFS; + return 0; + } + return __mnt_want_write(file->f_path.mnt); } /** * mnt_want_write_file - get write access to a file's mount * @file: the file who's mount on which to take a write * - * This is like mnt_want_write, but it takes a file and can - * do some optimisations if the file is open for write already + * This is like mnt_want_write, but if the file is already open for writing it + * skips incrementing mnt_writers (since the open file already has a reference) + * and instead only does the freeze protection and the check for emergency r/o + * remounts. This must be paired with mnt_drop_write_file. */ int mnt_want_write_file(struct file *file) { @@ -449,7 +435,8 @@ EXPORT_SYMBOL_GPL(mnt_drop_write); void __mnt_drop_write_file(struct file *file) { - __mnt_drop_write(file->f_path.mnt); + if (!(file->f_mode & FMODE_WRITER)) + __mnt_drop_write(file->f_path.mnt); } void mnt_drop_write_file(struct file *file) diff --git a/include/linux/mount.h b/include/linux/mount.h index aaf343b38671..b43191fe6af7 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -79,7 +79,6 @@ struct path; extern int mnt_want_write(struct vfsmount *mnt); extern int mnt_want_write_file(struct file *file); -extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); extern void mnt_drop_write_file(struct file *file); extern void mntput(struct vfsmount *mnt); -- cgit v1.2.3