From d3b1084dfd629ef89bc1c4bab95e5cb87e7d08c2 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Jul 2018 15:44:40 +0200 Subject: vfs: make open_with_fake_path() not contribute to nr_files Stacking file operations in overlay will store an extra open file for each overlay file opened. The overhead is just that of "struct file" which is about 256bytes, because overlay already pins an extra dentry and inode when the file is open, which add up to a much larger overhead. For fear of breaking working setups, don't start accounting the extra file. Signed-off-by: Miklos Szeredi --- fs/internal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/internal.h') diff --git a/fs/internal.h b/fs/internal.h index 52a346903748..442098fa0a84 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -94,6 +94,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *); * file_table.c */ extern struct file *alloc_empty_file(int, const struct cred *); +extern struct file *alloc_empty_file_noaccount(int, const struct cred *); /* * super.c -- cgit v1.2.3 From 9df6702ad0e85901450fe48a7b5f0f8975353eeb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Jul 2018 15:44:40 +0200 Subject: vfs: export vfs_ioctl() to modules This is needed by the stacked ioctl implementation in overlayfs. Signed-off-by: Miklos Szeredi --- fs/internal.h | 1 - fs/ioctl.c | 1 + include/linux/fs.h | 2 ++ 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/internal.h') diff --git a/fs/internal.h b/fs/internal.h index 442098fa0a84..9c3b4c40e582 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -184,7 +184,6 @@ extern const struct dentry_operations ns_dentry_operations; */ extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd, unsigned long arg); -extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* * iomap support: diff --git a/fs/ioctl.c b/fs/ioctl.c index b445b13fc59b..3212c29235ce 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -49,6 +49,7 @@ long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) out: return error; } +EXPORT_SYMBOL(vfs_ioctl); static int ioctl_fibmap(struct file *filp, int __user *p) { diff --git a/include/linux/fs.h b/include/linux/fs.h index e1884840d556..019817a083a0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1634,6 +1634,8 @@ int vfs_mkobj(struct dentry *, umode_t, int (*f)(struct dentry *, umode_t, void *), void *); +extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); + /* * VFS file helper functions. */ -- cgit v1.2.3 From 6742cee04353231015ddbe7e8b404ac9c1eb4473 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Jul 2018 15:44:43 +0200 Subject: Revert "ovl: don't allow writing ioctl on lower layer" This reverts commit 7c6893e3c9abf6a9676e060a1e35e5caca673d57. Overlayfs no longer relies on the vfs for checking writability of files. Signed-off-by: Miklos Szeredi --- fs/internal.h | 2 -- fs/namespace.c | 64 +++------------------------------------------------------- fs/open.c | 4 ++-- fs/xattr.c | 9 ++++----- 4 files changed, 9 insertions(+), 70 deletions(-) (limited to 'fs/internal.h') diff --git a/fs/internal.h b/fs/internal.h index 9c3b4c40e582..abb41f346b18 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -80,10 +80,8 @@ extern void __init mnt_init(void); extern int __mnt_want_write(struct vfsmount *); extern int __mnt_want_write_file(struct file *); -extern int mnt_want_write_file_path(struct file *); extern void __mnt_drop_write(struct vfsmount *); extern void __mnt_drop_write_file(struct file *); -extern void mnt_drop_write_file_path(struct file *); /* * fs_struct.c diff --git a/fs/namespace.c b/fs/namespace.c index 76a742e36b32..c16921dba157 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -431,18 +431,13 @@ int __mnt_want_write_file(struct file *file) } /** - * mnt_want_write_file_path - get write access to a file's mount + * mnt_want_write_file - get write access to a file's mount * @file: the file who's mount on which to take a write * * This is like mnt_want_write, but it takes a file and can * do some optimisations if the file is open for write already - * - * Called by the vfs for cases when we have an open file at hand, but will do an - * inode operation on it (important distinction for files opened on overlayfs, - * since the file operations will come from the real underlying file, while - * inode operations come from the overlay). */ -int mnt_want_write_file_path(struct file *file) +int mnt_want_write_file(struct file *file) { int ret; @@ -452,53 +447,6 @@ int mnt_want_write_file_path(struct file *file) sb_end_write(file->f_path.mnt->mnt_sb); return ret; } - -static inline int may_write_real(struct file *file) -{ - struct dentry *dentry = file->f_path.dentry; - struct dentry *upperdentry; - - /* Writable file? */ - if (file->f_mode & FMODE_WRITER) - return 0; - - /* Not overlayfs? */ - if (likely(!(dentry->d_flags & DCACHE_OP_REAL))) - return 0; - - /* File refers to upper, writable layer? */ - upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER); - if (upperdentry && file_inode(file) == d_inode(upperdentry)) - return 0; - - /* Lower layer: can't write to real file, sorry... */ - return -EPERM; -} - -/** - * mnt_want_write_file - get write access to a file's mount - * @file: the file who's mount on which to take a write - * - * This is like mnt_want_write, but it takes a file and can - * do some optimisations if the file is open for write already - * - * Mostly called by filesystems from their ioctl operation before performing - * modification. On overlayfs this needs to check if the file is on a read-only - * lower layer and deny access in that case. - */ -int mnt_want_write_file(struct file *file) -{ - int ret; - - ret = may_write_real(file); - if (!ret) { - sb_start_write(file_inode(file)->i_sb); - ret = __mnt_want_write_file(file); - if (ret) - sb_end_write(file_inode(file)->i_sb); - } - return ret; -} EXPORT_SYMBOL_GPL(mnt_want_write_file); /** @@ -536,15 +484,9 @@ void __mnt_drop_write_file(struct file *file) __mnt_drop_write(file->f_path.mnt); } -void mnt_drop_write_file_path(struct file *file) -{ - mnt_drop_write(file->f_path.mnt); -} - void mnt_drop_write_file(struct file *file) { - __mnt_drop_write(file->f_path.mnt); - sb_end_write(file_inode(file)->i_sb); + mnt_drop_write(file->f_path.mnt); } EXPORT_SYMBOL(mnt_drop_write_file); diff --git a/fs/open.c b/fs/open.c index fed24862ef83..a3c03a4a9078 100644 --- a/fs/open.c +++ b/fs/open.c @@ -707,12 +707,12 @@ int ksys_fchown(unsigned int fd, uid_t user, gid_t group) if (!f.file) goto out; - error = mnt_want_write_file_path(f.file); + error = mnt_want_write_file(f.file); if (error) goto out_fput; audit_file(f.file); error = chown_common(&f.file->f_path, user, group); - mnt_drop_write_file_path(f.file); + mnt_drop_write_file(f.file); out_fput: fdput(f); out: diff --git a/fs/xattr.c b/fs/xattr.c index f9cb1db187b7..3a24027c062d 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -23,7 +23,6 @@ #include #include -#include "internal.h" static const char * strcmp_prefix(const char *a, const char *a_prefix) @@ -501,10 +500,10 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, if (!f.file) return error; audit_file(f.file); - error = mnt_want_write_file_path(f.file); + error = mnt_want_write_file(f.file); if (!error) { error = setxattr(f.file->f_path.dentry, name, value, size, flags); - mnt_drop_write_file_path(f.file); + mnt_drop_write_file(f.file); } fdput(f); return error; @@ -733,10 +732,10 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) if (!f.file) return error; audit_file(f.file); - error = mnt_want_write_file_path(f.file); + error = mnt_want_write_file(f.file); if (!error) { error = removexattr(f.file->f_path.dentry, name); - mnt_drop_write_file_path(f.file); + mnt_drop_write_file(f.file); } fdput(f); return error; -- cgit v1.2.3 From c6718543463dbb78486ad259f884cb800df802b5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Jul 2018 15:44:43 +0200 Subject: Revert "vfs: update ovl inode before relatime check" This reverts commit 598e3c8f72f5b77c84d2cb26cfd936ffb3cfdbaa. Overlayfs no longer relies on the vfs correct atime handling. Signed-off-by: Miklos Szeredi --- fs/inode.c | 33 ++++++--------------------------- fs/internal.h | 7 ------- fs/namei.c | 2 +- include/linux/fs.h | 1 + 4 files changed, 8 insertions(+), 35 deletions(-) (limited to 'fs/internal.h') diff --git a/fs/inode.c b/fs/inode.c index 7ef6f1942757..077a6174dfd5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1554,37 +1554,17 @@ sector_t bmap(struct inode *inode, sector_t block) } EXPORT_SYMBOL(bmap); -/* - * Update times in overlayed inode from underlying real inode - */ -static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode, - bool rcu) -{ - if (!rcu) { - struct inode *realinode = d_real_inode(dentry); - - if (unlikely(inode != realinode) && - (!timespec64_equal(&inode->i_mtime, &realinode->i_mtime) || - !timespec64_equal(&inode->i_ctime, &realinode->i_ctime))) { - inode->i_mtime = realinode->i_mtime; - inode->i_ctime = realinode->i_ctime; - } - } -} - /* * With relative atime, only update atime if the previous atime is * earlier than either the ctime or mtime or if at least a day has * passed since the last atime update. */ -static int relatime_need_update(const struct path *path, struct inode *inode, - struct timespec now, bool rcu) +static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, + struct timespec now) { - if (!(path->mnt->mnt_flags & MNT_RELATIME)) + if (!(mnt->mnt_flags & MNT_RELATIME)) return 1; - - update_ovl_inode_times(path->dentry, inode, rcu); /* * Is mtime younger than atime? If yes, update atime: */ @@ -1655,8 +1635,7 @@ static int update_time(struct inode *inode, struct timespec64 *time, int flags) * This function automatically handles read only file systems and media, * as well as the "noatime" flag and inode specific "noatime" markers. */ -bool __atime_needs_update(const struct path *path, struct inode *inode, - bool rcu) +bool atime_needs_update(const struct path *path, struct inode *inode) { struct vfsmount *mnt = path->mnt; struct timespec64 now; @@ -1682,7 +1661,7 @@ bool __atime_needs_update(const struct path *path, struct inode *inode, now = current_time(inode); - if (!relatime_need_update(path, inode, timespec64_to_timespec(now), rcu)) + if (!relatime_need_update(mnt, inode, timespec64_to_timespec(now))) return false; if (timespec64_equal(&inode->i_atime, &now)) @@ -1697,7 +1676,7 @@ void touch_atime(const struct path *path) struct inode *inode = d_inode(path->dentry); struct timespec64 now; - if (!__atime_needs_update(path, inode, false)) + if (!atime_needs_update(path, inode)) return; if (!sb_start_write_trylock(inode->i_sb)) diff --git a/fs/internal.h b/fs/internal.h index abb41f346b18..e54ad4361d0d 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -133,13 +133,6 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); extern void inode_add_lru(struct inode *inode); extern int dentry_needs_remove_privs(struct dentry *dentry); -extern bool __atime_needs_update(const struct path *, struct inode *, bool); -static inline bool atime_needs_update_rcu(const struct path *path, - struct inode *inode) -{ - return __atime_needs_update(path, inode, true); -} - /* * fs-writeback.c */ diff --git a/fs/namei.c b/fs/namei.c index d152cc05fdc3..31182b71b313 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1015,7 +1015,7 @@ const char *get_link(struct nameidata *nd) if (!(nd->flags & LOOKUP_RCU)) { touch_atime(&last->link); cond_resched(); - } else if (atime_needs_update_rcu(&last->link, inode)) { + } else if (atime_needs_update(&last->link, inode)) { if (unlikely(unlazy_walk(nd))) return ERR_PTR(-ECHILD); touch_atime(&last->link); diff --git a/include/linux/fs.h b/include/linux/fs.h index b67209948f1b..8f8c9ac1c9d5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2088,6 +2088,7 @@ enum file_time_flags { S_VERSION = 8, }; +extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); static inline void file_accessed(struct file *file) { -- cgit v1.2.3