From 2864f301424227d9d3bde6d550bc224a83535b46 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 Jul 2016 23:15:21 -0400 Subject: iget_locked et.al.: make sure we don't return bad inodes If one thread does iget_locked(), proceeds to try and set the new inode up and fails, inode will be unhashed and dropped. However, another thread doing ilookup/iget_locked in the middle of that would end up finding a half-set-up inode, grabbing a reference, waiting for it to come unlocked and getting the resulting bad inode. It's a race (if that ilookup had been called just after the failure of setup attempt it wouldn't have found the sucker at all), particularly unpleasant in cases when failure is transient/caller-dependent/etc. While it can be dealt with in the callers, there's no reason not to handle it in fs/inode.c primitives, especially since the cost is trivial. Signed-off-by: Al Viro --- fs/inode.c | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 4ccbc21b30ce..d123fe4b6f7d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1019,13 +1019,17 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, { struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; - +again: spin_lock(&inode_hash_lock); inode = find_inode(sb, head, test, data); spin_unlock(&inode_hash_lock); if (inode) { wait_on_inode(inode); + if (unlikely(inode_unhashed(inode))) { + iput(inode); + goto again; + } return inode; } @@ -1062,6 +1066,10 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, destroy_inode(inode); inode = old; wait_on_inode(inode); + if (unlikely(inode_unhashed(inode))) { + iput(inode); + goto again; + } } return inode; @@ -1089,12 +1097,16 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) { struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; - +again: spin_lock(&inode_hash_lock); inode = find_inode_fast(sb, head, ino); spin_unlock(&inode_hash_lock); if (inode) { wait_on_inode(inode); + if (unlikely(inode_unhashed(inode))) { + iput(inode); + goto again; + } return inode; } @@ -1129,6 +1141,10 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) destroy_inode(inode); inode = old; wait_on_inode(inode); + if (unlikely(inode_unhashed(inode))) { + iput(inode); + goto again; + } } return inode; } @@ -1264,10 +1280,16 @@ EXPORT_SYMBOL(ilookup5_nowait); struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { - struct inode *inode = ilookup5_nowait(sb, hashval, test, data); - - if (inode) + struct inode *inode; +again: + inode = ilookup5_nowait(sb, hashval, test, data); + if (inode) { wait_on_inode(inode); + if (unlikely(inode_unhashed(inode))) { + iput(inode); + goto again; + } + } return inode; } EXPORT_SYMBOL(ilookup5); @@ -1284,13 +1306,18 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino) { struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; - +again: spin_lock(&inode_hash_lock); inode = find_inode_fast(sb, head, ino); spin_unlock(&inode_hash_lock); - if (inode) + if (inode) { wait_on_inode(inode); + if (unlikely(inode_unhashed(inode))) { + iput(inode); + goto again; + } + } return inode; } EXPORT_SYMBOL(ilookup); -- cgit v1.2.3 From 598e3c8f72f5b77c84d2cb26cfd936ffb3cfdbaa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 16 Sep 2016 12:44:20 +0200 Subject: vfs: update ovl inode before relatime check On overlayfs relatime_need_update() needs inode times to be correct on overlay inode. But i_mtime and i_ctime are updated by filesystem code on underlying inode only, so they will be out-of-date on the overlay inode. This patch copies the times from the underlying inode if needed. This can't be done if called from RCU lookup (link following) but link m/ctime are not updated by fs, so this is all right. This patch doesn't change functionality for anything but overlayfs. Signed-off-by: Miklos Szeredi --- fs/inode.c | 33 +++++++++++++++++++++++++++------ fs/internal.h | 9 +++++++++ fs/namei.c | 2 +- include/linux/fs.h | 1 - 4 files changed, 37 insertions(+), 8 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 7e3ef3af3db9..4a1fc1631e00 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1535,17 +1535,37 @@ sector_t bmap(struct inode *inode, sector_t block) } EXPORT_SYMBOL(bmap); +/* + * Update times in overlayed inode from underlying real inode + */ +static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode, + bool rcu) +{ + if (!rcu) { + struct inode *realinode = d_real_inode(dentry); + + if (unlikely(inode != realinode) && + (!timespec_equal(&inode->i_mtime, &realinode->i_mtime) || + !timespec_equal(&inode->i_ctime, &realinode->i_ctime))) { + inode->i_mtime = realinode->i_mtime; + inode->i_ctime = realinode->i_ctime; + } + } +} + /* * With relative atime, only update atime if the previous atime is * earlier than either the ctime or mtime or if at least a day has * passed since the last atime update. */ -static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, - struct timespec now) +static int relatime_need_update(const struct path *path, struct inode *inode, + struct timespec now, bool rcu) { - if (!(mnt->mnt_flags & MNT_RELATIME)) + if (!(path->mnt->mnt_flags & MNT_RELATIME)) return 1; + + update_ovl_inode_times(path->dentry, inode, rcu); /* * Is mtime younger than atime? If yes, update atime: */ @@ -1612,7 +1632,8 @@ static int update_time(struct inode *inode, struct timespec *time, int flags) * This function automatically handles read only file systems and media, * as well as the "noatime" flag and inode specific "noatime" markers. */ -bool atime_needs_update(const struct path *path, struct inode *inode) +bool __atime_needs_update(const struct path *path, struct inode *inode, + bool rcu) { struct vfsmount *mnt = path->mnt; struct timespec now; @@ -1638,7 +1659,7 @@ bool atime_needs_update(const struct path *path, struct inode *inode) now = current_fs_time(inode->i_sb); - if (!relatime_need_update(mnt, inode, now)) + if (!relatime_need_update(path, inode, now, rcu)) return false; if (timespec_equal(&inode->i_atime, &now)) @@ -1653,7 +1674,7 @@ void touch_atime(const struct path *path) struct inode *inode = d_inode(path->dentry); struct timespec now; - if (!atime_needs_update(path, inode)) + if (!__atime_needs_update(path, inode, false)) return; if (!sb_start_write_trylock(inode->i_sb)) diff --git a/fs/internal.h b/fs/internal.h index ba0737649d4a..a63da5e96148 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -120,6 +120,15 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); extern void inode_add_lru(struct inode *inode); extern int dentry_needs_remove_privs(struct dentry *dentry); +extern bool __atime_needs_update(const struct path *, struct inode *, bool); +static inline bool atime_needs_update_rcu(const struct path *path, + struct inode *inode) +{ + return __atime_needs_update(path, inode, true); +} + +extern bool atime_needs_update_rcu(const struct path *, struct inode *); + /* * fs-writeback.c */ diff --git a/fs/namei.c b/fs/namei.c index adb04146df09..4bbcae1ba58e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1015,7 +1015,7 @@ const char *get_link(struct nameidata *nd) if (!(nd->flags & LOOKUP_RCU)) { touch_atime(&last->link); cond_resched(); - } else if (atime_needs_update(&last->link, inode)) { + } else if (atime_needs_update_rcu(&last->link, inode)) { if (unlikely(unlazy_walk(nd, NULL, 0))) return ERR_PTR(-ECHILD); touch_atime(&last->link); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7c391366fb43..7db097d673a8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2007,7 +2007,6 @@ enum file_time_flags { S_VERSION = 8, }; -extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); static inline void file_accessed(struct file *file) { -- cgit v1.2.3