From 082fd1ea1f98e6bb1189213a2404ddd774de3843 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 16 Jan 2024 13:32:47 +0200 Subject: fsnotify: optimize the case of no parent watcher If parent inode is not watching, check for the event in the sb/mount/inode masks early to optimize out most of the code in __fsnotify_parent() and avoid calling fsnotify(). Jens has reported that this optimization improves BW and IOPS in an io_uring benchmark by more than 10% and reduces perf reported CPU usage. before: + 4.51% io_uring [kernel.vmlinux] [k] fsnotify + 3.67% io_uring [kernel.vmlinux] [k] __fsnotify_parent after: + 2.37% io_uring [kernel.vmlinux] [k] __fsnotify_parent Reported-and-tested-by: Jens Axboe Link: https://lore.kernel.org/linux-fsdevel/b45bd8ff-5654-4e67-90a6-aad5e6759e0b@kernel.dk/ Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Message-Id: <20240116113247.758848-1-amir73il@gmail.com> --- fs/notify/fsnotify.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 8bfd690e9f10..2fc105a72a8f 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -141,7 +141,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) } /* Are inode/sb/mount interested in parent and name info with this event? */ -static bool fsnotify_event_needs_parent(struct inode *inode, struct mount *mnt, +static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, __u32 mask) { __u32 marks_mask = 0; @@ -160,13 +160,22 @@ static bool fsnotify_event_needs_parent(struct inode *inode, struct mount *mnt, /* Did either inode/sb/mount subscribe for events with parent/name? 
*/ marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask); marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask); - if (mnt) - marks_mask |= fsnotify_parent_needed_mask(mnt->mnt_fsnotify_mask); + marks_mask |= fsnotify_parent_needed_mask(mnt_mask); /* Did they subscribe for this event with parent/name info? */ return mask & marks_mask; } +/* Are there any inode/mount/sb objects that are interested in this event? */ +static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, + __u32 mask) +{ + __u32 marks_mask = inode->i_fsnotify_mask | mnt_mask | + inode->i_sb->s_fsnotify_mask; + + return mask & marks_mask & ALL_FSNOTIFY_EVENTS; +} + /* * Notify this dentry's parent about a child's events with child name info * if parent is watching or if inode/sb/mount are interested in events with @@ -179,7 +188,7 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) { const struct path *path = fsnotify_data_path(data, data_type); - struct mount *mnt = path ? real_mount(path->mnt) : NULL; + __u32 mnt_mask = path ? real_mount(path->mnt)->mnt_fsnotify_mask : 0; struct inode *inode = d_inode(dentry); struct dentry *parent; bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED; @@ -190,16 +199,13 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, struct qstr *file_name = NULL; int ret = 0; - /* - * Do inode/sb/mount care about parent and name info on non-dir? - * Do they care about any event at all? 
- */ - if (!inode->i_fsnotify_marks && !inode->i_sb->s_fsnotify_marks && - (!mnt || !mnt->mnt_fsnotify_marks) && !parent_watched) + /* Optimize the likely case of nobody watching this path */ + if (likely(!parent_watched && + !fsnotify_object_watched(inode, mnt_mask, mask))) return 0; parent = NULL; - parent_needed = fsnotify_event_needs_parent(inode, mnt, mask); + parent_needed = fsnotify_event_needs_parent(inode, mnt_mask, mask); if (!parent_watched && !parent_needed) goto notify; -- cgit v1.2.3 From b7dbaace39713025f1fd33407c89651a0c09f667 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 24 Jan 2024 16:29:33 +0100 Subject: fsnotify: Add fsnotify_sb_has_watchers() helper Instead of opencoded checks for number of fsnotify connectors add a helper fsnotify_sb_has_watchers(). Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 8300a5286988..1a9de119a0f7 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -17,6 +17,12 @@ #include #include +/* Are there any inode/mount/sb objects that are being watched at all? */ +static inline bool fsnotify_sb_has_watchers(struct super_block *sb) +{ + return atomic_long_read(&sb->s_fsnotify_connectors); +} + /* * Notify this @dir inode about a change in a child directory entry. 
* The directory entry may have turned positive or negative or its inode may @@ -30,7 +36,7 @@ static inline int fsnotify_name(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie) { - if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0) + if (!fsnotify_sb_has_watchers(dir->i_sb)) return 0; return fsnotify(mask, data, data_type, dir, name, NULL, cookie); @@ -44,7 +50,7 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, static inline void fsnotify_inode(struct inode *inode, __u32 mask) { - if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0) + if (!fsnotify_sb_has_watchers(inode->i_sb)) return; if (S_ISDIR(inode->i_mode)) @@ -59,7 +65,7 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, { struct inode *inode = d_inode(dentry); - if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0) + if (!fsnotify_sb_has_watchers(inode->i_sb)) return 0; if (S_ISDIR(inode->i_mode)) { -- cgit v1.2.3 From e225555028bd3671ad6f4ce72405a95d62e17371 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Tue, 5 Mar 2024 18:08:26 -0800 Subject: inotify: Fix misspelling of "writable" Several file system notification system headers have "writable" misspelled as "writtable" in the comments. This patch fixes it in the inotify header. 
Signed-off-by: Vicki Pfau Signed-off-by: Jan Kara Message-Id: <20240306020831.1404033-1-vi@endrift.com> --- include/uapi/linux/inotify.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/inotify.h b/include/uapi/linux/inotify.h index b3e165853d5b..d94f20e38e5d 100644 --- a/include/uapi/linux/inotify.h +++ b/include/uapi/linux/inotify.h @@ -30,8 +30,8 @@ struct inotify_event { #define IN_ACCESS 0x00000001 /* File was accessed */ #define IN_MODIFY 0x00000002 /* File was modified */ #define IN_ATTRIB 0x00000004 /* Metadata changed */ -#define IN_CLOSE_WRITE 0x00000008 /* Writtable file was closed */ -#define IN_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ +#define IN_CLOSE_WRITE 0x00000008 /* Writable file was closed */ +#define IN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ #define IN_OPEN 0x00000020 /* File was opened */ #define IN_MOVED_FROM 0x00000040 /* File was moved from X */ #define IN_MOVED_TO 0x00000080 /* File was moved to Y */ -- cgit v1.2.3 From 9fe0c03f0bfc5f74dad6e818090ab967d8603095 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Tue, 5 Mar 2024 18:08:27 -0800 Subject: fsnotify: Fix misspelling of "writable" Several file system notification system headers have "writable" misspelled as "writtable" in the comments. This patch fixes it in the fsnotify header. 
Signed-off-by: Vicki Pfau Signed-off-by: Jan Kara Message-Id: <20240306020831.1404033-2-vi@endrift.com> --- include/linux/fsnotify_backend.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7f63be5ca0f1..8f40c349b228 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -31,8 +31,8 @@ #define FS_ACCESS 0x00000001 /* File was accessed */ #define FS_MODIFY 0x00000002 /* File was modified */ #define FS_ATTRIB 0x00000004 /* Metadata changed */ -#define FS_CLOSE_WRITE 0x00000008 /* Writtable file was closed */ -#define FS_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ +#define FS_CLOSE_WRITE 0x00000008 /* Writable file was closed */ +#define FS_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ #define FS_OPEN 0x00000020 /* File was opened */ #define FS_MOVED_FROM 0x00000040 /* File was moved from X */ #define FS_MOVED_TO 0x00000080 /* File was moved to Y */ -- cgit v1.2.3 From 8c2c2549fb32f2e6dd247cfed2e23cf8456dd458 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Tue, 5 Mar 2024 18:08:28 -0800 Subject: fanotify: Fix misspelling of "writable" Several file system notification system headers have "writable" misspelled as "writtable" in the comments. This patch fixes it in the fanotify header. 
Signed-off-by: Vicki Pfau Signed-off-by: Jan Kara Message-Id: <20240306020831.1404033-3-vi@endrift.com> --- include/uapi/linux/fanotify.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index cd14c94e9a1e..a37de58ca571 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -8,8 +8,8 @@ #define FAN_ACCESS 0x00000001 /* File was accessed */ #define FAN_MODIFY 0x00000002 /* File was modified */ #define FAN_ATTRIB 0x00000004 /* Metadata changed */ -#define FAN_CLOSE_WRITE 0x00000008 /* Writtable file closed */ -#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ +#define FAN_CLOSE_WRITE 0x00000008 /* Writable file closed */ +#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ #define FAN_OPEN 0x00000020 /* File was opened */ #define FAN_MOVED_FROM 0x00000040 /* File was moved from X */ #define FAN_MOVED_TO 0x00000080 /* File was moved to Y */ -- cgit v1.2.3 From 0045fb1bab4eaa8f415c2fd76020bf7b2a3be47a Mon Sep 17 00:00:00 2001 From: Winston Wen Date: Tue, 5 Mar 2024 14:18:04 +0800 Subject: fanotify: allow freeze when waiting response for permission events This is a long-standing issue that uninterruptible sleep in fanotify could make system hibernation fail if the userspace server gets frozen before the process waiting for the response (as reported e.g. [1][2]). A few years ago, there was an attempt to switch to interruptible sleep while waiting [3], but that would lead to EINTR returns from open(2) and break userspace [4], so it's been changed to only killable [5]. And the core freezer logic had been rewritten [6][7] in v6.1, allowing freezing in uninterruptible sleep, so we can solve this problem now. 
[1] https://lore.kernel.org/lkml/1518774280-38090-1-git-send-email-t.vivek@samsung.com/ [2] https://lore.kernel.org/lkml/c1bb16b7-9eee-9cea-2c96-a512d8b3b9c7@nwra.com/ [3] https://lore.kernel.org/linux-fsdevel/20190213145443.26836-1-jack@suse.cz/ [4] https://lore.kernel.org/linux-fsdevel/d0031e3a-f050-0832-fa59-928a80ffd44b@nwra.com/ [5] https://lore.kernel.org/linux-fsdevel/20190221105558.GA20921@quack2.suse.cz/ [6] https://lore.kernel.org/lkml/20220822114649.055452969@infradead.org/ [7] https://lore.kernel.org/lkml/20230908-avoid-spurious-freezer-wakeups-v4-0-6155aa3dafae@quicinc.com/ Signed-off-by: Winston Wen Signed-off-by: Jan Kara Message-Id: --- fs/notify/fanotify/fanotify.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 1e4def21811e..224bccaab4cc 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -228,8 +228,10 @@ static int fanotify_get_response(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - ret = wait_event_killable(group->fanotify_data.access_waitq, - event->state == FAN_EVENT_ANSWERED); + ret = wait_event_state(group->fanotify_data.access_waitq, + event->state == FAN_EVENT_ANSWERED, + (TASK_KILLABLE|TASK_FREEZABLE)); + /* Signal pending? */ if (ret < 0) { spin_lock(&group->notification_lock); -- cgit v1.2.3