Diffstat (limited to 'fs/notify/fanotify/fanotify.c')
-rw-r--r--  fs/notify/fanotify/fanotify.c  166
1 file changed, 119 insertions(+), 47 deletions(-)
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 1192c9953620..057abd2cf887 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -14,6 +14,7 @@
#include <linux/audit.h>
#include <linux/sched/mm.h>
#include <linux/statfs.h>
+#include <linux/stringhash.h>
#include "fanotify.h"
@@ -22,12 +23,24 @@ static bool fanotify_path_equal(struct path *p1, struct path *p2)
return p1->mnt == p2->mnt && p1->dentry == p2->dentry;
}
+static unsigned int fanotify_hash_path(const struct path *path)
+{
+ return hash_ptr(path->dentry, FANOTIFY_EVENT_HASH_BITS) ^
+ hash_ptr(path->mnt, FANOTIFY_EVENT_HASH_BITS);
+}
+
static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1,
__kernel_fsid_t *fsid2)
{
return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1];
}
+static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid)
+{
+ return hash_32(fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^
+ hash_32(fsid->val[1], FANOTIFY_EVENT_HASH_BITS);
+}
+
static bool fanotify_fh_equal(struct fanotify_fh *fh1,
struct fanotify_fh *fh2)
{
@@ -38,6 +51,16 @@ static bool fanotify_fh_equal(struct fanotify_fh *fh1,
!memcmp(fanotify_fh_buf(fh1), fanotify_fh_buf(fh2), fh1->len);
}
+static unsigned int fanotify_hash_fh(struct fanotify_fh *fh)
+{
+ long salt = (long)fh->type | (long)fh->len << 8;
+
+ /*
+ * full_name_hash() works long by long, so it handles fh buf optimally.
+ */
+ return full_name_hash((void *)salt, fanotify_fh_buf(fh), fh->len);
+}
+
static bool fanotify_fid_event_equal(struct fanotify_fid_event *ffe1,
struct fanotify_fid_event *ffe2)
{
@@ -88,16 +111,12 @@ static bool fanotify_name_event_equal(struct fanotify_name_event *fne1,
return fanotify_info_equal(info1, info2);
}
-static bool fanotify_should_merge(struct fsnotify_event *old_fsn,
- struct fsnotify_event *new_fsn)
+static bool fanotify_should_merge(struct fanotify_event *old,
+ struct fanotify_event *new)
{
- struct fanotify_event *old, *new;
-
- pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn);
- old = FANOTIFY_E(old_fsn);
- new = FANOTIFY_E(new_fsn);
+ pr_debug("%s: old=%p new=%p\n", __func__, old, new);
- if (old_fsn->objectid != new_fsn->objectid ||
+ if (old->hash != new->hash ||
old->type != new->type || old->pid != new->pid)
return false;
@@ -129,14 +148,20 @@ static bool fanotify_should_merge(struct fsnotify_event *old_fsn,
return false;
}
+/* Limit event merges to limit CPU overhead per event */
+#define FANOTIFY_MAX_MERGE_EVENTS 128
+
/* and the list better be locked by something too! */
-static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
+static int fanotify_merge(struct fsnotify_group *group,
+ struct fsnotify_event *event)
{
- struct fsnotify_event *test_event;
- struct fanotify_event *new;
+ struct fanotify_event *old, *new = FANOTIFY_E(event);
+ unsigned int bucket = fanotify_event_hash_bucket(group, new);
+ struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket];
+ int i = 0;
- pr_debug("%s: list=%p event=%p\n", __func__, list, event);
- new = FANOTIFY_E(event);
+ pr_debug("%s: group=%p event=%p bucket=%u\n", __func__,
+ group, event, bucket);
/*
* Don't merge a permission event with any other event so that we know
@@ -146,9 +171,11 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
if (fanotify_is_perm_event(new->mask))
return 0;
- list_for_each_entry_reverse(test_event, list, list) {
- if (fanotify_should_merge(test_event, event)) {
- FANOTIFY_E(test_event)->mask |= new->mask;
+ hlist_for_each_entry(old, hlist, merge_list) {
+ if (++i > FANOTIFY_MAX_MERGE_EVENTS)
+ break;
+ if (fanotify_should_merge(old, new)) {
+ old->mask |= new->mask;
return 1;
}
}
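
For reference, fanotify_event_hash_bucket() used above is not defined in this file; it lives in fs/notify/fanotify/fanotify.h. A minimal sketch of how the merge key is reduced to a bucket index, assuming FANOTIFY_HTABLE_BITS-style constants in that header (the constant names and the value 7 are assumptions, not shown in this diff):

/* Sketch only -- the real definitions live in fanotify.h, not in this diff. */
#define FANOTIFY_HTABLE_BITS	(7)	/* assumed: 128 merge buckets per group */
#define FANOTIFY_HTABLE_SIZE	(1 << FANOTIFY_HTABLE_BITS)
#define FANOTIFY_HTABLE_MASK	(FANOTIFY_HTABLE_SIZE - 1)

static inline unsigned int fanotify_event_hash_bucket(struct fsnotify_group *group,
						      struct fanotify_event *event)
{
	/* Fold the full merge key down to a hash table slot. */
	return event->hash & FANOTIFY_HTABLE_MASK;
}
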
@@ -184,8 +211,11 @@ static int fanotify_get_response(struct fsnotify_group *group,
return ret;
}
/* Event not yet reported? Just remove it. */
- if (event->state == FAN_EVENT_INIT)
+ if (event->state == FAN_EVENT_INIT) {
fsnotify_remove_queued_event(group, &event->fae.fse);
+ /* Permission events are not supposed to be hashed */
+ WARN_ON_ONCE(!hlist_unhashed(&event->fae.merge_list));
+ }
/*
* Event may be also answered in case signal delivery raced
* with wakeup. In that case we have nothing to do besides
@@ -329,7 +359,8 @@ static int fanotify_encode_fh_len(struct inode *inode)
* Return 0 on failure to encode.
*/
static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
- unsigned int fh_len, gfp_t gfp)
+ unsigned int fh_len, unsigned int *hash,
+ gfp_t gfp)
{
int dwords, type = 0;
char *ext_buf = NULL;
@@ -372,6 +403,9 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
fh->type = type;
fh->len = fh_len;
+ /* Mix fh into event merge key */
+ *hash ^= fanotify_hash_fh(fh);
+
return FANOTIFY_FH_HDR_LEN + fh_len;
out_err:
@@ -425,6 +459,7 @@ static struct inode *fanotify_dfid_inode(u32 event_mask, const void *data,
}
static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
+ unsigned int *hash,
gfp_t gfp)
{
struct fanotify_path_event *pevent;
@@ -435,6 +470,7 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH;
pevent->path = *path;
+ *hash ^= fanotify_hash_path(path);
path_get(path);
return &pevent->fae;
@@ -460,6 +496,7 @@ static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path,
static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id,
__kernel_fsid_t *fsid,
+ unsigned int *hash,
gfp_t gfp)
{
struct fanotify_fid_event *ffe;
@@ -470,16 +507,18 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id,
ffe->fae.type = FANOTIFY_EVENT_TYPE_FID;
ffe->fsid = *fsid;
+ *hash ^= fanotify_hash_fsid(fsid);
fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id),
- gfp);
+ hash, gfp);
return &ffe->fae;
}
static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
__kernel_fsid_t *fsid,
- const struct qstr *file_name,
+ const struct qstr *name,
struct inode *child,
+ unsigned int *hash,
gfp_t gfp)
{
struct fanotify_name_event *fne;
@@ -492,24 +531,30 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
size = sizeof(*fne) + FANOTIFY_FH_HDR_LEN + dir_fh_len;
if (child_fh_len)
size += FANOTIFY_FH_HDR_LEN + child_fh_len;
- if (file_name)
- size += file_name->len + 1;
+ if (name)
+ size += name->len + 1;
fne = kmalloc(size, gfp);
if (!fne)
return NULL;
fne->fae.type = FANOTIFY_EVENT_TYPE_FID_NAME;
fne->fsid = *fsid;
+ *hash ^= fanotify_hash_fsid(fsid);
info = &fne->info;
fanotify_info_init(info);
dfh = fanotify_info_dir_fh(info);
- info->dir_fh_totlen = fanotify_encode_fh(dfh, id, dir_fh_len, 0);
+ info->dir_fh_totlen = fanotify_encode_fh(dfh, id, dir_fh_len, hash, 0);
if (child_fh_len) {
ffh = fanotify_info_file_fh(info);
- info->file_fh_totlen = fanotify_encode_fh(ffh, child, child_fh_len, 0);
+ info->file_fh_totlen = fanotify_encode_fh(ffh, child,
+ child_fh_len, hash, 0);
+ }
+ if (name) {
+ long salt = name->len;
+
+ fanotify_info_copy_name(info, name);
+ *hash ^= full_name_hash((void *)salt, name->name, name->len);
}
- if (file_name)
- fanotify_info_copy_name(info, file_name);
pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n",
__func__, id->i_ino, size, dir_fh_len, child_fh_len,
@@ -533,6 +578,9 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
struct mem_cgroup *old_memcg;
struct inode *child = NULL;
bool name_event = false;
+ unsigned int hash = 0;
+ bool ondir = mask & FAN_ONDIR;
+ struct pid *pid;
if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) {
/*
@@ -540,8 +588,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
* report the child fid for events reported on a non-dir child
* in addition to reporting the parent fid and maybe child name.
*/
- if ((fid_mode & FAN_REPORT_FID) &&
- id != dirid && !(mask & FAN_ONDIR))
+ if ((fid_mode & FAN_REPORT_FID) && id != dirid && !ondir)
child = id;
id = dirid;
@@ -562,8 +609,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
if (!(fid_mode & FAN_REPORT_NAME)) {
name_event = !!child;
file_name = NULL;
- } else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) ||
- !(mask & FAN_ONDIR)) {
+ } else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || !ondir) {
name_event = true;
}
}
@@ -586,26 +632,25 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
event = fanotify_alloc_perm_event(path, gfp);
} else if (name_event && (file_name || child)) {
event = fanotify_alloc_name_event(id, fsid, file_name, child,
- gfp);
+ &hash, gfp);
} else if (fid_mode) {
- event = fanotify_alloc_fid_event(id, fsid, gfp);
+ event = fanotify_alloc_fid_event(id, fsid, &hash, gfp);
} else {
- event = fanotify_alloc_path_event(path, gfp);
+ event = fanotify_alloc_path_event(path, &hash, gfp);
}
if (!event)
goto out;
- /*
- * Use the victim inode instead of the watching inode as the id for
- * event queue, so event reported on parent is merged with event
- * reported on child when both directory and child watches exist.
- */
- fanotify_init_event(event, (unsigned long)id, mask);
if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
- event->pid = get_pid(task_pid(current));
+ pid = get_pid(task_pid(current));
else
- event->pid = get_pid(task_tgid(current));
+ pid = get_pid(task_tgid(current));
+
+ /* Mix event info, FAN_ONDIR flag and pid into event merge key */
+ hash ^= hash_long((unsigned long)pid | ondir, FANOTIFY_EVENT_HASH_BITS);
+ fanotify_init_event(event, hash, mask);
+ event->pid = pid;
out:
set_active_memcg(old_memcg);
@@ -645,6 +690,24 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
return fsid;
}
+/*
+ * Add an event to hash table for faster merge.
+ */
+static void fanotify_insert_event(struct fsnotify_group *group,
+ struct fsnotify_event *fsn_event)
+{
+ struct fanotify_event *event = FANOTIFY_E(fsn_event);
+ unsigned int bucket = fanotify_event_hash_bucket(group, event);
+ struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket];
+
+ assert_spin_locked(&group->notification_lock);
+
+ pr_debug("%s: group=%p event=%p bucket=%u\n", __func__,
+ group, event, bucket);
+
+ hlist_add_head(&event->merge_list, hlist);
+}
+
static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
const void *data, int data_type,
struct inode *dir,
@@ -715,7 +778,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
}
fsn_event = &event->fse;
- ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
+ ret = fsnotify_add_event(group, fsn_event, fanotify_merge,
+ fanotify_is_hashed_event(mask) ?
+ fanotify_insert_event : NULL);
if (ret) {
/* Permission events shouldn't be merged */
BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
@@ -736,11 +801,10 @@ finish:
static void fanotify_free_group_priv(struct fsnotify_group *group)
{
- struct user_struct *user;
-
- user = group->fanotify_data.user;
- atomic_dec(&user->fanotify_listeners);
- free_uid(user);
+ kfree(group->fanotify_data.merge_hash);
+ if (group->fanotify_data.ucounts)
+ dec_ucount(group->fanotify_data.ucounts,
+ UCOUNT_FANOTIFY_GROUPS);
}
static void fanotify_free_path_event(struct fanotify_event *event)
@@ -796,6 +860,13 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event)
}
}
+static void fanotify_freeing_mark(struct fsnotify_mark *mark,
+ struct fsnotify_group *group)
+{
+ if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS))
+ dec_ucount(group->fanotify_data.ucounts, UCOUNT_FANOTIFY_MARKS);
+}
+
static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
{
kmem_cache_free(fanotify_mark_cache, fsn_mark);
@@ -805,5 +876,6 @@ const struct fsnotify_ops fanotify_fsnotify_ops = {
.handle_event = fanotify_handle_event,
.free_group_priv = fanotify_free_group_priv,
.free_event = fanotify_free_event,
+ .freeing_mark = fanotify_freeing_mark,
.free_mark = fanotify_free_mark,
};
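
The per-group table referenced above (group->fanotify_data.merge_hash) is allocated on the fanotify_init() side in fanotify_user.c, which is outside this diff. A hedged sketch of that counterpart, with the helper name and GFP flags assumed rather than taken from this patch:

/*
 * Sketch only -- the matching allocation lives in fanotify_user.c; the
 * function name below is an assumption.  __hash_init() comes from
 * <linux/hashtable.h>.
 */
static struct hlist_head *fanotify_alloc_merge_hash(void)
{
	struct hlist_head *hash;

	hash = kmalloc(sizeof(struct hlist_head) << FANOTIFY_HTABLE_BITS,
		       GFP_KERNEL_ACCOUNT);
	if (!hash)
		return NULL;

	__hash_init(hash, FANOTIFY_HTABLE_SIZE);

	return hash;
}

Similarly, fanotify_is_hashed_event(), used above to pick the insert callback for fsnotify_add_event(), plausibly excludes permission and overflow events, since neither is ever merged:

/* Sketch; consistent with permission events staying off the merge hash. */
static inline bool fanotify_is_hashed_event(u32 mask)
{
	return !(fanotify_is_perm_event(mask) || (mask & FS_Q_OVERFLOW));
}
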