summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/nfsd/filecache.c14
-rw-r--r--fs/notify/dnotify/dnotify.c13
-rw-r--r--fs/notify/fanotify/fanotify.c24
-rw-r--r--fs/notify/fanotify/fanotify.h12
-rw-r--r--fs/notify/fanotify/fanotify_user.c104
-rw-r--r--fs/notify/fdinfo.c21
-rw-r--r--fs/notify/fsnotify.c89
-rw-r--r--fs/notify/group.c32
-rw-r--r--fs/notify/inotify/inotify.h19
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c47
-rw-r--r--fs/notify/mark.c112
-rw-r--r--include/linux/fanotify.h1
-rw-r--r--include/linux/fsnotify_backend.h98
-rw-r--r--include/uapi/linux/fanotify.h1
-rw-r--r--kernel/audit_fsnotify.c5
-rw-r--r--kernel/audit_tree.c34
-rw-r--r--kernel/audit_watch.c2
18 files changed, 396 insertions, 234 deletions
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 2c1b027774d4..489c9c1d8f31 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -119,14 +119,14 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf)
struct inode *inode = nf->nf_inode;
do {
- mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
+ fsnotify_group_lock(nfsd_file_fsnotify_group);
mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
- nfsd_file_fsnotify_group);
+ nfsd_file_fsnotify_group);
if (mark) {
nfm = nfsd_file_mark_get(container_of(mark,
struct nfsd_file_mark,
nfm_mark));
- mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
+ fsnotify_group_unlock(nfsd_file_fsnotify_group);
if (nfm) {
fsnotify_put_mark(mark);
break;
@@ -134,8 +134,9 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf)
/* Avoid soft lockup race with nfsd_file_mark_put() */
fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
fsnotify_put_mark(mark);
- } else
- mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
+ } else {
+ fsnotify_group_unlock(nfsd_file_fsnotify_group);
+ }
/* allocate a new nfm */
new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
@@ -678,7 +679,8 @@ nfsd_file_cache_init(void)
goto out_shrinker;
}
- nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
+ nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
+ FSNOTIFY_GROUP_NOFS);
if (IS_ERR(nfsd_file_fsnotify_group)) {
pr_err("nfsd: unable to create fsnotify group: %ld\n",
PTR_ERR(nfsd_file_fsnotify_group));
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 829dd4a61b66..190aa717fa32 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -168,7 +168,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
return;
dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
- mutex_lock(&dnotify_group->mark_mutex);
+ fsnotify_group_lock(dnotify_group);
spin_lock(&fsn_mark->lock);
prev = &dn_mark->dn;
@@ -191,7 +191,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
free = true;
}
- mutex_unlock(&dnotify_group->mark_mutex);
+ fsnotify_group_unlock(dnotify_group);
if (free)
fsnotify_free_mark(fsn_mark);
@@ -324,7 +324,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
new_dn_mark->dn = NULL;
/* this is needed to prevent the fcntl/close race described below */
- mutex_lock(&dnotify_group->mark_mutex);
+ fsnotify_group_lock(dnotify_group);
/* add the new_fsn_mark or find an old one. */
fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group);
@@ -334,7 +334,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
} else {
error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0);
if (error) {
- mutex_unlock(&dnotify_group->mark_mutex);
+ fsnotify_group_unlock(dnotify_group);
goto out_err;
}
spin_lock(&new_fsn_mark->lock);
@@ -383,7 +383,7 @@ out:
if (destroy)
fsnotify_detach_mark(fsn_mark);
- mutex_unlock(&dnotify_group->mark_mutex);
+ fsnotify_group_unlock(dnotify_group);
if (destroy)
fsnotify_free_mark(fsn_mark);
fsnotify_put_mark(fsn_mark);
@@ -401,7 +401,8 @@ static int __init dnotify_init(void)
SLAB_PANIC|SLAB_ACCOUNT);
dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);
- dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
+ dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops,
+ FSNOTIFY_GROUP_NOFS);
if (IS_ERR(dnotify_group))
panic("unable to allocate fsnotify group for dnotify\n");
dnotify_sysctl_init();
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 985e995d2a39..4f897e109547 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -319,12 +319,8 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
return 0;
}
- fsnotify_foreach_iter_type(type) {
- if (!fsnotify_iter_should_report_type(iter_info, type))
- continue;
- mark = iter_info->marks[type];
-
- /* Apply ignore mask regardless of ISDIR and ON_CHILD flags */
+ fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
+ /* Apply ignore mask regardless of mark's ISDIR flag */
marks_ignored_mask |= mark->ignored_mask;
/*
@@ -334,14 +330,6 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
if (event_mask & FS_ISDIR && !(mark->mask & FS_ISDIR))
continue;
- /*
- * If the event is on a child and this mark is on a parent not
- * watching children, don't send it!
- */
- if (type == FSNOTIFY_ITER_TYPE_PARENT &&
- !(mark->mask & FS_EVENT_ON_CHILD))
- continue;
-
marks_mask |= mark->mask;
/* Record the mark types of this group that matched the event */
@@ -849,16 +837,14 @@ out:
*/
static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
{
+ struct fsnotify_mark *mark;
int type;
__kernel_fsid_t fsid = {};
- fsnotify_foreach_iter_type(type) {
+ fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
struct fsnotify_mark_connector *conn;
- if (!fsnotify_iter_should_report_type(iter_info, type))
- continue;
-
- conn = READ_ONCE(iter_info->marks[type]->connector);
+ conn = READ_ONCE(mark->connector);
/* Mark is just getting destroyed or created? */
if (!conn)
continue;
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index a3d5b751cac5..80e0ec95b113 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -490,3 +490,15 @@ static inline unsigned int fanotify_event_hash_bucket(
{
return event->hash & FANOTIFY_HTABLE_MASK;
}
+
+static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
+{
+ unsigned int mflags = 0;
+
+ if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
+ mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
+ if (mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)
+ mflags |= FAN_MARK_EVICTABLE;
+
+ return mflags;
+}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index a792e21c5309..c2255b440df9 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -264,7 +264,7 @@ static int create_fd(struct fsnotify_group *group, struct path *path,
* originally opened O_WRONLY.
*/
new_file = dentry_open(path,
- group->fanotify_data.f_flags | FMODE_NONOTIFY,
+ group->fanotify_data.f_flags | __FMODE_NONOTIFY,
current_cred());
if (IS_ERR(new_file)) {
/*
@@ -1035,10 +1035,10 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
__u32 removed;
int destroy_mark;
- mutex_lock(&group->mark_mutex);
+ fsnotify_group_lock(group);
fsn_mark = fsnotify_find_mark(connp, group);
if (!fsn_mark) {
- mutex_unlock(&group->mark_mutex);
+ fsnotify_group_unlock(group);
return -ENOENT;
}
@@ -1048,7 +1048,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
fsnotify_recalc_mask(fsn_mark->connector);
if (destroy_mark)
fsnotify_detach_mark(fsn_mark);
- mutex_unlock(&group->mark_mutex);
+ fsnotify_group_unlock(group);
if (destroy_mark)
fsnotify_free_mark(fsn_mark);
@@ -1081,47 +1081,63 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
flags, umask);
}
-static void fanotify_mark_add_ignored_mask(struct fsnotify_mark *fsn_mark,
- __u32 mask, unsigned int flags,
- __u32 *removed)
+static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark,
+ unsigned int fan_flags)
{
- fsn_mark->ignored_mask |= mask;
+ bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE);
+ bool recalc = false;
/*
* Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to
* the removal of the FS_MODIFY bit in calculated mask if it was set
* because of an ignored mask that is now going to survive FS_MODIFY.
*/
- if ((flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
+ if ((fan_flags & FAN_MARK_IGNORED_MASK) &&
+ (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
!(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) {
fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
if (!(fsn_mark->mask & FS_MODIFY))
- *removed = FS_MODIFY;
+ recalc = true;
}
+
+ if (fsn_mark->connector->type != FSNOTIFY_OBJ_TYPE_INODE ||
+ want_iref == !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
+ return recalc;
+
+ /*
+ * NO_IREF may be removed from a mark, but not added.
+ * When removed, fsnotify_recalc_mask() will take the inode ref.
+ */
+ WARN_ON_ONCE(!want_iref);
+ fsn_mark->flags &= ~FSNOTIFY_MARK_FLAG_NO_IREF;
+
+ return true;
}
-static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
- __u32 mask, unsigned int flags,
- __u32 *removed)
+static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
+ __u32 mask, unsigned int fan_flags)
{
- __u32 oldmask, newmask;
+ bool recalc;
spin_lock(&fsn_mark->lock);
- oldmask = fsnotify_calc_mask(fsn_mark);
- if (!(flags & FAN_MARK_IGNORED_MASK)) {
+ if (!(fan_flags & FAN_MARK_IGNORED_MASK))
fsn_mark->mask |= mask;
- } else {
- fanotify_mark_add_ignored_mask(fsn_mark, mask, flags, removed);
- }
- newmask = fsnotify_calc_mask(fsn_mark);
+ else
+ fsn_mark->ignored_mask |= mask;
+
+ recalc = fsnotify_calc_mask(fsn_mark) &
+ ~fsnotify_conn_mask(fsn_mark->connector);
+
+ recalc |= fanotify_mark_update_flags(fsn_mark, fan_flags);
spin_unlock(&fsn_mark->lock);
- return newmask & ~oldmask;
+ return recalc;
}
static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp,
unsigned int obj_type,
+ unsigned int fan_flags,
__kernel_fsid_t *fsid)
{
struct ucounts *ucounts = group->fanotify_data.ucounts;
@@ -1144,6 +1160,9 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
}
fsnotify_init_mark(mark, group);
+ if (fan_flags & FAN_MARK_EVICTABLE)
+ mark->flags |= FSNOTIFY_MARK_FLAG_NO_IREF;
+
ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0, fsid);
if (ret) {
fsnotify_put_mark(mark);
@@ -1170,39 +1189,49 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
static int fanotify_add_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp, unsigned int obj_type,
- __u32 mask, unsigned int flags,
+ __u32 mask, unsigned int fan_flags,
__kernel_fsid_t *fsid)
{
struct fsnotify_mark *fsn_mark;
- __u32 added, removed = 0;
+ bool recalc;
int ret = 0;
- mutex_lock(&group->mark_mutex);
+ fsnotify_group_lock(group);
fsn_mark = fsnotify_find_mark(connp, group);
if (!fsn_mark) {
- fsn_mark = fanotify_add_new_mark(group, connp, obj_type, fsid);
+ fsn_mark = fanotify_add_new_mark(group, connp, obj_type,
+ fan_flags, fsid);
if (IS_ERR(fsn_mark)) {
- mutex_unlock(&group->mark_mutex);
+ fsnotify_group_unlock(group);
return PTR_ERR(fsn_mark);
}
}
/*
+ * Non evictable mark cannot be downgraded to evictable mark.
+ */
+ if (fan_flags & FAN_MARK_EVICTABLE &&
+ !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) {
+ ret = -EEXIST;
+ goto out;
+ }
+
+ /*
* Error events are pre-allocated per group, only if strictly
* needed (i.e. FAN_FS_ERROR was requested).
*/
- if (!(flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) {
+ if (!(fan_flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) {
ret = fanotify_group_init_error_pool(group);
if (ret)
goto out;
}
- added = fanotify_mark_add_to_mask(fsn_mark, mask, flags, &removed);
- if (removed || (added & ~fsnotify_conn_mask(fsn_mark->connector)))
+ recalc = fanotify_mark_add_to_mask(fsn_mark, mask, fan_flags);
+ if (recalc)
fsnotify_recalc_mask(fsn_mark->connector);
out:
- mutex_unlock(&group->mark_mutex);
+ fsnotify_group_unlock(group);
fsnotify_put_mark(fsn_mark);
return ret;
@@ -1348,14 +1377,15 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
(!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID)))
return -EINVAL;
- f_flags = O_RDWR | FMODE_NONOTIFY;
+ f_flags = O_RDWR | __FMODE_NONOTIFY;
if (flags & FAN_CLOEXEC)
f_flags |= O_CLOEXEC;
if (flags & FAN_NONBLOCK)
f_flags |= O_NONBLOCK;
/* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
- group = fsnotify_alloc_user_group(&fanotify_fsnotify_ops);
+ group = fsnotify_alloc_group(&fanotify_fsnotify_ops,
+ FSNOTIFY_GROUP_USER | FSNOTIFY_GROUP_NOFS);
if (IS_ERR(group)) {
return PTR_ERR(group);
}
@@ -1598,6 +1628,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
goto fput_and_out;
/*
+ * Evictable is only relevant for inode marks, because only inode object
+ * can be evicted on memory pressure.
+ */
+ if (flags & FAN_MARK_EVICTABLE &&
+ mark_type != FAN_MARK_INODE)
+ goto fput_and_out;
+
+ /*
* Events that do not carry enough information to report
* event->fd require a group that supports reporting fid. Those
* events are not supported on a mount mark, because they do not
@@ -1762,7 +1800,7 @@ static int __init fanotify_user_setup(void)
BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12);
- BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 10);
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
SLAB_PANIC|SLAB_ACCOUNT);
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 57f0d5d9f934..59fb40abe33d 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -14,6 +14,7 @@
#include <linux/exportfs.h>
#include "inotify/inotify.h"
+#include "fanotify/fanotify.h"
#include "fdinfo.h"
#include "fsnotify.h"
@@ -28,13 +29,13 @@ static void show_fdinfo(struct seq_file *m, struct file *f,
struct fsnotify_group *group = f->private_data;
struct fsnotify_mark *mark;
- mutex_lock(&group->mark_mutex);
+ fsnotify_group_lock(group);
list_for_each_entry(mark, &group->marks_list, g_list) {
show(m, mark);
if (seq_has_overflowed(m))
break;
}
- mutex_unlock(&group->mark_mutex);
+ fsnotify_group_unlock(group);
}
#if defined(CONFIG_EXPORTFS)
@@ -83,16 +84,9 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
inode = igrab(fsnotify_conn_inode(mark->connector));
if (inode) {
- /*
- * IN_ALL_EVENTS represents all of the mask bits
- * that we expose to userspace. There is at
- * least one bit (FS_EVENT_ON_CHILD) which is
- * used only internally to the kernel.
- */
- u32 mask = mark->mask & IN_ALL_EVENTS;
- seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ",
+ seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:0 ",
inode_mark->wd, inode->i_ino, inode->i_sb->s_dev,
- mask, mark->ignored_mask);
+ inotify_mark_user_mask(mark));
show_mark_fhandle(m, inode);
seq_putc(m, '\n');
iput(inode);
@@ -110,12 +104,9 @@ void inotify_show_fdinfo(struct seq_file *m, struct file *f)
static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
{
- unsigned int mflags = 0;
+ unsigned int mflags = fanotify_mark_user_flags(mark);
struct inode *inode;
- if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
- mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
-
if (mark->connector->type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = igrab(fsnotify_conn_inode(mark->connector));
if (!inode)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 70a8516b78bc..0b3e74935cb4 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -253,7 +253,7 @@ static int fsnotify_handle_inode_event(struct fsnotify_group *group,
if (WARN_ON_ONCE(!inode && !dir))
return 0;
- if ((inode_mark->mask & FS_EXCL_UNLINK) &&
+ if ((inode_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) &&
path && d_unlinked(path->dentry))
return 0;
@@ -290,22 +290,15 @@ static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask,
}
if (parent_mark) {
- /*
- * parent_mark indicates that the parent inode is watching
- * children and interested in this event, which is an event
- * possible on child. But is *this mark* watching children and
- * interested in this event?
- */
- if (parent_mark->mask & FS_EVENT_ON_CHILD) {
- ret = fsnotify_handle_inode_event(group, parent_mark, mask,
- data, data_type, dir, name, 0);
- if (ret)
- return ret;
- }
- if (!inode_mark)
- return 0;
+ ret = fsnotify_handle_inode_event(group, parent_mark, mask,
+ data, data_type, dir, name, 0);
+ if (ret)
+ return ret;
}
+ if (!inode_mark)
+ return 0;
+
if (mask & FS_EVENT_ON_CHILD) {
/*
* Some events can be sent on both parent dir and child marks
@@ -335,31 +328,23 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
struct fsnotify_mark *mark;
int type;
- if (WARN_ON(!iter_info->report_mask))
+ if (!iter_info->report_mask)
return 0;
/* clear ignored on inode modification */
if (mask & FS_MODIFY) {
- fsnotify_foreach_iter_type(type) {
- if (!fsnotify_iter_should_report_type(iter_info, type))
- continue;
- mark = iter_info->marks[type];
- if (mark &&
- !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
+ fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
+ if (!(mark->flags &
+ FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
mark->ignored_mask = 0;
}
}
- fsnotify_foreach_iter_type(type) {
- if (!fsnotify_iter_should_report_type(iter_info, type))
- continue;
- mark = iter_info->marks[type];
- /* does the object mark tell us to do something? */
- if (mark) {
- group = mark->group;
- marks_mask |= mark->mask;
- marks_ignored_mask |= mark->ignored_mask;
- }
+ /* Are any of the group marks interested in this event? */
+ fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
+ group = mark->group;
+ marks_mask |= mark->mask;
+ marks_ignored_mask |= mark->ignored_mask;
}
pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
@@ -403,11 +388,11 @@ static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark)
/*
* iter_info is a multi head priority queue of marks.
- * Pick a subset of marks from queue heads, all with the
- * same group and set the report_mask for selected subset.
- * Returns the report_mask of the selected subset.
+ * Pick a subset of marks from queue heads, all with the same group
+ * and set the report_mask to a subset of the selected marks.
+ * Returns false if there are no more groups to iterate.
*/
-static unsigned int fsnotify_iter_select_report_types(
+static bool fsnotify_iter_select_report_types(
struct fsnotify_iter_info *iter_info)
{
struct fsnotify_group *max_prio_group = NULL;
@@ -423,30 +408,48 @@ static unsigned int fsnotify_iter_select_report_types(
}
if (!max_prio_group)
- return 0;
+ return false;
/* Set the report mask for marks from same group as max prio group */
+ iter_info->current_group = max_prio_group;
iter_info->report_mask = 0;
fsnotify_foreach_iter_type(type) {
mark = iter_info->marks[type];
- if (mark &&
- fsnotify_compare_groups(max_prio_group, mark->group) == 0)
+ if (mark && mark->group == iter_info->current_group) {
+ /*
+ * FSNOTIFY_ITER_TYPE_PARENT indicates that this inode
+ * is watching children and interested in this event,
+ * which is an event possible on child.
+ * But is *this mark* watching children?
+ */
+ if (type == FSNOTIFY_ITER_TYPE_PARENT &&
+ !(mark->mask & FS_EVENT_ON_CHILD))
+ continue;
+
fsnotify_iter_set_report_type(iter_info, type);
+ }
}
- return iter_info->report_mask;
+ return true;
}
/*
- * Pop from iter_info multi head queue, the marks that were iterated in the
+ * Pop from iter_info multi head queue, the marks that belong to the group of
* current iteration step.
*/
static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info)
{
+ struct fsnotify_mark *mark;
int type;
+ /*
+ * We cannot use fsnotify_foreach_iter_mark_type() here because we
+ * may need to advance a mark of type X that belongs to current_group
+ * but was not selected for reporting.
+ */
fsnotify_foreach_iter_type(type) {
- if (fsnotify_iter_should_report_type(iter_info, type))
+ mark = iter_info->marks[type];
+ if (mark && mark->group == iter_info->current_group)
iter_info->marks[type] =
fsnotify_next_mark(iter_info->marks[type]);
}
@@ -581,7 +584,7 @@ static __init int fsnotify_init(void)
{
int ret;
- BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23);
ret = init_srcu_struct(&fsnotify_mark_srcu);
if (ret)
diff --git a/fs/notify/group.c b/fs/notify/group.c
index b7d4d64f87c2..1de6631a3925 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -112,8 +112,10 @@ void fsnotify_put_group(struct fsnotify_group *group)
EXPORT_SYMBOL_GPL(fsnotify_put_group);
static struct fsnotify_group *__fsnotify_alloc_group(
- const struct fsnotify_ops *ops, gfp_t gfp)
+ const struct fsnotify_ops *ops,
+ int flags, gfp_t gfp)
{
+ static struct lock_class_key nofs_marks_lock;
struct fsnotify_group *group;
group = kzalloc(sizeof(struct fsnotify_group), gfp);
@@ -133,6 +135,17 @@ static struct fsnotify_group *__fsnotify_alloc_group(
INIT_LIST_HEAD(&group->marks_list);
group->ops = ops;
+ group->flags = flags;
+ /*
+ * For most backends, eviction of inode with a mark is not expected,
+ * because marks hold a refcount on the inode against eviction.
+ *
+ * Use a different lockdep class for groups that support evictable
+ * inode marks, because with evictable marks, mark_mutex is NOT
+ * fs-reclaim safe - the mutex is taken when evicting inodes.
+ */
+ if (flags & FSNOTIFY_GROUP_NOFS)
+ lockdep_set_class(&group->mark_mutex, &nofs_marks_lock);
return group;
}
@@ -140,20 +153,15 @@ static struct fsnotify_group *__fsnotify_alloc_group(
/*
* Create a new fsnotify_group and hold a reference for the group returned.
*/
-struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
+struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops,
+ int flags)
{
- return __fsnotify_alloc_group(ops, GFP_KERNEL);
-}
-EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
+ gfp_t gfp = (flags & FSNOTIFY_GROUP_USER) ? GFP_KERNEL_ACCOUNT :
+ GFP_KERNEL;
-/*
- * Create a new fsnotify_group and hold a reference for the group returned.
- */
-struct fsnotify_group *fsnotify_alloc_user_group(const struct fsnotify_ops *ops)
-{
- return __fsnotify_alloc_group(ops, GFP_KERNEL_ACCOUNT);
+ return __fsnotify_alloc_group(ops, flags, gfp);
}
-EXPORT_SYMBOL_GPL(fsnotify_alloc_user_group);
+EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
int fsnotify_fasync(int fd, struct file *file, int on)
{
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index 2007e3711916..7d5df7a21539 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -22,6 +22,25 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
return container_of(fse, struct inotify_event_info, fse);
}
+/*
+ * INOTIFY_USER_FLAGS represents all of the mask bits that we expose to
+ * userspace. There is at least one bit (FS_EVENT_ON_CHILD) which is
+ * used only internally to the kernel.
+ */
+#define INOTIFY_USER_MASK (IN_ALL_EVENTS)
+
+static inline __u32 inotify_mark_user_mask(struct fsnotify_mark *fsn_mark)
+{
+ __u32 mask = fsn_mark->mask & INOTIFY_USER_MASK;
+
+ if (fsn_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK)
+ mask |= IN_EXCL_UNLINK;
+ if (fsn_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT)
+ mask |= IN_ONESHOT;
+
+ return mask;
+}
+
extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group);
extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark,
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index d92d7b0adc9a..49cfe2ae6d23 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -122,7 +122,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
fsnotify_destroy_event(group, fsn_event);
}
- if (inode_mark->mask & IN_ONESHOT)
+ if (inode_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT)
fsnotify_destroy_mark(inode_mark, group);
return 0;
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 54583f62dc44..ed42a189faa2 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -110,11 +110,26 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
mask |= FS_EVENT_ON_CHILD;
/* mask off the flags used to open the fd */
- mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK));
+ mask |= (arg & INOTIFY_USER_MASK);
return mask;
}
+#define INOTIFY_MARK_FLAGS \
+ (FSNOTIFY_MARK_FLAG_EXCL_UNLINK | FSNOTIFY_MARK_FLAG_IN_ONESHOT)
+
+static inline unsigned int inotify_arg_to_flags(u32 arg)
+{
+ unsigned int flags = 0;
+
+ if (arg & IN_EXCL_UNLINK)
+ flags |= FSNOTIFY_MARK_FLAG_EXCL_UNLINK;
+ if (arg & IN_ONESHOT)
+ flags |= FSNOTIFY_MARK_FLAG_IN_ONESHOT;
+
+ return flags;
+}
+
static inline u32 inotify_mask_to_arg(__u32 mask)
{
return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED |
@@ -526,13 +541,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
struct fsnotify_mark *fsn_mark;
struct inotify_inode_mark *i_mark;
__u32 old_mask, new_mask;
- __u32 mask;
- int add = (arg & IN_MASK_ADD);
+ int replace = !(arg & IN_MASK_ADD);
int create = (arg & IN_MASK_CREATE);
int ret;
- mask = inotify_arg_to_mask(inode, arg);
-
fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
if (!fsn_mark)
return -ENOENT;
@@ -545,10 +557,12 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
spin_lock(&fsn_mark->lock);
old_mask = fsn_mark->mask;
- if (add)
- fsn_mark->mask |= mask;
- else
- fsn_mark->mask = mask;
+ if (replace) {
+ fsn_mark->mask = 0;
+ fsn_mark->flags &= ~INOTIFY_MARK_FLAGS;
+ }
+ fsn_mark->mask |= inotify_arg_to_mask(inode, arg);
+ fsn_mark->flags |= inotify_arg_to_flags(arg);
new_mask = fsn_mark->mask;
spin_unlock(&fsn_mark->lock);
@@ -579,19 +593,17 @@ static int inotify_new_watch(struct fsnotify_group *group,
u32 arg)
{
struct inotify_inode_mark *tmp_i_mark;
- __u32 mask;
int ret;
struct idr *idr = &group->inotify_data.idr;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
- mask = inotify_arg_to_mask(inode, arg);
-
tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
if (unlikely(!tmp_i_mark))
return -ENOMEM;
fsnotify_init_mark(&tmp_i_mark->fsn_mark, group);
- tmp_i_mark->fsn_mark.mask = mask;
+ tmp_i_mark->fsn_mark.mask = inotify_arg_to_mask(inode, arg);
+ tmp_i_mark->fsn_mark.flags = inotify_arg_to_flags(arg);
tmp_i_mark->wd = -1;
ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
@@ -628,13 +640,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod
{
int ret = 0;
- mutex_lock(&group->mark_mutex);
+ fsnotify_group_lock(group);
/* try to update and existing watch with the new arg */
ret = inotify_update_existing_watch(group, inode, arg);
/* no mark present, try to add a new one */
if (ret == -ENOENT)
ret = inotify_new_watch(group, inode, arg);
- mutex_unlock(&group->mark_mutex);
+ fsnotify_group_unlock(group);
return ret;
}
@@ -644,7 +656,8 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
struct fsnotify_group *group;
struct inotify_event_info *oevent;
- group = fsnotify_alloc_user_group(&inotify_fsnotify_ops);
+ group = fsnotify_alloc_group(&inotify_fsnotify_ops,
+ FSNOTIFY_GROUP_USER);
if (IS_ERR(group))
return group;
@@ -845,9 +858,7 @@ static int __init inotify_user_setup(void)
BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
- BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
- BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 4853184f7dde..c74ef947447d 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -116,20 +116,64 @@ __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn)
return *fsnotify_conn_mask_p(conn);
}
-static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
+static void fsnotify_get_inode_ref(struct inode *inode)
+{
+ ihold(inode);
+ atomic_long_inc(&inode->i_sb->s_fsnotify_connectors);
+}
+
+/*
+ * Grab or drop inode reference for the connector if needed.
+ *
+ * When it's time to drop the reference, we only clear the HAS_IREF flag and
+ * return the inode object. fsnotify_drop_object() will be resonsible for doing
+ * iput() outside of spinlocks. This happens when last mark that wanted iref is
+ * detached.
+ */
+static struct inode *fsnotify_update_iref(struct fsnotify_mark_connector *conn,
+ bool want_iref)
+{
+ bool has_iref = conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF;
+ struct inode *inode = NULL;
+
+ if (conn->type != FSNOTIFY_OBJ_TYPE_INODE ||
+ want_iref == has_iref)
+ return NULL;
+
+ if (want_iref) {
+ /* Pin inode if any mark wants inode refcount held */
+ fsnotify_get_inode_ref(fsnotify_conn_inode(conn));
+ conn->flags |= FSNOTIFY_CONN_FLAG_HAS_IREF;
+ } else {
+ /* Unpin inode after detach of last mark that wanted iref */
+ inode = fsnotify_conn_inode(conn);
+ conn->flags &= ~FSNOTIFY_CONN_FLAG_HAS_IREF;
+ }
+
+ return inode;
+}
+
+static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
u32 new_mask = 0;
+ bool want_iref = false;
struct fsnotify_mark *mark;
assert_spin_locked(&conn->lock);
/* We can get detached connector here when inode is getting unlinked. */
if (!fsnotify_valid_obj_type(conn->type))
- return;
+ return NULL;
hlist_for_each_entry(mark, &conn->list, obj_list) {
- if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
- new_mask |= fsnotify_calc_mask(mark);
+ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
+ continue;
+ new_mask |= fsnotify_calc_mask(mark);
+ if (conn->type == FSNOTIFY_OBJ_TYPE_INODE &&
+ !(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
+ want_iref = true;
}
*fsnotify_conn_mask_p(conn) = new_mask;
+
+ return fsnotify_update_iref(conn, want_iref);
}
/*
@@ -169,12 +213,6 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
}
}
-static void fsnotify_get_inode_ref(struct inode *inode)
-{
- ihold(inode);
- atomic_long_inc(&inode->i_sb->s_fsnotify_connectors);
-}
-
static void fsnotify_put_inode_ref(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
@@ -213,6 +251,10 @@ static void *fsnotify_detach_connector_from_object(
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = fsnotify_conn_inode(conn);
inode->i_fsnotify_mask = 0;
+
+ /* Unpin inode when detaching from connector */
+ if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF))
+ inode = NULL;
} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
@@ -274,7 +316,8 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
objp = fsnotify_detach_connector_from_object(conn, &type);
free_conn = true;
} else {
- __fsnotify_recalc_mask(conn);
+ objp = __fsnotify_recalc_mask(conn);
+ type = conn->type;
}
WRITE_ONCE(mark->connector, NULL);
spin_unlock(&conn->lock);
@@ -398,9 +441,7 @@ void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
*/
void fsnotify_detach_mark(struct fsnotify_mark *mark)
{
- struct fsnotify_group *group = mark->group;
-
- WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
+ fsnotify_group_assert_locked(mark->group);
WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
refcount_read(&mark->refcnt) < 1 +
!!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));
@@ -452,9 +493,9 @@ void fsnotify_free_mark(struct fsnotify_mark *mark)
void fsnotify_destroy_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group)
{
- mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
+ fsnotify_group_lock(group);
fsnotify_detach_mark(mark);
- mutex_unlock(&group->mark_mutex);
+ fsnotify_group_unlock(group);
fsnotify_free_mark(mark);
}
EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
@@ -499,7 +540,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
unsigned int obj_type,
__kernel_fsid_t *fsid)
{
- struct inode *inode = NULL;
struct fsnotify_mark_connector *conn;
conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
@@ -507,6 +547,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
return -ENOMEM;
spin_lock_init(&conn->lock);
INIT_HLIST_HEAD(&conn->list);
+ conn->flags = 0;
conn->type = obj_type;
conn->obj = connp;
/* Cache fsid of filesystem containing the object */
@@ -517,10 +558,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
conn->fsid.val[0] = conn->fsid.val[1] = 0;
conn->flags = 0;
}
- if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
- inode = fsnotify_conn_inode(conn);
- fsnotify_get_inode_ref(inode);
- }
fsnotify_get_sb_connectors(conn);
/*
@@ -529,8 +566,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
*/
if (cmpxchg(connp, NULL, conn)) {
/* Someone else created list structure for us */
- if (inode)
- fsnotify_put_inode_ref(inode);
fsnotify_put_sb_connectors(conn);
kmem_cache_free(fsnotify_mark_connector_cachep, conn);
}
@@ -574,7 +609,7 @@ out:
static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
fsnotify_connp_t *connp,
unsigned int obj_type,
- int allow_dups, __kernel_fsid_t *fsid)
+ int add_flags, __kernel_fsid_t *fsid)
{
struct fsnotify_mark *lmark, *last = NULL;
struct fsnotify_mark_connector *conn;
@@ -633,7 +668,7 @@ restart:
if ((lmark->group == mark->group) &&
(lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) &&
- !allow_dups) {
+ !(mark->group->flags & FSNOTIFY_GROUP_DUPS)) {
err = -EEXIST;
goto out_err;
}
@@ -668,12 +703,12 @@ out_err:
*/
int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, unsigned int obj_type,
- int allow_dups, __kernel_fsid_t *fsid)
+ int add_flags, __kernel_fsid_t *fsid)
{
struct fsnotify_group *group = mark->group;
int ret = 0;
- BUG_ON(!mutex_is_locked(&group->mark_mutex));
+ fsnotify_group_assert_locked(group);
/*
* LOCKING ORDER!!!!
@@ -688,12 +723,11 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_get_mark(mark); /* for g_list */
spin_unlock(&mark->lock);
- ret = fsnotify_add_mark_list(mark, connp, obj_type, allow_dups, fsid);
+ ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags, fsid);
if (ret)
goto err;
- if (mark->mask || mark->ignored_mask)
- fsnotify_recalc_mask(mark->connector);
+ fsnotify_recalc_mask(mark->connector);
return ret;
err:
@@ -708,15 +742,15 @@ err:
}
int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
- unsigned int obj_type, int allow_dups,
+ unsigned int obj_type, int add_flags,
__kernel_fsid_t *fsid)
{
int ret;
struct fsnotify_group *group = mark->group;
- mutex_lock(&group->mark_mutex);
- ret = fsnotify_add_mark_locked(mark, connp, obj_type, allow_dups, fsid);
- mutex_unlock(&group->mark_mutex);
+ fsnotify_group_lock(group);
+ ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags, fsid);
+ fsnotify_group_unlock(group);
return ret;
}
EXPORT_SYMBOL_GPL(fsnotify_add_mark);
@@ -770,24 +804,24 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
* move marks to free to to_free list in one go and then free marks in
* to_free list one by one.
*/
- mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
+ fsnotify_group_lock(group);
list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
if (mark->connector->type == obj_type)
list_move(&mark->g_list, &to_free);
}
- mutex_unlock(&group->mark_mutex);
+ fsnotify_group_unlock(group);
clear:
while (1) {
- mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
+ fsnotify_group_lock(group);
if (list_empty(head)) {
- mutex_unlock(&group->mark_mutex);
+ fsnotify_group_unlock(group);
break;
}
mark = list_first_entry(head, struct fsnotify_mark, g_list);
fsnotify_get_mark(mark);
fsnotify_detach_mark(mark);
- mutex_unlock(&group->mark_mutex);
+ fsnotify_group_unlock(group);
fsnotify_free_mark(mark);
fsnotify_put_mark(mark);
}
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 419cadcd7ff5..edc28555814c 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -66,6 +66,7 @@
FAN_MARK_ONLYDIR | \
FAN_MARK_IGNORED_MASK | \
FAN_MARK_IGNORED_SURV_MODIFY | \
+ FAN_MARK_EVICTABLE | \
FAN_MARK_FLUSH)
/*
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 0805b74cae44..9560734759fa 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -20,6 +20,7 @@
#include <linux/user_namespace.h>
#include <linux/refcount.h>
#include <linux/mempool.h>
+#include <linux/sched/mm.h>
/*
* IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
@@ -55,7 +56,6 @@
#define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */
#define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */
-#define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */
/*
* Set on inode mark that cares about things that happen to its children.
* Always set for dnotify and inotify.
@@ -66,7 +66,6 @@
#define FS_RENAME 0x10000000 /* File was renamed */
#define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */
#define FS_ISDIR 0x40000000 /* event occurred against dir */
-#define FS_IN_ONESHOT 0x80000000 /* only send event once */
#define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO)
@@ -106,8 +105,7 @@
FS_ERROR)
/* Extra flags that may be reported with event or control handling of events */
-#define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \
- FS_DN_MULTISHOT | FS_EVENT_ON_CHILD)
+#define ALL_FSNOTIFY_FLAGS (FS_ISDIR | FS_EVENT_ON_CHILD | FS_DN_MULTISHOT)
#define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS)
@@ -213,6 +211,12 @@ struct fsnotify_group {
unsigned int priority;
bool shutdown; /* group is being shut down, don't queue more events */
+#define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */
+#define FSNOTIFY_GROUP_DUPS 0x02 /* allow multiple marks per object */
+#define FSNOTIFY_GROUP_NOFS 0x04 /* group lock is not direct reclaim safe */
+ int flags;
+ unsigned int owner_flags; /* stored flags of mark_mutex owner */
+
/* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
struct mutex mark_mutex; /* protect marks_list */
atomic_t user_waits; /* Number of tasks waiting for user
@@ -253,6 +257,31 @@ struct fsnotify_group {
};
};
+/*
+ * These helpers are used to prevent deadlock when reclaiming inodes with
+ * evictable marks of the same group that is allocating a new mark.
+ */
+static inline void fsnotify_group_lock(struct fsnotify_group *group)
+{
+ mutex_lock(&group->mark_mutex);
+ if (group->flags & FSNOTIFY_GROUP_NOFS)
+ group->owner_flags = memalloc_nofs_save();
+}
+
+static inline void fsnotify_group_unlock(struct fsnotify_group *group)
+{
+ if (group->flags & FSNOTIFY_GROUP_NOFS)
+ memalloc_nofs_restore(group->owner_flags);
+ mutex_unlock(&group->mark_mutex);
+}
+
+static inline void fsnotify_group_assert_locked(struct fsnotify_group *group)
+{
+ WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
+ if (group->flags & FSNOTIFY_GROUP_NOFS)
+ WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS));
+}
+
/* When calling fsnotify tell it if the data is a path or inode */
enum fsnotify_data_type {
FSNOTIFY_EVENT_NONE,
@@ -370,6 +399,7 @@ static inline bool fsnotify_valid_obj_type(unsigned int obj_type)
struct fsnotify_iter_info {
struct fsnotify_mark *marks[FSNOTIFY_ITER_TYPE_COUNT];
+ struct fsnotify_group *current_group;
unsigned int report_mask;
int srcu_idx;
};
@@ -386,20 +416,31 @@ static inline void fsnotify_iter_set_report_type(
iter_info->report_mask |= (1U << iter_type);
}
-static inline void fsnotify_iter_set_report_type_mark(
- struct fsnotify_iter_info *iter_info, int iter_type,
- struct fsnotify_mark *mark)
+static inline struct fsnotify_mark *fsnotify_iter_mark(
+ struct fsnotify_iter_info *iter_info, int iter_type)
{
- iter_info->marks[iter_type] = mark;
- iter_info->report_mask |= (1U << iter_type);
+ if (fsnotify_iter_should_report_type(iter_info, iter_type))
+ return iter_info->marks[iter_type];
+ return NULL;
+}
+
+static inline int fsnotify_iter_step(struct fsnotify_iter_info *iter, int type,
+ struct fsnotify_mark **markp)
+{
+ while (type < FSNOTIFY_ITER_TYPE_COUNT) {
+ *markp = fsnotify_iter_mark(iter, type);
+ if (*markp)
+ break;
+ type++;
+ }
+ return type;
}
#define FSNOTIFY_ITER_FUNCS(name, NAME) \
static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \
struct fsnotify_iter_info *iter_info) \
{ \
- return (iter_info->report_mask & (1U << FSNOTIFY_ITER_TYPE_##NAME)) ? \
- iter_info->marks[FSNOTIFY_ITER_TYPE_##NAME] : NULL; \
+ return fsnotify_iter_mark(iter_info, FSNOTIFY_ITER_TYPE_##NAME); \
}
FSNOTIFY_ITER_FUNCS(inode, INODE)
@@ -409,6 +450,11 @@ FSNOTIFY_ITER_FUNCS(sb, SB)
#define fsnotify_foreach_iter_type(type) \
for (type = 0; type < FSNOTIFY_ITER_TYPE_COUNT; type++)
+#define fsnotify_foreach_iter_mark_type(iter, mark, type) \
+ for (type = 0; \
+ type = fsnotify_iter_step(iter, type, &mark), \
+ type < FSNOTIFY_ITER_TYPE_COUNT; \
+ type++)
/*
* fsnotify_connp_t is what we embed in objects which connector can be attached
@@ -427,6 +473,7 @@ struct fsnotify_mark_connector {
spinlock_t lock;
unsigned short type; /* Type of object [lock] */
#define FSNOTIFY_CONN_FLAG_HAS_FSID 0x01
+#define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02
unsigned short flags; /* flags [lock] */
__kernel_fsid_t fsid; /* fsid of filesystem containing object */
union {
@@ -473,9 +520,15 @@ struct fsnotify_mark {
struct fsnotify_mark_connector *connector;
/* Events types to ignore [mark->lock, group->mark_mutex] */
__u32 ignored_mask;
-#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x01
-#define FSNOTIFY_MARK_FLAG_ALIVE 0x02
-#define FSNOTIFY_MARK_FLAG_ATTACHED 0x04
+ /* General fsnotify mark flags */
+#define FSNOTIFY_MARK_FLAG_ALIVE 0x0001
+#define FSNOTIFY_MARK_FLAG_ATTACHED 0x0002
+ /* inotify mark flags */
+#define FSNOTIFY_MARK_FLAG_EXCL_UNLINK 0x0010
+#define FSNOTIFY_MARK_FLAG_IN_ONESHOT 0x0020
+ /* fanotify mark flags */
+#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100
+#define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200
unsigned int flags; /* flags [mark->lock] */
};
@@ -541,8 +594,9 @@ static inline void fsnotify_update_flags(struct dentry *dentry)
/* called from fsnotify listeners, such as fanotify or dnotify */
/* create a new group */
-extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops);
-extern struct fsnotify_group *fsnotify_alloc_user_group(const struct fsnotify_ops *ops);
+extern struct fsnotify_group *fsnotify_alloc_group(
+ const struct fsnotify_ops *ops,
+ int flags);
/* get reference to a group */
extern void fsnotify_get_group(struct fsnotify_group *group);
/* drop reference on a group from fsnotify_alloc_group */
@@ -635,26 +689,26 @@ extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn,
/* attach the mark to the object */
extern int fsnotify_add_mark(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, unsigned int obj_type,
- int allow_dups, __kernel_fsid_t *fsid);
+ int add_flags, __kernel_fsid_t *fsid);
extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_connp_t *connp,
- unsigned int obj_type, int allow_dups,
+ unsigned int obj_type, int add_flags,
__kernel_fsid_t *fsid);
/* attach the mark to the inode */
static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
struct inode *inode,
- int allow_dups)
+ int add_flags)
{
return fsnotify_add_mark(mark, &inode->i_fsnotify_marks,
- FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL);
+ FSNOTIFY_OBJ_TYPE_INODE, add_flags, NULL);
}
static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark,
struct inode *inode,
- int allow_dups)
+ int add_flags)
{
return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks,
- FSNOTIFY_OBJ_TYPE_INODE, allow_dups,
+ FSNOTIFY_OBJ_TYPE_INODE, add_flags,
NULL);
}
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index e8ac38cc2fd6..f1f89132d60e 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -82,6 +82,7 @@
#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040
#define FAN_MARK_FLUSH 0x00000080
/* FAN_MARK_FILESYSTEM is 0x00000100 */
+#define FAN_MARK_EVICTABLE 0x00000200
/* These are NOT bitwise flags. Both bits can be used togther. */
#define FAN_MARK_INODE 0x00000000
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 02348b48447c..6432a37ac1c9 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -100,7 +100,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
audit_update_mark(audit_mark, dentry->d_inode);
audit_mark->rule = krule;
- ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, true);
+ ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, 0);
if (ret < 0) {
fsnotify_put_mark(&audit_mark->mark);
audit_mark = ERR_PTR(ret);
@@ -181,7 +181,8 @@ static const struct fsnotify_ops audit_mark_fsnotify_ops = {
static int __init audit_fsnotify_init(void)
{
- audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops);
+ audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops,
+ FSNOTIFY_GROUP_DUPS);
if (IS_ERR(audit_fsnotify_group)) {
audit_fsnotify_group = NULL;
audit_panic("cannot create audit fsnotify group");
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index e7315d487163..e867c17d3f84 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -351,7 +351,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark)
struct audit_chunk *new;
int size;
- mutex_lock(&audit_tree_group->mark_mutex);
+ fsnotify_group_lock(audit_tree_group);
/*
* mark_mutex stabilizes chunk attached to the mark so we can check
* whether it didn't change while we've dropped hash_lock.
@@ -368,7 +368,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark)
replace_mark_chunk(mark, NULL);
spin_unlock(&hash_lock);
fsnotify_detach_mark(mark);
- mutex_unlock(&audit_tree_group->mark_mutex);
+ fsnotify_group_unlock(audit_tree_group);
audit_mark_put_chunk(chunk);
fsnotify_free_mark(mark);
return;
@@ -385,12 +385,12 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark)
*/
replace_chunk(new, chunk);
spin_unlock(&hash_lock);
- mutex_unlock(&audit_tree_group->mark_mutex);
+ fsnotify_group_unlock(audit_tree_group);
audit_mark_put_chunk(chunk);
return;
out_mutex:
- mutex_unlock(&audit_tree_group->mark_mutex);
+ fsnotify_group_unlock(audit_tree_group);
}
/* Call with group->mark_mutex held, releases it */
@@ -400,19 +400,19 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
struct audit_chunk *chunk = alloc_chunk(1);
if (!chunk) {
- mutex_unlock(&audit_tree_group->mark_mutex);
+ fsnotify_group_unlock(audit_tree_group);
return -ENOMEM;
}
mark = alloc_mark();
if (!mark) {
- mutex_unlock(&audit_tree_group->mark_mutex);
+ fsnotify_group_unlock(audit_tree_group);
kfree(chunk);
return -ENOMEM;
}
if (fsnotify_add_inode_mark_locked(mark, inode, 0)) {
- mutex_unlock(&audit_tree_group->mark_mutex);
+ fsnotify_group_unlock(audit_tree_group);
fsnotify_put_mark(mark);
kfree(chunk);
return -ENOSPC;
@@ -422,7 +422,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
if (tree->goner) {
spin_unlock(&hash_lock);
fsnotify_detach_mark(mark);
- mutex_unlock(&audit_tree_group->mark_mutex);
+ fsnotify_group_unlock(audit_tree_group);
fsnotify_free_mark(mark);
fsnotify_put_mark(mark);
kfree(chunk);
@@ -444,7 +444,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
*/
insert_hash(chunk);
spin_unlock(&hash_lock);
- mutex_unlock(&audit_tree_group->mark_mutex);
+ fsnotify_group_unlock(audit_tree_group);
/*
* Drop our initial reference. When mark we point to is getting freed,
* we get notification through ->freeing_mark callback and cleanup
@@ -462,7 +462,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
struct audit_node *p;
int n;
- mutex_lock(&audit_tree_group->mark_mutex);
+ fsnotify_group_lock(audit_tree_group);
mark = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
if (!mark)
return create_chunk(inode, tree);
@@ -478,7 +478,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
- mutex_unlock(&audit_tree_group->mark_mutex);
+ fsnotify_group_unlock(audit_tree_group);
fsnotify_put_mark(mark);
return 0;
}
@@ -487,7 +487,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
chunk = alloc_chunk(old->count + 1);
if (!chunk) {
- mutex_unlock(&audit_tree_group->mark_mutex);
+ fsnotify_group_unlock(audit_tree_group);
fsnotify_put_mark(mark);
return -ENOMEM;
}
@@ -495,7 +495,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
- mutex_unlock(&audit_tree_group->mark_mutex);
+ fsnotify_group_unlock(audit_tree_group);
fsnotify_put_mark(mark);
kfree(chunk);
return 0;
@@ -515,7 +515,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
*/
replace_chunk(chunk, old);
spin_unlock(&hash_lock);
- mutex_unlock(&audit_tree_group->mark_mutex);
+ fsnotify_group_unlock(audit_tree_group);
fsnotify_put_mark(mark); /* pair to fsnotify_find_mark */
audit_mark_put_chunk(old);
@@ -1044,12 +1044,12 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *mark,
{
struct audit_chunk *chunk;
- mutex_lock(&mark->group->mark_mutex);
+ fsnotify_group_lock(mark->group);
spin_lock(&hash_lock);
chunk = mark_chunk(mark);
replace_mark_chunk(mark, NULL);
spin_unlock(&hash_lock);
- mutex_unlock(&mark->group->mark_mutex);
+ fsnotify_group_unlock(mark->group);
if (chunk) {
evict_chunk(chunk);
audit_mark_put_chunk(chunk);
@@ -1074,7 +1074,7 @@ static int __init audit_tree_init(void)
audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC);
- audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
+ audit_tree_group = fsnotify_alloc_group(&audit_tree_ops, 0);
if (IS_ERR(audit_tree_group))
audit_panic("cannot initialize fsnotify group for rectree watches");
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 713b256be944..4b0957aa2cd4 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -493,7 +493,7 @@ static const struct fsnotify_ops audit_watch_fsnotify_ops = {
static int __init audit_watch_init(void)
{
- audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops);
+ audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops, 0);
if (IS_ERR(audit_watch_group)) {
audit_watch_group = NULL;
audit_panic("cannot create audit fsnotify group");