summaryrefslogtreecommitdiff
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c414
1 files changed, 243 insertions, 171 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 3f1829b3ab5b..8533087e5dac 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -22,6 +22,7 @@
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
+#include <linux/sched/mm.h>
#include <linux/fsnotify.h>
#include <linux/personality.h>
#include <linux/security.h>
@@ -566,7 +567,7 @@ struct nameidata {
struct path root;
struct inode *inode; /* path.dentry.d_inode */
unsigned int flags, state;
- unsigned seq, m_seq, r_seq;
+ unsigned seq, next_seq, m_seq, r_seq;
int last_type;
unsigned depth;
int total_link_count;
@@ -664,6 +665,13 @@ static void drop_links(struct nameidata *nd)
}
}
+static void leave_rcu(struct nameidata *nd)
+{
+ nd->flags &= ~LOOKUP_RCU;
+ nd->seq = nd->next_seq = 0;
+ rcu_read_unlock();
+}
+
static void terminate_walk(struct nameidata *nd)
{
drop_links(nd);
@@ -677,8 +685,7 @@ static void terminate_walk(struct nameidata *nd)
nd->state &= ~ND_ROOT_GRABBED;
}
} else {
- nd->flags &= ~LOOKUP_RCU;
- rcu_read_unlock();
+ leave_rcu(nd);
}
nd->depth = 0;
nd->path.mnt = NULL;
@@ -729,13 +736,6 @@ static bool legitimize_links(struct nameidata *nd)
static bool legitimize_root(struct nameidata *nd)
{
- /*
- * For scoped-lookups (where nd->root has been zeroed), we need to
- * restart the whole lookup from scratch -- because set_root() is wrong
- * for these lookups (nd->dfd is the root, not the filesystem root).
- */
- if (!nd->root.mnt && (nd->flags & LOOKUP_IS_SCOPED))
- return false;
/* Nothing to do if nd->root is zero or is managed by the VFS user. */
if (!nd->root.mnt || (nd->state & ND_ROOT_PRESET))
return true;
@@ -771,14 +771,13 @@ static bool try_to_unlazy(struct nameidata *nd)
BUG_ON(!(nd->flags & LOOKUP_RCU));
- nd->flags &= ~LOOKUP_RCU;
if (unlikely(!legitimize_links(nd)))
goto out1;
if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
goto out;
if (unlikely(!legitimize_root(nd)))
goto out;
- rcu_read_unlock();
+ leave_rcu(nd);
BUG_ON(nd->inode != parent->d_inode);
return true;
@@ -786,7 +785,7 @@ out1:
nd->path.mnt = NULL;
nd->path.dentry = NULL;
out:
- rcu_read_unlock();
+ leave_rcu(nd);
return false;
}
@@ -794,24 +793,27 @@ out:
* try_to_unlazy_next - try to switch to ref-walk mode.
* @nd: nameidata pathwalk data
* @dentry: next dentry to step into
- * @seq: seq number to check @dentry against
* Returns: true on success, false on failure
*
- * Similar to to try_to_unlazy(), but here we have the next dentry already
+ * Similar to try_to_unlazy(), but here we have the next dentry already
* picked by rcu-walk and want to legitimize that in addition to the current
* nd->path and nd->root for ref-walk mode. Must be called from rcu-walk context.
* Nothing should touch nameidata between try_to_unlazy_next() failure and
* terminate_walk().
*/
-static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsigned seq)
+static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry)
{
+ int res;
BUG_ON(!(nd->flags & LOOKUP_RCU));
- nd->flags &= ~LOOKUP_RCU;
if (unlikely(!legitimize_links(nd)))
goto out2;
- if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq)))
- goto out2;
+ res = __legitimize_mnt(nd->path.mnt, nd->m_seq);
+ if (unlikely(res)) {
+ if (res > 0)
+ goto out2;
+ goto out1;
+ }
if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref)))
goto out1;
@@ -824,7 +826,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi
*/
if (unlikely(!lockref_get_not_dead(&dentry->d_lockref)))
goto out;
- if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
+ if (read_seqcount_retry(&dentry->d_seq, nd->next_seq))
goto out_dput;
/*
* Sequence counts matched. Now make sure that the root is
@@ -832,7 +834,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi
*/
if (unlikely(!legitimize_root(nd)))
goto out_dput;
- rcu_read_unlock();
+ leave_rcu(nd);
return true;
out2:
@@ -840,10 +842,10 @@ out2:
out1:
nd->path.dentry = NULL;
out:
- rcu_read_unlock();
+ leave_rcu(nd);
return false;
out_dput:
- rcu_read_unlock();
+ leave_rcu(nd);
dput(dentry);
return false;
}
@@ -968,7 +970,7 @@ static int nd_jump_root(struct nameidata *nd)
d = nd->path.dentry;
nd->inode = d->d_inode;
nd->seq = nd->root_seq;
- if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq)))
+ if (read_seqcount_retry(&d->d_seq, nd->seq))
return -ECHILD;
} else {
path_put(&nd->path);
@@ -984,7 +986,7 @@ static int nd_jump_root(struct nameidata *nd)
* Helper to directly jump to a known parsed path from ->get_link,
* caller must have taken a reference to path beforehand.
*/
-int nd_jump_link(struct path *path)
+int nd_jump_link(const struct path *path)
{
int error = -ELOOP;
struct nameidata *nd = current->nameidata;
@@ -1031,7 +1033,7 @@ static struct ctl_table namei_sysctls[] = {
.procname = "protected_symlinks",
.data = &sysctl_protected_symlinks,
.maxlen = sizeof(int),
- .mode = 0600,
+ .mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
@@ -1040,7 +1042,7 @@ static struct ctl_table namei_sysctls[] = {
.procname = "protected_hardlinks",
.data = &sysctl_protected_hardlinks,
.maxlen = sizeof(int),
- .mode = 0600,
+ .mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
@@ -1049,7 +1051,7 @@ static struct ctl_table namei_sysctls[] = {
.procname = "protected_fifos",
.data = &sysctl_protected_fifos,
.maxlen = sizeof(int),
- .mode = 0600,
+ .mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
@@ -1058,7 +1060,7 @@ static struct ctl_table namei_sysctls[] = {
.procname = "protected_regular",
.data = &sysctl_protected_regular,
.maxlen = sizeof(int),
- .mode = 0600,
+ .mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
@@ -1176,7 +1178,7 @@ static bool safe_hardlink_source(struct user_namespace *mnt_userns,
*
* Returns 0 if successful, -ve on error.
*/
-int may_linkat(struct user_namespace *mnt_userns, struct path *link)
+int may_linkat(struct user_namespace *mnt_userns, const struct path *link)
{
struct inode *inode = link->dentry->d_inode;
@@ -1472,8 +1474,7 @@ EXPORT_SYMBOL(follow_down);
* Try to skip to top of mountpoint pile in rcuwalk mode. Fail if
* we meet a managed dentry that would need blocking.
*/
-static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
- struct inode **inode, unsigned *seqp)
+static bool __follow_mount_rcu(struct nameidata *nd, struct path *path)
{
struct dentry *dentry = path->dentry;
unsigned int flags = dentry->d_flags;
@@ -1502,15 +1503,12 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
path->mnt = &mounted->mnt;
dentry = path->dentry = mounted->mnt.mnt_root;
nd->state |= ND_JUMPED;
- *seqp = read_seqcount_begin(&dentry->d_seq);
- *inode = dentry->d_inode;
- /*
- * We don't need to re-check ->d_seq after this
- * ->d_inode read - there will be an RCU delay
- * between mount hash removal and ->mnt_root
- * becoming unpinned.
- */
+ nd->next_seq = read_seqcount_begin(&dentry->d_seq);
flags = dentry->d_flags;
+ // makes sure that non-RCU pathwalk could reach
+ // this state.
+ if (read_seqretry(&mount_lock, nd->m_seq))
+ return false;
continue;
}
if (read_seqretry(&mount_lock, nd->m_seq))
@@ -1521,8 +1519,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
}
static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
- struct path *path, struct inode **inode,
- unsigned int *seqp)
+ struct path *path)
{
bool jumped;
int ret;
@@ -1530,16 +1527,15 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
path->mnt = nd->path.mnt;
path->dentry = dentry;
if (nd->flags & LOOKUP_RCU) {
- unsigned int seq = *seqp;
- if (unlikely(!*inode))
- return -ENOENT;
- if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
+ unsigned int seq = nd->next_seq;
+ if (likely(__follow_mount_rcu(nd, path)))
return 0;
- if (!try_to_unlazy_next(nd, dentry, seq))
- return -ECHILD;
- // *path might've been clobbered by __follow_mount_rcu()
+ // *path and nd->next_seq might've been clobbered
path->mnt = nd->path.mnt;
path->dentry = dentry;
+ nd->next_seq = seq;
+ if (!try_to_unlazy_next(nd, dentry))
+ return -ECHILD;
}
ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
if (jumped) {
@@ -1552,9 +1548,6 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
dput(path->dentry);
if (path->mnt != nd->path.mnt)
mntput(path->mnt);
- } else {
- *inode = d_backing_inode(path->dentry);
- *seqp = 0; /* out of RCU mode, so the value doesn't matter */
}
return ret;
}
@@ -1613,9 +1606,7 @@ static struct dentry *__lookup_hash(const struct qstr *name,
return dentry;
}
-static struct dentry *lookup_fast(struct nameidata *nd,
- struct inode **inode,
- unsigned *seqp)
+static struct dentry *lookup_fast(struct nameidata *nd)
{
struct dentry *dentry, *parent = nd->path.dentry;
int status = 1;
@@ -1626,8 +1617,7 @@ static struct dentry *lookup_fast(struct nameidata *nd,
* going to fall back to non-racy lookup.
*/
if (nd->flags & LOOKUP_RCU) {
- unsigned seq;
- dentry = __d_lookup_rcu(parent, &nd->last, &seq);
+ dentry = __d_lookup_rcu(parent, &nd->last, &nd->next_seq);
if (unlikely(!dentry)) {
if (!try_to_unlazy(nd))
return ERR_PTR(-ECHILD);
@@ -1635,28 +1625,16 @@ static struct dentry *lookup_fast(struct nameidata *nd,
}
/*
- * This sequence count validates that the inode matches
- * the dentry name information from lookup.
- */
- *inode = d_backing_inode(dentry);
- if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
- return ERR_PTR(-ECHILD);
-
- /*
* This sequence count validates that the parent had no
* changes while we did the lookup of the dentry above.
- *
- * The memory barrier in read_seqcount_begin of child is
- * enough, we can use __read_seqcount_retry here.
*/
- if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq)))
+ if (read_seqcount_retry(&parent->d_seq, nd->seq))
return ERR_PTR(-ECHILD);
- *seqp = seq;
status = d_revalidate(dentry, nd->flags);
if (likely(status > 0))
return dentry;
- if (!try_to_unlazy_next(nd, dentry, seq))
+ if (!try_to_unlazy_next(nd, dentry))
return ERR_PTR(-ECHILD);
if (status == -ECHILD)
/* we'd been told to redo it in non-rcu mode */
@@ -1737,7 +1715,7 @@ static inline int may_lookup(struct user_namespace *mnt_userns,
return inode_permission(mnt_userns, nd->inode, MAY_EXEC);
}
-static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
+static int reserve_stack(struct nameidata *nd, struct path *link)
{
if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))
return -ELOOP;
@@ -1752,9 +1730,9 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
if (nd->flags & LOOKUP_RCU) {
// we need to grab link before we do unlazy. And we can't skip
// unlazy even if we fail to grab the link - cleanup needs it
- bool grabbed_link = legitimize_path(nd, link, seq);
+ bool grabbed_link = legitimize_path(nd, link, nd->next_seq);
- if (!try_to_unlazy(nd) != 0 || !grabbed_link)
+ if (!try_to_unlazy(nd) || !grabbed_link)
return -ECHILD;
if (nd_alloc_stack(nd))
@@ -1766,11 +1744,11 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
static const char *pick_link(struct nameidata *nd, struct path *link,
- struct inode *inode, unsigned seq, int flags)
+ struct inode *inode, int flags)
{
struct saved *last;
const char *res;
- int error = reserve_stack(nd, link, seq);
+ int error = reserve_stack(nd, link);
if (unlikely(error)) {
if (!(nd->flags & LOOKUP_RCU))
@@ -1780,7 +1758,7 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
last = nd->stack + nd->depth++;
last->link = *link;
clear_delayed_call(&last->done);
- last->seq = seq;
+ last->seq = nd->next_seq;
if (flags & WALK_TRAILING) {
error = may_follow_link(nd, inode);
@@ -1842,43 +1820,50 @@ all_done: // pure jump
* to do this check without having to look at inode->i_op,
* so we keep a cache of "no, this doesn't need follow_link"
* for the common case.
+ *
+ * NOTE: dentry must be what nd->next_seq had been sampled from.
*/
static const char *step_into(struct nameidata *nd, int flags,
- struct dentry *dentry, struct inode *inode, unsigned seq)
+ struct dentry *dentry)
{
struct path path;
- int err = handle_mounts(nd, dentry, &path, &inode, &seq);
+ struct inode *inode;
+ int err = handle_mounts(nd, dentry, &path);
if (err < 0)
return ERR_PTR(err);
+ inode = path.dentry->d_inode;
if (likely(!d_is_symlink(path.dentry)) ||
((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) ||
(flags & WALK_NOFOLLOW)) {
/* not a symlink or should not follow */
- if (!(nd->flags & LOOKUP_RCU)) {
+ if (nd->flags & LOOKUP_RCU) {
+ if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
+ return ERR_PTR(-ECHILD);
+ if (unlikely(!inode))
+ return ERR_PTR(-ENOENT);
+ } else {
dput(nd->path.dentry);
if (nd->path.mnt != path.mnt)
mntput(nd->path.mnt);
}
nd->path = path;
nd->inode = inode;
- nd->seq = seq;
+ nd->seq = nd->next_seq;
return NULL;
}
if (nd->flags & LOOKUP_RCU) {
/* make sure that d_is_symlink above matches inode */
- if (read_seqcount_retry(&path.dentry->d_seq, seq))
+ if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
return ERR_PTR(-ECHILD);
} else {
if (path.mnt == nd->path.mnt)
mntget(path.mnt);
}
- return pick_link(nd, &path, inode, seq, flags);
+ return pick_link(nd, &path, inode, flags);
}
-static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
- struct inode **inodep,
- unsigned *seqp)
+static struct dentry *follow_dotdot_rcu(struct nameidata *nd)
{
struct dentry *parent, *old;
@@ -1895,30 +1880,30 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
nd->path = path;
nd->inode = path.dentry->d_inode;
nd->seq = seq;
- if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
+ // makes sure that non-RCU pathwalk could reach this state
+ if (read_seqretry(&mount_lock, nd->m_seq))
return ERR_PTR(-ECHILD);
/* we know that mountpoint was pinned */
}
old = nd->path.dentry;
parent = old->d_parent;
- *inodep = parent->d_inode;
- *seqp = read_seqcount_begin(&parent->d_seq);
- if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
+ nd->next_seq = read_seqcount_begin(&parent->d_seq);
+ // makes sure that non-RCU pathwalk could reach this state
+ if (read_seqcount_retry(&old->d_seq, nd->seq))
return ERR_PTR(-ECHILD);
if (unlikely(!path_connected(nd->path.mnt, parent)))
return ERR_PTR(-ECHILD);
return parent;
in_root:
- if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
+ if (read_seqretry(&mount_lock, nd->m_seq))
return ERR_PTR(-ECHILD);
if (unlikely(nd->flags & LOOKUP_BENEATH))
return ERR_PTR(-ECHILD);
- return NULL;
+ nd->next_seq = nd->seq;
+ return nd->path.dentry;
}
-static struct dentry *follow_dotdot(struct nameidata *nd,
- struct inode **inodep,
- unsigned *seqp)
+static struct dentry *follow_dotdot(struct nameidata *nd)
{
struct dentry *parent;
@@ -1942,15 +1927,12 @@ static struct dentry *follow_dotdot(struct nameidata *nd,
dput(parent);
return ERR_PTR(-ENOENT);
}
- *seqp = 0;
- *inodep = parent->d_inode;
return parent;
in_root:
if (unlikely(nd->flags & LOOKUP_BENEATH))
return ERR_PTR(-EXDEV);
- dget(nd->path.dentry);
- return NULL;
+ return dget(nd->path.dentry);
}
static const char *handle_dots(struct nameidata *nd, int type)
@@ -1958,8 +1940,6 @@ static const char *handle_dots(struct nameidata *nd, int type)
if (type == LAST_DOTDOT) {
const char *error = NULL;
struct dentry *parent;
- struct inode *inode;
- unsigned seq;
if (!nd->root.mnt) {
error = ERR_PTR(set_root(nd));
@@ -1967,17 +1947,12 @@ static const char *handle_dots(struct nameidata *nd, int type)
return error;
}
if (nd->flags & LOOKUP_RCU)
- parent = follow_dotdot_rcu(nd, &inode, &seq);
+ parent = follow_dotdot_rcu(nd);
else
- parent = follow_dotdot(nd, &inode, &seq);
+ parent = follow_dotdot(nd);
if (IS_ERR(parent))
return ERR_CAST(parent);
- if (unlikely(!parent))
- error = step_into(nd, WALK_NOFOLLOW,
- nd->path.dentry, nd->inode, nd->seq);
- else
- error = step_into(nd, WALK_NOFOLLOW,
- parent, inode, seq);
+ error = step_into(nd, WALK_NOFOLLOW, parent);
if (unlikely(error))
return error;
@@ -1989,9 +1964,9 @@ static const char *handle_dots(struct nameidata *nd, int type)
* some fallback).
*/
smp_rmb();
- if (unlikely(__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq)))
+ if (__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq))
return ERR_PTR(-EAGAIN);
- if (unlikely(__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)))
+ if (__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq))
return ERR_PTR(-EAGAIN);
}
}
@@ -2001,8 +1976,6 @@ static const char *handle_dots(struct nameidata *nd, int type)
static const char *walk_component(struct nameidata *nd, int flags)
{
struct dentry *dentry;
- struct inode *inode;
- unsigned seq;
/*
* "." and ".." are special - ".." especially so because it has
* to be able to know about the current root directory and
@@ -2013,7 +1986,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
put_link(nd);
return handle_dots(nd, nd->last_type);
}
- dentry = lookup_fast(nd, &inode, &seq);
+ dentry = lookup_fast(nd);
if (IS_ERR(dentry))
return ERR_CAST(dentry);
if (unlikely(!dentry)) {
@@ -2023,7 +1996,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
}
if (!(flags & WALK_MORE) && nd->depth)
put_link(nd);
- return step_into(nd, flags, dentry, inode, seq);
+ return step_into(nd, flags, dentry);
}
/*
@@ -2378,6 +2351,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
flags &= ~LOOKUP_RCU;
if (flags & LOOKUP_RCU)
rcu_read_lock();
+ else
+ nd->seq = nd->next_seq = 0;
nd->flags = flags;
nd->state |= ND_JUMPED;
@@ -2479,8 +2454,8 @@ static int handle_lookup_down(struct nameidata *nd)
{
if (!(nd->flags & LOOKUP_RCU))
dget(nd->path.dentry);
- return PTR_ERR(step_into(nd, WALK_NOFOLLOW,
- nd->path.dentry, nd->inode, nd->seq));
+ nd->next_seq = nd->seq;
+ return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry));
}
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
@@ -2768,7 +2743,8 @@ struct dentry *lookup_one(struct user_namespace *mnt_userns, const char *name,
EXPORT_SYMBOL(lookup_one);
/**
- * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
+ * lookup_one_unlocked - filesystem helper to lookup single pathname component
+ * @mnt_userns: idmapping of the mount the lookup is performed from
* @name: pathname component to lookup
* @base: base directory to lookup from
* @len: maximum length @len should be interpreted to
@@ -2779,14 +2755,15 @@ EXPORT_SYMBOL(lookup_one);
* Unlike lookup_one_len, it should be called without the parent
* i_mutex held, and will take the i_mutex itself if necessary.
*/
-struct dentry *lookup_one_len_unlocked(const char *name,
- struct dentry *base, int len)
+struct dentry *lookup_one_unlocked(struct user_namespace *mnt_userns,
+ const char *name, struct dentry *base,
+ int len)
{
struct qstr this;
int err;
struct dentry *ret;
- err = lookup_one_common(&init_user_ns, name, base, len, &this);
+ err = lookup_one_common(mnt_userns, name, base, len, &this);
if (err)
return ERR_PTR(err);
@@ -2795,6 +2772,59 @@ struct dentry *lookup_one_len_unlocked(const char *name,
ret = lookup_slow(&this, base, 0);
return ret;
}
+EXPORT_SYMBOL(lookup_one_unlocked);
+
+/**
+ * lookup_one_positive_unlocked - filesystem helper to lookup single
+ * pathname component
+ * @mnt_userns: idmapping of the mount the lookup is performed from
+ * @name: pathname component to lookup
+ * @base: base directory to lookup from
+ * @len: maximum length @len should be interpreted to
+ *
+ * This helper will yield ERR_PTR(-ENOENT) on negatives. The helper returns
+ * known positive or ERR_PTR(). This is what most of the users want.
+ *
+ * Note that pinned negative with unlocked parent _can_ become positive at any
+ * time, so callers of lookup_one_unlocked() need to be very careful; pinned
+ * positives have >d_inode stable, so this one avoids such problems.
+ *
+ * Note that this routine is purely a helper for filesystem usage and should
+ * not be called by generic code.
+ *
+ * The helper should be called without i_mutex held.
+ */
+struct dentry *lookup_one_positive_unlocked(struct user_namespace *mnt_userns,
+ const char *name,
+ struct dentry *base, int len)
+{
+ struct dentry *ret = lookup_one_unlocked(mnt_userns, name, base, len);
+
+ if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
+ dput(ret);
+ ret = ERR_PTR(-ENOENT);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(lookup_one_positive_unlocked);
+
+/**
+ * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
+ * @name: pathname component to lookup
+ * @base: base directory to lookup from
+ * @len: maximum length @len should be interpreted to
+ *
+ * Note that this routine is purely a helper for filesystem usage and should
+ * not be called by generic code.
+ *
+ * Unlike lookup_one_len, it should be called without the parent
+ * i_mutex held, and will take the i_mutex itself if necessary.
+ */
+struct dentry *lookup_one_len_unlocked(const char *name,
+ struct dentry *base, int len)
+{
+ return lookup_one_unlocked(&init_user_ns, name, base, len);
+}
EXPORT_SYMBOL(lookup_one_len_unlocked);
/*
@@ -2808,12 +2838,7 @@ EXPORT_SYMBOL(lookup_one_len_unlocked);
struct dentry *lookup_positive_unlocked(const char *name,
struct dentry *base, int len)
{
- struct dentry *ret = lookup_one_len_unlocked(name, base, len);
- if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
- dput(ret);
- ret = ERR_PTR(-ENOENT);
- }
- return ret;
+ return lookup_one_positive_unlocked(&init_user_ns, name, base, len);
}
EXPORT_SYMBOL(lookup_positive_unlocked);
@@ -2999,6 +3024,65 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
EXPORT_SYMBOL(unlock_rename);
/**
+ * mode_strip_umask - handle vfs umask stripping
+ * @dir: parent directory of the new inode
+ * @mode: mode of the new inode to be created in @dir
+ *
+ * Umask stripping depends on whether or not the filesystem supports POSIX
+ * ACLs. If the filesystem doesn't support it umask stripping is done directly
+ * in here. If the filesystem does support POSIX ACLs umask stripping is
+ * deferred until the filesystem calls posix_acl_create().
+ *
+ * Returns: mode
+ */
+static inline umode_t mode_strip_umask(const struct inode *dir, umode_t mode)
+{
+ if (!IS_POSIXACL(dir))
+ mode &= ~current_umask();
+ return mode;
+}
+
+/**
+ * vfs_prepare_mode - prepare the mode to be used for a new inode
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @dir: parent directory of the new inode
+ * @mode: mode of the new inode
+ * @mask_perms: allowed permission by the vfs
+ * @type: type of file to be created
+ *
+ * This helper consolidates and enforces vfs restrictions on the @mode of a new
+ * object to be created.
+ *
+ * Umask stripping depends on whether the filesystem supports POSIX ACLs (see
+ * the kernel documentation for mode_strip_umask()). Moving umask stripping
+ * after setgid stripping allows the same ordering for both non-POSIX ACL and
+ * POSIX ACL supporting filesystems.
+ *
+ * Note that it's currently valid for @type to be 0 if a directory is created.
+ * Filesystems raise that flag individually and we need to check whether each
+ * filesystem can deal with receiving S_IFDIR from the vfs before we enforce a
+ * non-zero type.
+ *
+ * Returns: mode to be passed to the filesystem
+ */
+static inline umode_t vfs_prepare_mode(struct user_namespace *mnt_userns,
+ const struct inode *dir, umode_t mode,
+ umode_t mask_perms, umode_t type)
+{
+ mode = mode_strip_sgid(mnt_userns, dir, mode);
+ mode = mode_strip_umask(dir, mode);
+
+ /*
+ * Apply the vfs mandated allowed permission mask and set the type of
+ * file to be created before we call into the filesystem.
+ */
+ mode &= (mask_perms & ~S_IFMT);
+ mode |= (type & S_IFMT);
+
+ return mode;
+}
+
+/**
* vfs_create - create new file
* @mnt_userns: user namespace of the mount the inode was found from
* @dir: inode of @dentry
@@ -3023,8 +3107,8 @@ int vfs_create(struct user_namespace *mnt_userns, struct inode *dir,
if (!dir->i_op->create)
return -EACCES; /* shouldn't it be ENOSYS? */
- mode &= S_IALLUGO;
- mode |= S_IFREG;
+
+ mode = vfs_prepare_mode(mnt_userns, dir, mode, S_IALLUGO, S_IFREG);
error = security_inode_create(dir, dentry, mode);
if (error)
return error;
@@ -3287,8 +3371,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
if (open_flag & O_CREAT) {
if (open_flag & O_EXCL)
open_flag &= ~O_TRUNC;
- if (!IS_POSIXACL(dir->d_inode))
- mode &= ~current_umask();
+ mode = vfs_prepare_mode(mnt_userns, dir->d_inode, mode, mode, mode);
if (likely(got_write))
create_error = may_o_create(mnt_userns, &nd->path,
dentry, mode);
@@ -3349,8 +3432,6 @@ static const char *open_last_lookups(struct nameidata *nd,
struct dentry *dir = nd->path.dentry;
int open_flag = op->open_flag;
bool got_write = false;
- unsigned seq;
- struct inode *inode;
struct dentry *dentry;
const char *res;
@@ -3366,7 +3447,7 @@ static const char *open_last_lookups(struct nameidata *nd,
if (nd->last.name[nd->last.len])
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
/* we _can_ be in RCU mode here */
- dentry = lookup_fast(nd, &inode, &seq);
+ dentry = lookup_fast(nd);
if (IS_ERR(dentry))
return ERR_CAST(dentry);
if (likely(dentry))
@@ -3420,7 +3501,7 @@ static const char *open_last_lookups(struct nameidata *nd,
finish_lookup:
if (nd->depth)
put_link(nd);
- res = step_into(nd, WALK_TRAILING, dentry, inode, seq);
+ res = step_into(nd, WALK_TRAILING, dentry);
if (unlikely(res))
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
return res;
@@ -3521,6 +3602,7 @@ struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
child = d_alloc(dentry, &slash_name);
if (unlikely(!child))
goto out_err;
+ mode = vfs_prepare_mode(mnt_userns, dir, mode, mode, mode);
error = dir->i_op->tmpfile(mnt_userns, dir, child, mode);
if (error)
goto out_err;
@@ -3673,18 +3755,14 @@ static struct dentry *filename_create(int dfd, struct filename *name,
{
struct dentry *dentry = ERR_PTR(-EEXIST);
struct qstr last;
+ bool want_dir = lookup_flags & LOOKUP_DIRECTORY;
+ unsigned int reval_flag = lookup_flags & LOOKUP_REVAL;
+ unsigned int create_flags = LOOKUP_CREATE | LOOKUP_EXCL;
int type;
int err2;
int error;
- bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
-
- /*
- * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any
- * other flags passed in are ignored!
- */
- lookup_flags &= LOOKUP_REVAL;
- error = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
+ error = filename_parentat(dfd, name, reval_flag, path, &last, &type);
if (error)
return ERR_PTR(error);
@@ -3698,11 +3776,13 @@ static struct dentry *filename_create(int dfd, struct filename *name,
/* don't fail immediately if it's r/o, at least try to report other errors */
err2 = mnt_want_write(path->mnt);
/*
- * Do the final lookup.
+ * Do the final lookup. Suppress 'create' if there is a trailing
+ * '/', and a directory wasn't requested.
*/
- lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL;
+ if (last.name[last.len] && !want_dir)
+ create_flags = 0;
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
- dentry = __lookup_hash(&last, path->dentry, lookup_flags);
+ dentry = __lookup_hash(&last, path->dentry, reval_flag | create_flags);
if (IS_ERR(dentry))
goto unlock;
@@ -3716,7 +3796,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
* all is fine. Let's be bastards - you had / on the end, you've
* been asking for (non-existent) directory. -ENOENT for you.
*/
- if (unlikely(!is_dir && last.name[last.len])) {
+ if (unlikely(!create_flags)) {
error = -ENOENT;
goto fail;
}
@@ -3800,6 +3880,7 @@ int vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
if (!dir->i_op->mknod)
return -EPERM;
+ mode = vfs_prepare_mode(mnt_userns, dir, mode, mode, mode);
error = devcgroup_inode_mknod(mode, dev);
if (error)
return error;
@@ -3850,9 +3931,8 @@ retry:
if (IS_ERR(dentry))
goto out1;
- if (!IS_POSIXACL(path.dentry->d_inode))
- mode &= ~current_umask();
- error = security_path_mknod(&path, dentry, mode, dev);
+ error = security_path_mknod(&path, dentry,
+ mode_strip_umask(path.dentry->d_inode, mode), dev);
if (error)
goto out2;
@@ -3922,7 +4002,7 @@ int vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
if (!dir->i_op->mkdir)
return -EPERM;
- mode &= (S_IRWXUGO|S_ISVTX);
+ mode = vfs_prepare_mode(mnt_userns, dir, mode, S_IRWXUGO | S_ISVTX, 0);
error = security_inode_mkdir(dir, dentry, mode);
if (error)
return error;
@@ -3950,9 +4030,8 @@ retry:
if (IS_ERR(dentry))
goto out_putname;
- if (!IS_POSIXACL(path.dentry->d_inode))
- mode &= ~current_umask();
- error = security_path_mkdir(&path, dentry, mode);
+ error = security_path_mkdir(&path, dentry,
+ mode_strip_umask(path.dentry->d_inode, mode));
if (!error) {
struct user_namespace *mnt_userns;
mnt_userns = mnt_user_ns(path.mnt);
@@ -5003,28 +5082,28 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
}
EXPORT_SYMBOL(page_readlink);
-/*
- * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
- */
-int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
+int page_symlink(struct inode *inode, const char *symname, int len)
{
struct address_space *mapping = inode->i_mapping;
+ const struct address_space_operations *aops = mapping->a_ops;
+ bool nofs = !mapping_gfp_constraint(mapping, __GFP_FS);
struct page *page;
void *fsdata;
int err;
- unsigned int flags = 0;
- if (nofs)
- flags |= AOP_FLAG_NOFS;
+ unsigned int flags;
retry:
- err = pagecache_write_begin(NULL, mapping, 0, len-1,
- flags, &page, &fsdata);
+ if (nofs)
+ flags = memalloc_nofs_save();
+ err = aops->write_begin(NULL, mapping, 0, len-1, &page, &fsdata);
+ if (nofs)
+ memalloc_nofs_restore(flags);
if (err)
goto fail;
memcpy(page_address(page), symname, len-1);
- err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
+ err = aops->write_end(NULL, mapping, 0, len-1, len-1,
page, fsdata);
if (err < 0)
goto fail;
@@ -5036,13 +5115,6 @@ retry:
fail:
return err;
}
-EXPORT_SYMBOL(__page_symlink);
-
-int page_symlink(struct inode *inode, const char *symname, int len)
-{
- return __page_symlink(inode, symname, len,
- !mapping_gfp_constraint(inode->i_mapping, __GFP_FS));
-}
EXPORT_SYMBOL(page_symlink);
const struct inode_operations page_symlink_inode_operations = {