summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-10 19:03:48 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-10 19:03:48 +0300
commitb948abf53a381a0c681aadd612e2affba47f62bc (patch)
tree006012adf8876af6fa1bdefa6361a673a9142c83
parenta2e5ad45a9741068f357de4dbff50bb37c233e1b (diff)
parent65f2673832d4647f6d46783d8d745d2b15d090c4 (diff)
downloadlinux-b948abf53a381a0c681aadd612e2affba47f62bc.tar.xz
Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs
Pull overlayfs update from Miklos Szeredi: "The biggest part of this is making st_dev/st_ino on the overlay behave like a normal filesystem (i.e. st_ino doesn't change on copy up, st_dev is the same for all files and directories). Currently this only works if all layers are on the same filesystem, but future work will move the general case towards more sane behavior. There are also miscellaneous fixes, including fixes to handling append-only files. There's a small change in the VFS, but that only has an effect on overlayfs, since otherwise file->f_path.dentry->inode and file_inode(file) are always the same" * 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs: ovl: update documentation w.r.t. constant inode numbers ovl: persistent inode numbers for upper hardlinks ovl: merge getattr for dir and nondir ovl: constant st_ino/st_dev across copy up ovl: persistent inode number for directories ovl: set the ORIGIN type flag ovl: lookup non-dir copy-up-origin by file handle ovl: use an auxiliary var for overlay root entry ovl: store file handle of lower inode on copy up ovl: check if all layers are on the same fs ovl: do not set overlay.opaque on non-dir create ovl: check IS_APPEND() on real upper inode vfs: ftruncate check IS_APPEND() on real upper inode ovl: Use designated initializers ovl: lockdep annotate of nested stacked overlayfs inode lock
-rw-r--r--Documentation/filesystems/overlayfs.txt9
-rw-r--r--fs/open.c3
-rw-r--r--fs/overlayfs/copy_up.c82
-rw-r--r--fs/overlayfs/dir.c37
-rw-r--r--fs/overlayfs/inode.c103
-rw-r--r--fs/overlayfs/namei.c141
-rw-r--r--fs/overlayfs/overlayfs.h41
-rw-r--r--fs/overlayfs/ovl_entry.h2
-rw-r--r--fs/overlayfs/super.c40
-rw-r--r--fs/overlayfs/util.c19
10 files changed, 425 insertions, 52 deletions
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index 634d03e20c2d..c9e884b52698 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -21,12 +21,19 @@ from accessing the corresponding object from the original filesystem.
This is most obvious from the 'st_dev' field returned by stat(2).
While directories will report an st_dev from the overlay-filesystem,
-all non-directory objects will report an st_dev from the lower or
+non-directory objects may report an st_dev from the lower filesystem or
upper filesystem that is providing the object. Similarly st_ino will
only be unique when combined with st_dev, and both of these can change
over the lifetime of a non-directory object. Many applications and
tools ignore these values and will not be affected.
+In the special case of all overlay layers on the same underlying
+filesystem, all objects will report an st_dev from the overlay
+filesystem and st_ino from the underlying filesystem. This will
+make the overlay mount more compliant with filesystem scanners and
+overlay objects will be distinguishable from the corresponding
+objects in the original filesystem.
+
Upper and Lower
---------------
diff --git a/fs/open.c b/fs/open.c
index 373787afd638..6d2d2b33ac54 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -193,7 +193,8 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
goto out_putf;
error = -EPERM;
- if (IS_APPEND(inode))
+ /* Check IS_APPEND on real upper inode */
+ if (IS_APPEND(file_inode(f.file)))
goto out_putf;
sb_start_write(inode->i_sb);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 906ea6c93260..9008ab9fbd2e 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -20,6 +20,7 @@
#include <linux/namei.h>
#include <linux/fdtable.h>
#include <linux/ratelimit.h>
+#include <linux/exportfs.h>
#include "overlayfs.h"
#include "ovl_entry.h"
@@ -232,6 +233,79 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
return err;
}
+static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_be *uuid)
+{
+ struct ovl_fh *fh;
+ int fh_type, fh_len, dwords;
+ void *buf;
+ int buflen = MAX_HANDLE_SZ;
+
+ buf = kmalloc(buflen, GFP_TEMPORARY);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * We encode a non-connectable file handle for non-dir, because we
+ * only need to find the lower inode number and we don't want to pay
+ * the price or reconnecting the dentry.
+ */
+ dwords = buflen >> 2;
+ fh_type = exportfs_encode_fh(lower, buf, &dwords, 0);
+ buflen = (dwords << 2);
+
+ fh = ERR_PTR(-EIO);
+ if (WARN_ON(fh_type < 0) ||
+ WARN_ON(buflen > MAX_HANDLE_SZ) ||
+ WARN_ON(fh_type == FILEID_INVALID))
+ goto out;
+
+ BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255);
+ fh_len = offsetof(struct ovl_fh, fid) + buflen;
+ fh = kmalloc(fh_len, GFP_KERNEL);
+ if (!fh) {
+ fh = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ fh->version = OVL_FH_VERSION;
+ fh->magic = OVL_FH_MAGIC;
+ fh->type = fh_type;
+ fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
+ fh->len = fh_len;
+ fh->uuid = *uuid;
+ memcpy(fh->fid, buf, buflen);
+
+out:
+ kfree(buf);
+ return fh;
+}
+
+static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
+ struct dentry *upper)
+{
+ struct super_block *sb = lower->d_sb;
+ uuid_be *uuid = (uuid_be *) &sb->s_uuid;
+ const struct ovl_fh *fh = NULL;
+ int err;
+
+ /*
+ * When lower layer doesn't support export operations store a 'null' fh,
+ * so we can use the overlay.origin xattr to distignuish between a copy
+ * up and a pure upper inode.
+ */
+ if (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
+ uuid_be_cmp(*uuid, NULL_UUID_BE)) {
+ fh = ovl_encode_fh(lower, uuid);
+ if (IS_ERR(fh))
+ return PTR_ERR(fh);
+ }
+
+ err = ovl_do_setxattr(upper, OVL_XATTR_ORIGIN, fh, fh ? fh->len : 0, 0);
+ kfree(fh);
+
+ return err;
+}
+
static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
struct dentry *dentry, struct path *lowerpath,
struct kstat *stat, const char *link,
@@ -316,6 +390,14 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
if (err)
goto out_cleanup;
+ /*
+ * Store identifier of lower inode in upper inode xattr to
+ * allow lookup of the copy up origin inode.
+ */
+ err = ovl_set_origin(dentry, lowerpath->dentry, temp);
+ if (err)
+ goto out_cleanup;
+
if (tmpfile)
err = ovl_do_link(temp, udir, upper, true);
else
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 6515796460df..723b98b90698 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -138,36 +138,6 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
return err;
}
-static int ovl_dir_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
-{
- struct dentry *dentry = path->dentry;
- int err;
- enum ovl_path_type type;
- struct path realpath;
- const struct cred *old_cred;
-
- type = ovl_path_real(dentry, &realpath);
- old_cred = ovl_override_creds(dentry->d_sb);
- err = vfs_getattr(&realpath, stat, request_mask, flags);
- revert_creds(old_cred);
- if (err)
- return err;
-
- stat->dev = dentry->d_sb->s_dev;
- stat->ino = dentry->d_inode->i_ino;
-
- /*
- * It's probably not worth it to count subdirs to get the
- * correct link count. nlink=1 seems to pacify 'find' and
- * other utilities.
- */
- if (OVL_TYPE_MERGE(type))
- stat->nlink = 1;
-
- return 0;
-}
-
/* Common operations required to be done after creation of file on upper */
static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
struct dentry *newdentry, bool hardlink)
@@ -182,6 +152,9 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
inc_nlink(inode);
}
d_instantiate(dentry, inode);
+ /* Force lookup of new upper hardlink to find its lower */
+ if (hardlink)
+ d_drop(dentry);
}
static bool ovl_type_merge(struct dentry *dentry)
@@ -210,7 +183,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
if (err)
goto out_dput;
- if (ovl_type_merge(dentry->d_parent)) {
+ if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
/* Setting opaque here is just an optimization, allow to fail */
ovl_set_opaque(dentry, newdentry);
}
@@ -1070,7 +1043,7 @@ const struct inode_operations ovl_dir_inode_operations = {
.create = ovl_create,
.mknod = ovl_mknod,
.permission = ovl_permission,
- .getattr = ovl_dir_getattr,
+ .getattr = ovl_getattr,
.listxattr = ovl_listxattr,
.get_acl = ovl_get_acl,
.update_time = ovl_update_time,
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index f8fe6bf2036d..ad9547f82da5 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -57,18 +57,78 @@ out:
return err;
}
-static int ovl_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+int ovl_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
+ enum ovl_path_type type;
struct path realpath;
const struct cred *old_cred;
+ bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
int err;
- ovl_path_real(dentry, &realpath);
+ type = ovl_path_real(dentry, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
err = vfs_getattr(&realpath, stat, request_mask, flags);
+ if (err)
+ goto out;
+
+ /*
+ * When all layers are on the same fs, all real inode number are
+ * unique, so we use the overlay st_dev, which is friendly to du -x.
+ *
+ * We also use st_ino of the copy up origin, if we know it.
+ * This guaranties constant st_dev/st_ino across copy up.
+ *
+ * If filesystem supports NFS export ops, this also guaranties
+ * persistent st_ino across mount cycle.
+ */
+ if (ovl_same_sb(dentry->d_sb)) {
+ if (OVL_TYPE_ORIGIN(type)) {
+ struct kstat lowerstat;
+ u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
+
+ ovl_path_lower(dentry, &realpath);
+ err = vfs_getattr(&realpath, &lowerstat,
+ lowermask, flags);
+ if (err)
+ goto out;
+
+ WARN_ON_ONCE(stat->dev != lowerstat.dev);
+ /*
+ * Lower hardlinks are broken on copy up to different
+ * upper files, so we cannot use the lower origin st_ino
+ * for those different files, even for the same fs case.
+ */
+ if (is_dir || lowerstat.nlink == 1)
+ stat->ino = lowerstat.ino;
+ }
+ stat->dev = dentry->d_sb->s_dev;
+ } else if (is_dir) {
+ /*
+ * If not all layers are on the same fs the pair {real st_ino;
+ * overlay st_dev} is not unique, so use the non persistent
+ * overlay st_ino.
+ *
+ * Always use the overlay st_dev for directories, so 'find
+ * -xdev' will scan the entire overlay mount and won't cross the
+ * overlay mount boundaries.
+ */
+ stat->dev = dentry->d_sb->s_dev;
+ stat->ino = dentry->d_inode->i_ino;
+ }
+
+ /*
+ * It's probably not worth it to count subdirs to get the
+ * correct link count. nlink=1 seems to pacify 'find' and
+ * other utilities.
+ */
+ if (is_dir && OVL_TYPE_MERGE(type))
+ stat->nlink = 1;
+
+out:
revert_creds(old_cred);
+
return err;
}
@@ -303,6 +363,41 @@ static const struct inode_operations ovl_symlink_inode_operations = {
.update_time = ovl_update_time,
};
+/*
+ * It is possible to stack overlayfs instance on top of another
+ * overlayfs instance as lower layer. We need to annonate the
+ * stackable i_mutex locks according to stack level of the super
+ * block instance. An overlayfs instance can never be in stack
+ * depth 0 (there is always a real fs below it). An overlayfs
+ * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth].
+ *
+ * For example, here is a snip from /proc/lockdep_chains after
+ * dir_iterate of nested overlayfs:
+ *
+ * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2)
+ * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
+ * [...] &type->i_mutex_dir_key (stack_depth=0)
+ */
+#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH
+
+static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
+{
+#ifdef CONFIG_LOCKDEP
+ static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
+ static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];
+
+ int depth = inode->i_sb->s_stack_depth - 1;
+
+ if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING))
+ depth = 0;
+
+ if (S_ISDIR(inode->i_mode))
+ lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
+ else
+ lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);
+#endif
+}
+
static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
inode->i_ino = get_next_ino();
@@ -312,6 +407,8 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
#endif
+ ovl_lockdep_annotate_inode_mutex_key(inode);
+
switch (mode & S_IFMT) {
case S_IFREG:
inode->i_op = &ovl_file_inode_operations;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index b8b077821fb0..bad0f665a635 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -12,6 +12,8 @@
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/ratelimit.h>
+#include <linux/mount.h>
+#include <linux/exportfs.h>
#include "overlayfs.h"
#include "ovl_entry.h"
@@ -81,6 +83,90 @@ invalid:
goto err_free;
}
+static int ovl_acceptable(void *ctx, struct dentry *dentry)
+{
+ return 1;
+}
+
+static struct dentry *ovl_get_origin(struct dentry *dentry,
+ struct vfsmount *mnt)
+{
+ int res;
+ struct ovl_fh *fh = NULL;
+ struct dentry *origin = NULL;
+ int bytes;
+
+ res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+ if (res < 0) {
+ if (res == -ENODATA || res == -EOPNOTSUPP)
+ return NULL;
+ goto fail;
+ }
+ /* Zero size value means "copied up but origin unknown" */
+ if (res == 0)
+ return NULL;
+
+ fh = kzalloc(res, GFP_TEMPORARY);
+ if (!fh)
+ return ERR_PTR(-ENOMEM);
+
+ res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res);
+ if (res < 0)
+ goto fail;
+
+ if (res < sizeof(struct ovl_fh) || res < fh->len)
+ goto invalid;
+
+ if (fh->magic != OVL_FH_MAGIC)
+ goto invalid;
+
+ /* Treat larger version and unknown flags as "origin unknown" */
+ if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
+ goto out;
+
+ /* Treat endianness mismatch as "origin unknown" */
+ if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
+ (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
+ goto out;
+
+ bytes = (fh->len - offsetof(struct ovl_fh, fid));
+
+ /*
+ * Make sure that the stored uuid matches the uuid of the lower
+ * layer where file handle will be decoded.
+ */
+ if (uuid_be_cmp(fh->uuid, *(uuid_be *) &mnt->mnt_sb->s_uuid))
+ goto out;
+
+ origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
+ bytes >> 2, (int)fh->type,
+ ovl_acceptable, NULL);
+ if (IS_ERR(origin)) {
+ /* Treat stale file handle as "origin unknown" */
+ if (origin == ERR_PTR(-ESTALE))
+ origin = NULL;
+ goto out;
+ }
+
+ if (ovl_dentry_weird(origin) ||
+ ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) {
+ dput(origin);
+ origin = NULL;
+ goto invalid;
+ }
+
+out:
+ kfree(fh);
+ return origin;
+
+fail:
+ pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
+ goto out;
+invalid:
+ pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+ goto out;
+}
+
static bool ovl_is_opaquedir(struct dentry *dentry)
{
int res;
@@ -192,6 +278,45 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
return 0;
}
+
+static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry,
+ struct path **stackp, unsigned int *ctrp)
+{
+ struct super_block *same_sb = ovl_same_sb(dentry->d_sb);
+ struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
+ struct vfsmount *mnt;
+ struct dentry *origin;
+
+ if (!same_sb || !roe->numlower)
+ return 0;
+
+ /*
+ * Since all layers are on the same fs, we use the first layer for
+ * decoding the file handle. We may get a disconnected dentry,
+ * which is fine, because we only need to hold the origin inode in
+ * cache and use its inode number. We may even get a connected dentry,
+ * that is not under the first layer's root. That is also fine for
+ * using it's inode number - it's the same as if we held a reference
+ * to a dentry in first layer that was moved under us.
+ */
+ mnt = roe->lowerstack[0].mnt;
+
+ origin = ovl_get_origin(upperdentry, mnt);
+ if (IS_ERR_OR_NULL(origin))
+ return PTR_ERR(origin);
+
+ BUG_ON(*stackp || *ctrp);
+ *stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY);
+ if (!*stackp) {
+ dput(origin);
+ return -ENOMEM;
+ }
+ **stackp = (struct path) { .dentry = origin, .mnt = mnt };
+ *ctrp = 1;
+
+ return 0;
+}
+
/*
* Returns next layer in stack starting from top.
* Returns -1 if this is the last layer.
@@ -220,6 +345,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
const struct cred *old_cred;
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct ovl_entry *poe = dentry->d_parent->d_fsdata;
+ struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
struct path *stack = NULL;
struct dentry *upperdir, *upperdentry = NULL;
unsigned int ctr = 0;
@@ -253,13 +379,20 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
err = -EREMOTE;
goto out;
}
+ if (upperdentry && !d.is_dir) {
+ BUG_ON(!d.stop || d.redirect);
+ err = ovl_check_origin(dentry, upperdentry,
+ &stack, &ctr);
+ if (err)
+ goto out;
+ }
if (d.redirect) {
upperredirect = kstrdup(d.redirect, GFP_KERNEL);
if (!upperredirect)
goto out_put_upper;
if (d.redirect[0] == '/')
- poe = dentry->d_sb->s_root->d_fsdata;
+ poe = roe;
}
upperopaque = d.opaque;
}
@@ -290,10 +423,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (d.stop)
break;
- if (d.redirect &&
- d.redirect[0] == '/' &&
- poe != dentry->d_sb->s_root->d_fsdata) {
- poe = dentry->d_sb->s_root->d_fsdata;
+ if (d.redirect && d.redirect[0] == '/' && poe != roe) {
+ poe = roe;
/* Find the current layer on the root dentry */
for (i = 0; i < poe->numlower; i++)
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 741dc0b6931f..caa36cb9c46d 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -8,18 +8,56 @@
*/
#include <linux/kernel.h>
+#include <linux/uuid.h>
enum ovl_path_type {
__OVL_PATH_UPPER = (1 << 0),
__OVL_PATH_MERGE = (1 << 1),
+ __OVL_PATH_ORIGIN = (1 << 2),
};
#define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER)
#define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE)
+#define OVL_TYPE_ORIGIN(type) ((type) & __OVL_PATH_ORIGIN)
#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
#define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
+#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
+
+/*
+ * The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
+ * where:
+ * origin.fh - exported file handle of the lower file
+ * origin.uuid - uuid of the lower filesystem
+ */
+#define OVL_FH_VERSION 0
+#define OVL_FH_MAGIC 0xfb
+
+/* CPU byte order required for fid decoding: */
+#define OVL_FH_FLAG_BIG_ENDIAN (1 << 0)
+#define OVL_FH_FLAG_ANY_ENDIAN (1 << 1)
+
+#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN)
+
+#if defined(__LITTLE_ENDIAN)
+#define OVL_FH_FLAG_CPU_ENDIAN 0
+#elif defined(__BIG_ENDIAN)
+#define OVL_FH_FLAG_CPU_ENDIAN OVL_FH_FLAG_BIG_ENDIAN
+#else
+#error Endianness not defined
+#endif
+
+/* On-disk and in-memeory format for redirect by file handle */
+struct ovl_fh {
+ u8 version; /* 0 */
+ u8 magic; /* 0xfb */
+ u8 len; /* size of this header + size of fid */
+ u8 flags; /* OVL_FH_FLAG_* */
+ u8 type; /* fid_type of fid */
+ uuid_be uuid; /* uuid of filesystem */
+ u8 fid[0]; /* file identifier */
+} __packed;
#define OVL_ISUPPER_MASK 1UL
@@ -151,6 +189,7 @@ int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
struct dentry *ovl_workdir(struct dentry *dentry);
const struct cred *ovl_override_creds(struct super_block *sb);
+struct super_block *ovl_same_sb(struct super_block *sb);
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
bool ovl_dentry_remote(struct dentry *dentry);
bool ovl_dentry_weird(struct dentry *dentry);
@@ -197,6 +236,8 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
/* inode.c */
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
+int ovl_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags);
int ovl_permission(struct inode *inode, int mask);
int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 59614faa14c3..b2023ddb8532 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -29,6 +29,8 @@ struct ovl_fs {
const struct cred *creator_cred;
bool tmpfile;
wait_queue_head_t copyup_wq;
+ /* sb common to all layers */
+ struct super_block *same_sb;
};
/* private information held for every overlayfs dentry */
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index c9e70d39c1ea..9828b7de8999 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -49,11 +49,28 @@ static void ovl_dentry_release(struct dentry *dentry)
}
}
+static int ovl_check_append_only(struct inode *inode, int flag)
+{
+ /*
+ * This test was moot in vfs may_open() because overlay inode does
+ * not have the S_APPEND flag, so re-check on real upper inode
+ */
+ if (IS_APPEND(inode)) {
+ if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
+ return -EPERM;
+ if (flag & O_TRUNC)
+ return -EPERM;
+ }
+
+ return 0;
+}
+
static struct dentry *ovl_d_real(struct dentry *dentry,
const struct inode *inode,
unsigned int open_flags)
{
struct dentry *real;
+ int err;
if (!d_is_reg(dentry)) {
if (!inode || inode == d_inode(dentry))
@@ -65,15 +82,20 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
return dentry;
if (open_flags) {
- int err = ovl_open_maybe_copy_up(dentry, open_flags);
-
+ err = ovl_open_maybe_copy_up(dentry, open_flags);
if (err)
return ERR_PTR(err);
}
real = ovl_dentry_upper(dentry);
- if (real && (!inode || inode == d_inode(real)))
+ if (real && (!inode || inode == d_inode(real))) {
+ if (!inode) {
+ err = ovl_check_append_only(d_inode(real), open_flags);
+ if (err)
+ return ERR_PTR(err);
+ }
return real;
+ }
real = ovl_dentry_lower(dentry);
if (!real)
@@ -709,8 +731,8 @@ static const struct xattr_handler *ovl_xattr_handlers[] = {
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
- struct path upperpath = { NULL, NULL };
- struct path workpath = { NULL, NULL };
+ struct path upperpath = { };
+ struct path workpath = { };
struct dentry *root_dentry;
struct inode *realinode;
struct ovl_entry *oe;
@@ -892,11 +914,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
ufs->lower_mnt[ufs->numlower] = mnt;
ufs->numlower++;
+
+ /* Check if all lower layers are on same sb */
+ if (i == 0)
+ ufs->same_sb = mnt->mnt_sb;
+ else if (ufs->same_sb != mnt->mnt_sb)
+ ufs->same_sb = NULL;
}
/* If the upper fs is nonexistent, we mark overlayfs r/o too */
if (!ufs->upper_mnt)
sb->s_flags |= MS_RDONLY;
+ else if (ufs->upper_mnt->mnt_sb != ufs->same_sb)
+ ufs->same_sb = NULL;
if (remote)
sb->s_d_op = &ovl_reval_dentry_operations;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 6e610a205e15..cfdea47313a1 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -40,6 +40,13 @@ const struct cred *ovl_override_creds(struct super_block *sb)
return override_creds(ofs->creator_cred);
}
+struct super_block *ovl_same_sb(struct super_block *sb)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+
+ return ofs->same_sb;
+}
+
struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
{
size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
@@ -75,11 +82,13 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
type = __OVL_PATH_UPPER;
/*
- * Non-dir dentry can hold lower dentry from previous
- * location.
+ * Non-dir dentry can hold lower dentry of its copy up origin.
*/
- if (oe->numlower && d_is_dir(dentry))
- type |= __OVL_PATH_MERGE;
+ if (oe->numlower) {
+ type |= __OVL_PATH_ORIGIN;
+ if (d_is_dir(dentry))
+ type |= __OVL_PATH_MERGE;
+ }
} else {
if (oe->numlower > 1)
type |= __OVL_PATH_MERGE;
@@ -100,7 +109,7 @@ void ovl_path_lower(struct dentry *dentry, struct path *path)
{
struct ovl_entry *oe = dentry->d_fsdata;
- *path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL };
+ *path = oe->numlower ? oe->lowerstack[0] : (struct path) { };
}
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)