summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.h2
-rw-r--r--fs/9p/vfs_inode.c123
-rw-r--r--fs/9p/vfs_inode_dotl.c39
-rw-r--r--fs/afs/rxrpc.c2
-rw-r--r--fs/autofs4/symlink.c5
-rw-r--r--fs/befs/linuxvfs.c57
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/btrfs/backref.c17
-rw-r--r--fs/btrfs/extent-tree.c20
-rw-r--r--fs/btrfs/volumes.c1
-rw-r--r--fs/ceph/inode.c11
-rw-r--r--fs/cifs/cifs_dfs_ref.c3
-rw-r--r--fs/cifs/cifs_unicode.c182
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsproto.h4
-rw-r--r--fs/cifs/cifssmb.c23
-rw-r--r--fs/cifs/connect.c3
-rw-r--r--fs/cifs/dir.c3
-rw-r--r--fs/cifs/file.c7
-rw-r--r--fs/cifs/inode.c31
-rw-r--r--fs/cifs/link.c31
-rw-r--r--fs/cifs/readdir.c2
-rw-r--r--fs/cifs/smb1ops.c3
-rw-r--r--fs/cifs/smb2pdu.c2
-rw-r--r--fs/configfs/symlink.c31
-rw-r--r--fs/dcache.c24
-rw-r--r--fs/debugfs/file.c12
-rw-r--r--fs/debugfs/inode.c6
-rw-r--r--fs/dlm/lowcomms.c16
-rw-r--r--fs/ecryptfs/inode.c11
-rw-r--r--fs/exofs/Kbuild2
-rw-r--r--fs/exofs/exofs.h4
-rw-r--r--fs/exofs/inode.c9
-rw-r--r--fs/exofs/namei.c5
-rw-r--r--fs/exofs/symlink.c55
-rw-r--r--fs/ext2/inode.c1
-rw-r--r--fs/ext2/namei.c3
-rw-r--r--fs/ext2/symlink.c10
-rw-r--r--fs/ext3/inode.c1
-rw-r--r--fs/ext3/namei.c3
-rw-r--r--fs/ext3/super.c2
-rw-r--r--fs/ext3/symlink.c10
-rw-r--r--fs/ext4/ext4.h1
-rw-r--r--fs/ext4/inode.c7
-rw-r--r--fs/ext4/namei.c11
-rw-r--r--fs/ext4/symlink.c48
-rw-r--r--fs/f2fs/Kconfig19
-rw-r--r--fs/f2fs/Makefile2
-rw-r--r--fs/f2fs/acl.c46
-rw-r--r--fs/f2fs/checkpoint.c56
-rw-r--r--fs/f2fs/crypto.c491
-rw-r--r--fs/f2fs/crypto_fname.c440
-rw-r--r--fs/f2fs/crypto_key.c255
-rw-r--r--fs/f2fs/crypto_policy.c209
-rw-r--r--fs/f2fs/data.c593
-rw-r--r--fs/f2fs/debug.c11
-rw-r--r--fs/f2fs/dir.c194
-rw-r--r--fs/f2fs/f2fs.h320
-rw-r--r--fs/f2fs/f2fs_crypto.h151
-rw-r--r--fs/f2fs/file.c516
-rw-r--r--fs/f2fs/gc.c115
-rw-r--r--fs/f2fs/hash.c3
-rw-r--r--fs/f2fs/inline.c43
-rw-r--r--fs/f2fs/inode.c9
-rw-r--r--fs/f2fs/namei.c390
-rw-r--r--fs/f2fs/node.c48
-rw-r--r--fs/f2fs/node.h22
-rw-r--r--fs/f2fs/recovery.c28
-rw-r--r--fs/f2fs/segment.c250
-rw-r--r--fs/f2fs/segment.h1
-rw-r--r--fs/f2fs/super.c178
-rw-r--r--fs/f2fs/trace.c6
-rw-r--r--fs/f2fs/trace.h4
-rw-r--r--fs/f2fs/xattr.c3
-rw-r--r--fs/f2fs/xattr.h4
-rw-r--r--fs/fhandle.c5
-rw-r--r--fs/freevxfs/vxfs_extern.h3
-rw-r--r--fs/freevxfs/vxfs_immed.c34
-rw-r--r--fs/freevxfs/vxfs_inode.c7
-rw-r--r--fs/fuse/dir.c22
-rw-r--r--fs/gfs2/inode.c10
-rw-r--r--fs/hostfs/hostfs_kern.c15
-rw-r--r--fs/hppfs/hppfs.c13
-rw-r--r--fs/inode.c31
-rw-r--r--fs/jffs2/dir.c1
-rw-r--r--fs/jffs2/fs.c8
-rw-r--r--fs/jffs2/readinode.c27
-rw-r--r--fs/jffs2/symlink.c45
-rw-r--r--fs/jfs/inode.c3
-rw-r--r--fs/jfs/namei.c5
-rw-r--r--fs/jfs/symlink.c10
-rw-r--r--fs/kernfs/symlink.c25
-rw-r--r--fs/libfs.c25
-rw-r--r--fs/logfs/dir.c1
-rw-r--r--fs/mount.h1
-rw-r--r--fs/namei.c1453
-rw-r--r--fs/namespace.c27
-rw-r--r--fs/nfs/nfs4proc.c3
-rw-r--r--fs/nfs/symlink.c19
-rw-r--r--fs/nfs/write.c13
-rw-r--r--fs/ntfs/namei.c2
-rw-r--r--fs/omfs/bitmap.c2
-rw-r--r--fs/omfs/inode.c10
-rw-r--r--fs/open.c2
-rw-r--r--fs/overlayfs/copy_up.c3
-rw-r--r--fs/overlayfs/dir.c33
-rw-r--r--fs/overlayfs/inode.c35
-rw-r--r--fs/overlayfs/super.c10
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/inode.c9
-rw-r--r--fs/proc/namespaces.c4
-rw-r--r--fs/proc/self.c24
-rw-r--r--fs/proc/thread_self.c22
-rw-r--r--fs/select.c6
-rw-r--r--fs/splice.c1
-rw-r--r--fs/sysv/Makefile2
-rw-r--r--fs/sysv/inode.c5
-rw-r--r--fs/sysv/symlink.c20
-rw-r--r--fs/sysv/sysv.h1
-rw-r--r--fs/ubifs/dir.c1
-rw-r--r--fs/ubifs/file.c11
-rw-r--r--fs/ubifs/super.c1
-rw-r--r--fs/udf/dir.c2
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/udf/namei.c95
-rw-r--r--fs/udf/super.c26
-rw-r--r--fs/udf/symlink.c3
-rw-r--r--fs/udf/unicode.c49
-rw-r--r--fs/ufs/inode.c5
-rw-r--r--fs/ufs/namei.c3
-rw-r--r--fs/ufs/symlink.c13
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c8
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c31
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c9
-rw-r--r--fs/xfs/xfs_attr_inactive.c83
-rw-r--r--fs/xfs/xfs_file.c2
-rw-r--r--fs/xfs/xfs_inode.c22
-rw-r--r--fs/xfs/xfs_iops.c11
-rw-r--r--fs/xfs/xfs_mount.c34
141 files changed, 5393 insertions, 2277 deletions
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index fb9ffcb43277..0923f2cf3c80 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -149,8 +149,6 @@ extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d);
extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d);
extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
-extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
- void *p);
extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
struct p9_fid *fid,
struct super_block *sb, int new);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 703342e309f5..510040b04c96 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1224,100 +1224,43 @@ ino_t v9fs_qid2ino(struct p9_qid *qid)
}
/**
- * v9fs_readlink - read a symlink's location (internal version)
+ * v9fs_vfs_follow_link - follow a symlink path
* @dentry: dentry for symlink
- * @buffer: buffer to load symlink location into
- * @buflen: length of buffer
- *
+ * @cookie: place to pass the data to put_link()
*/
-static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
+static const char *v9fs_vfs_follow_link(struct dentry *dentry, void **cookie)
{
- int retval;
-
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid;
+ struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry);
+ struct p9_fid *fid = v9fs_fid_lookup(dentry);
struct p9_wstat *st;
+ char *res;
+
+ p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
- p9_debug(P9_DEBUG_VFS, " %pd\n", dentry);
- retval = -EPERM;
- v9ses = v9fs_dentry2v9ses(dentry);
- fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
- return PTR_ERR(fid);
+ return ERR_CAST(fid);
if (!v9fs_proto_dotu(v9ses))
- return -EBADF;
+ return ERR_PTR(-EBADF);
st = p9_client_stat(fid);
if (IS_ERR(st))
- return PTR_ERR(st);
+ return ERR_CAST(st);
if (!(st->mode & P9_DMSYMLINK)) {
- retval = -EINVAL;
- goto done;
+ p9stat_free(st);
+ kfree(st);
+ return ERR_PTR(-EINVAL);
}
+ res = st->extension;
+ st->extension = NULL;
+ if (strlen(res) >= PATH_MAX)
+ res[PATH_MAX - 1] = '\0';
- /* copy extension buffer into buffer */
- retval = min(strlen(st->extension)+1, (size_t)buflen);
- memcpy(buffer, st->extension, retval);
-
- p9_debug(P9_DEBUG_VFS, "%pd -> %s (%.*s)\n",
- dentry, st->extension, buflen, buffer);
-
-done:
p9stat_free(st);
kfree(st);
- return retval;
-}
-
-/**
- * v9fs_vfs_follow_link - follow a symlink path
- * @dentry: dentry for symlink
- * @nd: nameidata
- *
- */
-
-static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- int len = 0;
- char *link = __getname();
-
- p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
-
- if (!link)
- link = ERR_PTR(-ENOMEM);
- else {
- len = v9fs_readlink(dentry, link, PATH_MAX);
-
- if (len < 0) {
- __putname(link);
- link = ERR_PTR(len);
- } else
- link[min(len, PATH_MAX-1)] = 0;
- }
- nd_set_link(nd, link);
-
- return NULL;
-}
-
-/**
- * v9fs_vfs_put_link - release a symlink path
- * @dentry: dentry for symlink
- * @nd: nameidata
- * @p: unused
- *
- */
-
-void
-v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
-{
- char *s = nd_get_link(nd);
-
- p9_debug(P9_DEBUG_VFS, " %pd %s\n",
- dentry, IS_ERR(s) ? "<error>" : s);
- if (!IS_ERR(s))
- __putname(s);
+ return *cookie = res;
}
/**
@@ -1370,6 +1313,8 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname);
}
+#define U32_MAX_DIGITS 10
+
/**
* v9fs_vfs_link - create a hardlink
* @old_dentry: dentry for file to link to
@@ -1383,7 +1328,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
int retval;
- char *name;
+ char name[1 + U32_MAX_DIGITS + 2]; /* sign + number + \n + \0 */
struct p9_fid *oldfid;
p9_debug(P9_DEBUG_VFS, " %lu,%pd,%pd\n",
@@ -1393,20 +1338,12 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
if (IS_ERR(oldfid))
return PTR_ERR(oldfid);
- name = __getname();
- if (unlikely(!name)) {
- retval = -ENOMEM;
- goto clunk_fid;
- }
-
sprintf(name, "%d\n", oldfid->fid);
retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
- __putname(name);
if (!retval) {
v9fs_refresh_inode(oldfid, d_inode(old_dentry));
v9fs_invalidate_inode_attr(dir);
}
-clunk_fid:
p9_client_clunk(oldfid);
return retval;
}
@@ -1425,7 +1362,7 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rde
{
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
int retval;
- char *name;
+ char name[2 + U32_MAX_DIGITS + 1 + U32_MAX_DIGITS + 1];
u32 perm;
p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n",
@@ -1435,26 +1372,16 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rde
if (!new_valid_dev(rdev))
return -EINVAL;
- name = __getname();
- if (!name)
- return -ENOMEM;
/* build extension */
if (S_ISBLK(mode))
sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev));
else if (S_ISCHR(mode))
sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev));
- else if (S_ISFIFO(mode))
- *name = 0;
- else if (S_ISSOCK(mode))
+ else
*name = 0;
- else {
- __putname(name);
- return -EINVAL;
- }
perm = unixmode2p9mode(v9ses, mode);
retval = v9fs_vfs_mkspecial(dir, dentry, perm, name);
- __putname(name);
return retval;
}
@@ -1530,7 +1457,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
static const struct inode_operations v9fs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = v9fs_vfs_follow_link,
- .put_link = v9fs_vfs_put_link,
+ .put_link = kfree_put_link,
.getattr = v9fs_vfs_getattr,
.setattr = v9fs_vfs_setattr,
};
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 9861c7c951a6..09e4433717b8 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -905,41 +905,24 @@ error:
/**
* v9fs_vfs_follow_link_dotl - follow a symlink path
* @dentry: dentry for symlink
- * @nd: nameidata
- *
+ * @cookie: place to pass the data to put_link()
*/
-static void *
-v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
+static const char *
+v9fs_vfs_follow_link_dotl(struct dentry *dentry, void **cookie)
{
- int retval;
- struct p9_fid *fid;
- char *link = __getname();
+ struct p9_fid *fid = v9fs_fid_lookup(dentry);
char *target;
+ int retval;
p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
- if (!link) {
- link = ERR_PTR(-ENOMEM);
- goto ndset;
- }
- fid = v9fs_fid_lookup(dentry);
- if (IS_ERR(fid)) {
- __putname(link);
- link = ERR_CAST(fid);
- goto ndset;
- }
+ if (IS_ERR(fid))
+ return ERR_CAST(fid);
retval = p9_client_readlink(fid, &target);
- if (!retval) {
- strcpy(link, target);
- kfree(target);
- goto ndset;
- }
- __putname(link);
- link = ERR_PTR(retval);
-ndset:
- nd_set_link(nd, link);
- return NULL;
+ if (retval)
+ return ERR_PTR(retval);
+ return *cookie = target;
}
int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
@@ -1006,7 +989,7 @@ const struct inode_operations v9fs_file_inode_operations_dotl = {
const struct inode_operations v9fs_symlink_inode_operations_dotl = {
.readlink = generic_readlink,
.follow_link = v9fs_vfs_follow_link_dotl,
- .put_link = v9fs_vfs_put_link,
+ .put_link = kfree_put_link,
.getattr = v9fs_vfs_getattr_dotl,
.setattr = v9fs_vfs_setattr_dotl,
.setxattr = generic_setxattr,
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 3a57a1b0fb51..b50642870a43 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -85,7 +85,7 @@ int afs_open_socket(void)
return -ENOMEM;
}
- ret = sock_create_kern(AF_RXRPC, SOCK_DGRAM, PF_INET, &socket);
+ ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &socket);
if (ret < 0) {
destroy_workqueue(afs_async_calls);
_leave(" = %d [socket]", ret);
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index de58cc7b8076..da0c33481bc0 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -12,14 +12,13 @@
#include "autofs_i.h"
-static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *autofs4_follow_link(struct dentry *dentry, void **cookie)
{
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
if (ino && !autofs4_oz_mode(sbi))
ino->last_used = jiffies;
- nd_set_link(nd, d_inode(dentry)->i_private);
- return NULL;
+ return d_inode(dentry)->i_private;
}
const struct inode_operations autofs4_symlink_inode_operations = {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 7943533c3868..46aedacfa6a8 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -42,8 +42,7 @@ static struct inode *befs_iget(struct super_block *, unsigned long);
static struct inode *befs_alloc_inode(struct super_block *sb);
static void befs_destroy_inode(struct inode *inode);
static void befs_destroy_inodecache(void);
-static void *befs_follow_link(struct dentry *, struct nameidata *);
-static void *befs_fast_follow_link(struct dentry *, struct nameidata *);
+static const char *befs_follow_link(struct dentry *, void **);
static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
char **out, int *out_len);
static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
@@ -80,11 +79,6 @@ static const struct address_space_operations befs_aops = {
.bmap = befs_bmap,
};
-static const struct inode_operations befs_fast_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = befs_fast_follow_link,
-};
-
static const struct inode_operations befs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = befs_follow_link,
@@ -403,10 +397,12 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
inode->i_op = &befs_dir_inode_operations;
inode->i_fop = &befs_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
- if (befs_ino->i_flags & BEFS_LONG_SYMLINK)
+ if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
inode->i_op = &befs_symlink_inode_operations;
- else
- inode->i_op = &befs_fast_symlink_inode_operations;
+ } else {
+ inode->i_link = befs_ino->i_data.symlink;
+ inode->i_op = &simple_symlink_inode_operations;
+ }
} else {
befs_error(sb, "Inode %lu is not a regular file, "
"directory or symlink. THAT IS WRONG! BeFS has no "
@@ -467,8 +463,8 @@ befs_destroy_inodecache(void)
* The data stream become link name. Unless the LONG_SYMLINK
* flag is set.
*/
-static void *
-befs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *
+befs_follow_link(struct dentry *dentry, void **cookie)
{
struct super_block *sb = dentry->d_sb;
struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
@@ -478,33 +474,20 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
if (len == 0) {
befs_error(sb, "Long symlink with illegal length");
- link = ERR_PTR(-EIO);
- } else {
- befs_debug(sb, "Follow long symlink");
-
- link = kmalloc(len, GFP_NOFS);
- if (!link) {
- link = ERR_PTR(-ENOMEM);
- } else if (befs_read_lsymlink(sb, data, link, len) != len) {
- kfree(link);
- befs_error(sb, "Failed to read entire long symlink");
- link = ERR_PTR(-EIO);
- } else {
- link[len - 1] = '\0';
- }
+ return ERR_PTR(-EIO);
}
- nd_set_link(nd, link);
- return NULL;
-}
-
-
-static void *
-befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
+ befs_debug(sb, "Follow long symlink");
- nd_set_link(nd, befs_ino->i_data.symlink);
- return NULL;
+ link = kmalloc(len, GFP_NOFS);
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+ if (befs_read_lsymlink(sb, data, link, len) != len) {
+ kfree(link);
+ befs_error(sb, "Failed to read entire long symlink");
+ return ERR_PTR(-EIO);
+ }
+ link[len - 1] = '\0';
+ return *cookie = link;
}
/*
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 241ef68d2893..cd46e4158830 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -918,7 +918,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
total_size = total_mapping_size(elf_phdata,
loc->elf_ex.e_phnum);
if (!total_size) {
- error = -EINVAL;
+ retval = -EINVAL;
goto out_free_dentry;
}
}
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 9de772ee0031..614aaa1969bd 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -880,6 +880,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
* indirect refs to their parent bytenr.
* When roots are found, they're added to the roots list
*
+ * NOTE: This can return values > 0
+ *
* FIXME some caching might speed things up
*/
static int find_parent_nodes(struct btrfs_trans_handle *trans,
@@ -1198,6 +1200,19 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
return ret;
}
+/**
+ * btrfs_check_shared - tell us whether an extent is shared
+ *
+ * @trans: optional trans handle
+ *
+ * btrfs_check_shared uses the backref walking code but will short
+ * circuit as soon as it finds a root or inode that doesn't match the
+ * one passed in. This provides a significant performance benefit for
+ * callers (such as fiemap) which want to know whether the extent is
+ * shared but do not need a ref count.
+ *
+ * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
+ */
int btrfs_check_shared(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 root_objectid,
u64 inum, u64 bytenr)
@@ -1226,11 +1241,13 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
roots, NULL, root_objectid, inum);
if (ret == BACKREF_FOUND_SHARED) {
+ /* this is the only condition under which we return 1 */
ret = 1;
break;
}
if (ret < 0 && ret != -ENOENT)
break;
+ ret = 0;
node = ulist_next(tmp, &uiter);
if (!node)
break;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7effed6f2fa6..0ec3acd14cbf 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8829,6 +8829,24 @@ again:
goto again;
}
+ /*
+ * if we are changing raid levels, try to allocate a corresponding
+ * block group with the new raid level.
+ */
+ alloc_flags = update_block_group_flags(root, cache->flags);
+ if (alloc_flags != cache->flags) {
+ ret = do_chunk_alloc(trans, root, alloc_flags,
+ CHUNK_ALLOC_FORCE);
+ /*
+ * ENOSPC is allowed here, we may have enough space
+ * already allocated at the new raid level to
+ * carry on
+ */
+ if (ret == -ENOSPC)
+ ret = 0;
+ if (ret < 0)
+ goto out;
+ }
ret = set_block_group_ro(cache, 0);
if (!ret)
@@ -8842,7 +8860,9 @@ again:
out:
if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
alloc_flags = update_block_group_flags(root, cache->flags);
+ lock_chunks(root->fs_info->chunk_root);
check_system_chunk(trans, root, alloc_flags);
+ unlock_chunks(root->fs_info->chunk_root);
}
mutex_unlock(&root->fs_info->ro_block_group_mutex);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 96aebf3bcd5b..174f5e1e00ab 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4625,6 +4625,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
{
u64 chunk_offset;
+ ASSERT(mutex_is_locked(&extent_root->fs_info->chunk_mutex));
chunk_offset = find_next_chunk(extent_root->fs_info);
return __btrfs_alloc_chunk(trans, extent_root, chunk_offset, type);
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e876e1944519..571acd88606c 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -6,7 +6,6 @@
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
-#include <linux/namei.h>
#include <linux/writeback.h>
#include <linux/vmalloc.h>
#include <linux/posix_acl.h>
@@ -819,6 +818,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
else
kfree(sym); /* lost a race */
}
+ inode->i_link = ci->i_symlink;
break;
case S_IFDIR:
inode->i_op = &ceph_dir_iops;
@@ -1691,16 +1691,9 @@ retry:
/*
* symlinks
*/
-static void *ceph_sym_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct ceph_inode_info *ci = ceph_inode(d_inode(dentry));
- nd_set_link(nd, ci->i_symlink);
- return NULL;
-}
-
static const struct inode_operations ceph_symlink_iops = {
.readlink = generic_readlink,
- .follow_link = ceph_sym_follow_link,
+ .follow_link = simple_follow_link,
.setattr = ceph_setattr,
.getattr = ceph_getattr,
.setxattr = ceph_setxattr,
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 430e0348c99e..7dc886c9a78f 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -24,6 +24,7 @@
#include "cifsfs.h"
#include "dns_resolve.h"
#include "cifs_debug.h"
+#include "cifs_unicode.h"
static LIST_HEAD(cifs_dfs_automount_list);
@@ -312,7 +313,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
xid = get_xid();
rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls,
&num_referrals, &referrals,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ cifs_remap(cifs_sb));
free_xid(xid);
cifs_put_tlink(tlink);
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 0303c6793d90..5a53ac6b1e02 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -27,41 +27,6 @@
#include "cifsglob.h"
#include "cifs_debug.h"
-/*
- * cifs_utf16_bytes - how long will a string be after conversion?
- * @utf16 - pointer to input string
- * @maxbytes - don't go past this many bytes of input string
- * @codepage - destination codepage
- *
- * Walk a utf16le string and return the number of bytes that the string will
- * be after being converted to the given charset, not including any null
- * termination required. Don't walk past maxbytes in the source buffer.
- */
-int
-cifs_utf16_bytes(const __le16 *from, int maxbytes,
- const struct nls_table *codepage)
-{
- int i;
- int charlen, outlen = 0;
- int maxwords = maxbytes / 2;
- char tmp[NLS_MAX_CHARSET_SIZE];
- __u16 ftmp;
-
- for (i = 0; i < maxwords; i++) {
- ftmp = get_unaligned_le16(&from[i]);
- if (ftmp == 0)
- break;
-
- charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
- if (charlen > 0)
- outlen += charlen;
- else
- outlen++;
- }
-
- return outlen;
-}
-
int cifs_remap(struct cifs_sb_info *cifs_sb)
{
int map_type;
@@ -155,10 +120,13 @@ convert_sfm_char(const __u16 src_char, char *target)
* enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
*/
static int
-cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
+cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp,
int maptype)
{
int len = 1;
+ __u16 src_char;
+
+ src_char = *from;
if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target))
return len;
@@ -168,10 +136,23 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
/* if character not one of seven in special remap set */
len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
- if (len <= 0) {
- *target = '?';
- len = 1;
- }
+ if (len <= 0)
+ goto surrogate_pair;
+
+ return len;
+
+surrogate_pair:
+ /* convert SURROGATE_PAIR and IVS */
+ if (strcmp(cp->charset, "utf8"))
+ goto unknown;
+ len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6);
+ if (len <= 0)
+ goto unknown;
+ return len;
+
+unknown:
+ *target = '?';
+ len = 1;
return len;
}
@@ -206,7 +187,7 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
int nullsize = nls_nullsize(codepage);
int fromwords = fromlen / 2;
char tmp[NLS_MAX_CHARSET_SIZE];
- __u16 ftmp;
+ __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */
/*
* because the chars can be of varying widths, we need to take care
@@ -217,9 +198,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
for (i = 0; i < fromwords; i++) {
- ftmp = get_unaligned_le16(&from[i]);
- if (ftmp == 0)
+ ftmp[0] = get_unaligned_le16(&from[i]);
+ if (ftmp[0] == 0)
break;
+ if (i + 1 < fromwords)
+ ftmp[1] = get_unaligned_le16(&from[i + 1]);
+ else
+ ftmp[1] = 0;
+ if (i + 2 < fromwords)
+ ftmp[2] = get_unaligned_le16(&from[i + 2]);
+ else
+ ftmp[2] = 0;
/*
* check to see if converting this character might make the
@@ -234,6 +223,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
/* put converted char into 'to' buffer */
charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type);
outlen += charlen;
+
+ /* charlen (=bytes of UTF-8 for 1 character)
+ * 4bytes UTF-8(surrogate pair) is charlen=4
+ * (4bytes UTF-16 code)
+ * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4
+ * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */
+ if (charlen == 4)
+ i++;
+ else if (charlen >= 5)
+ /* 5-6bytes UTF-8 */
+ i += 2;
}
/* properly null-terminate string */
@@ -296,6 +296,46 @@ success:
}
/*
+ * cifs_utf16_bytes - how long will a string be after conversion?
+ * @utf16 - pointer to input string
+ * @maxbytes - don't go past this many bytes of input string
+ * @codepage - destination codepage
+ *
+ * Walk a utf16le string and return the number of bytes that the string will
+ * be after being converted to the given charset, not including any null
+ * termination required. Don't walk past maxbytes in the source buffer.
+ */
+int
+cifs_utf16_bytes(const __le16 *from, int maxbytes,
+ const struct nls_table *codepage)
+{
+ int i;
+ int charlen, outlen = 0;
+ int maxwords = maxbytes / 2;
+ char tmp[NLS_MAX_CHARSET_SIZE];
+ __u16 ftmp[3];
+
+ for (i = 0; i < maxwords; i++) {
+ ftmp[0] = get_unaligned_le16(&from[i]);
+ if (ftmp[0] == 0)
+ break;
+ if (i + 1 < maxwords)
+ ftmp[1] = get_unaligned_le16(&from[i + 1]);
+ else
+ ftmp[1] = 0;
+ if (i + 2 < maxwords)
+ ftmp[2] = get_unaligned_le16(&from[i + 2]);
+ else
+ ftmp[2] = 0;
+
+ charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD);
+ outlen += charlen;
+ }
+
+ return outlen;
+}
+
+/*
* cifs_strndup_from_utf16 - copy a string from wire format to the local
* codepage
* @src - source string
@@ -409,10 +449,15 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
char src_char;
__le16 dst_char;
wchar_t tmp;
+ wchar_t *wchar_to; /* UTF-16 */
+ int ret;
+ unicode_t u;
if (map_chars == NO_MAP_UNI_RSVD)
return cifs_strtoUTF16(target, source, PATH_MAX, cp);
+ wchar_to = kzalloc(6, GFP_KERNEL);
+
for (i = 0; i < srclen; j++) {
src_char = source[i];
charlen = 1;
@@ -441,11 +486,55 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
* if no match, use question mark, which at least in
* some cases serves as wild card
*/
- if (charlen < 1) {
- dst_char = cpu_to_le16(0x003f);
- charlen = 1;
+ if (charlen > 0)
+ goto ctoUTF16;
+
+ /* convert SURROGATE_PAIR */
+ if (strcmp(cp->charset, "utf8") || !wchar_to)
+ goto unknown;
+ if (*(source + i) & 0x80) {
+ charlen = utf8_to_utf32(source + i, 6, &u);
+ if (charlen < 0)
+ goto unknown;
+ } else
+ goto unknown;
+ ret = utf8s_to_utf16s(source + i, charlen,
+ UTF16_LITTLE_ENDIAN,
+ wchar_to, 6);
+ if (ret < 0)
+ goto unknown;
+
+ i += charlen;
+ dst_char = cpu_to_le16(*wchar_to);
+ if (charlen <= 3)
+ /* 1-3bytes UTF-8 to 2bytes UTF-16 */
+ put_unaligned(dst_char, &target[j]);
+ else if (charlen == 4) {
+ /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16
+ * 7-8bytes UTF-8(IVS) divided to 2 UTF-16
+ * (charlen=3+4 or 4+4) */
+ put_unaligned(dst_char, &target[j]);
+ dst_char = cpu_to_le16(*(wchar_to + 1));
+ j++;
+ put_unaligned(dst_char, &target[j]);
+ } else if (charlen >= 5) {
+ /* 5-6bytes UTF-8 to 6bytes UTF-16 */
+ put_unaligned(dst_char, &target[j]);
+ dst_char = cpu_to_le16(*(wchar_to + 1));
+ j++;
+ put_unaligned(dst_char, &target[j]);
+ dst_char = cpu_to_le16(*(wchar_to + 2));
+ j++;
+ put_unaligned(dst_char, &target[j]);
}
+ continue;
+
+unknown:
+ dst_char = cpu_to_le16(0x003f);
+ charlen = 1;
}
+
+ctoUTF16:
/*
* character may take more than one byte in the source string,
* but will take exactly two bytes in the target string
@@ -456,6 +545,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
ctoUTF16_out:
put_unaligned(0, &target[j]); /* Null terminate target unicode string */
+ kfree(wchar_to);
return j;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f5089bde3635..0a9fb6b53126 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -469,6 +469,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",nouser_xattr");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
seq_puts(s, ",mapchars");
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR)
+ seq_puts(s, ",mapposix");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
seq_puts(s, ",sfu");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 252f5c15806b..a782b22904e4 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -120,7 +120,7 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
#endif
/* Functions related to symlinks */
-extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
+extern const char *cifs_follow_link(struct dentry *direntry, void **cookie);
extern int cifs_readlink(struct dentry *direntry, char __user *buffer,
int buflen);
extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index c31ce98c1704..c63fd1dde25b 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -361,11 +361,11 @@ extern int CIFSUnixCreateHardLink(const unsigned int xid,
extern int CIFSUnixCreateSymLink(const unsigned int xid,
struct cifs_tcon *tcon,
const char *fromName, const char *toName,
- const struct nls_table *nls_codepage);
+ const struct nls_table *nls_codepage, int remap);
extern int CIFSSMBUnixQuerySymLink(const unsigned int xid,
struct cifs_tcon *tcon,
const unsigned char *searchName, char **syminfo,
- const struct nls_table *nls_codepage);
+ const struct nls_table *nls_codepage, int remap);
extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
__u16 fid, char **symlinkinfo,
const struct nls_table *nls_codepage);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 84650a51c7c4..f26ffbfc64d8 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2784,7 +2784,7 @@ copyRetry:
int
CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon,
const char *fromName, const char *toName,
- const struct nls_table *nls_codepage)
+ const struct nls_table *nls_codepage, int remap)
{
TRANSACTION2_SPI_REQ *pSMB = NULL;
TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -2804,9 +2804,9 @@ createSymLinkRetry:
if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
name_len =
- cifs_strtoUTF16((__le16 *) pSMB->FileName, fromName,
- /* find define for this maxpathcomponent */
- PATH_MAX, nls_codepage);
+ cifsConvertToUTF16((__le16 *) pSMB->FileName, fromName,
+ /* find define for this maxpathcomponent */
+ PATH_MAX, nls_codepage, remap);
name_len++; /* trailing null */
name_len *= 2;
@@ -2828,9 +2828,9 @@ createSymLinkRetry:
data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
name_len_target =
- cifs_strtoUTF16((__le16 *) data_offset, toName, PATH_MAX
- /* find define for this maxpathcomponent */
- , nls_codepage);
+ cifsConvertToUTF16((__le16 *) data_offset, toName,
+ /* find define for this maxpathcomponent */
+ PATH_MAX, nls_codepage, remap);
name_len_target++; /* trailing null */
name_len_target *= 2;
} else { /* BB improve the check for buffer overruns BB */
@@ -3034,7 +3034,7 @@ winCreateHardLinkRetry:
int
CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
const unsigned char *searchName, char **symlinkinfo,
- const struct nls_table *nls_codepage)
+ const struct nls_table *nls_codepage, int remap)
{
/* SMB_QUERY_FILE_UNIX_LINK */
TRANSACTION2_QPI_REQ *pSMB = NULL;
@@ -3055,8 +3055,9 @@ querySymLinkRetry:
if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
name_len =
- cifs_strtoUTF16((__le16 *) pSMB->FileName, searchName,
- PATH_MAX, nls_codepage);
+ cifsConvertToUTF16((__le16 *) pSMB->FileName,
+ searchName, PATH_MAX, nls_codepage,
+ remap);
name_len++; /* trailing null */
name_len *= 2;
} else { /* BB improve the check for buffer overruns BB */
@@ -4917,7 +4918,7 @@ getDFSRetry:
strncpy(pSMB->RequestFileName, search_name, name_len);
}
- if (ses->server && ses->server->sign)
+ if (ses->server->sign)
pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
pSMB->hdr.Uid = ses->Suid;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index f3bfe08e177b..8383d5ea4202 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -386,6 +386,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
rc = generic_ip_connect(server);
if (rc) {
cifs_dbg(FYI, "reconnect error %d\n", rc);
+ mutex_unlock(&server->srv_mutex);
msleep(3000);
} else {
atomic_inc(&tcpSesReconnectCount);
@@ -393,8 +394,8 @@ cifs_reconnect(struct TCP_Server_Info *server)
if (server->tcpStatus != CifsExiting)
server->tcpStatus = CifsNeedNegotiate;
spin_unlock(&GlobalMid_Lock);
+ mutex_unlock(&server->srv_mutex);
}
- mutex_unlock(&server->srv_mutex);
} while (server->tcpStatus == CifsNeedReconnect);
return rc;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 338d56936f6a..c3eb998a99bd 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -620,8 +620,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
}
rc = CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ cifs_remap(cifs_sb));
if (rc)
goto mknod_out;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index cafbf10521d5..3f50cee79df9 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -140,8 +140,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
posix_flags = cifs_posix_convert_flags(f_flags);
rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
poplock, full_path, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ cifs_remap(cifs_sb));
cifs_put_tlink(tlink);
if (rc)
@@ -1553,8 +1552,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
rc = server->ops->mand_unlock_range(cfile, flock, xid);
out:
- if (flock->fl_flags & FL_POSIX)
- posix_lock_file_wait(file, flock);
+ if (flock->fl_flags & FL_POSIX && !rc)
+ rc = posix_lock_file_wait(file, flock);
return rc;
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 55b58112d122..f621b44cb800 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -373,8 +373,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
/* could have done a find first instead but this returns more info */
rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data,
- cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ cifs_sb->local_nls, cifs_remap(cifs_sb));
cifs_put_tlink(tlink);
if (!rc) {
@@ -402,9 +401,25 @@ int cifs_get_inode_info_unix(struct inode **pinode,
rc = -ENOMEM;
} else {
/* we already have inode, update it */
+
+ /* if uniqueid is different, return error */
+ if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
+ CIFS_I(*pinode)->uniqueid != fattr.cf_uniqueid)) {
+ rc = -ESTALE;
+ goto cgiiu_exit;
+ }
+
+ /* if filetype is different, return error */
+ if (unlikely(((*pinode)->i_mode & S_IFMT) !=
+ (fattr.cf_mode & S_IFMT))) {
+ rc = -ESTALE;
+ goto cgiiu_exit;
+ }
+
cifs_fattr_to_inode(*pinode, &fattr);
}
+cgiiu_exit:
return rc;
}
@@ -839,6 +854,15 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
if (!*inode)
rc = -ENOMEM;
} else {
+ /* we already have inode, update it */
+
+ /* if filetype is different, return error */
+ if (unlikely(((*inode)->i_mode & S_IFMT) !=
+ (fattr.cf_mode & S_IFMT))) {
+ rc = -ESTALE;
+ goto cgii_exit;
+ }
+
cifs_fattr_to_inode(*inode, &fattr);
}
@@ -2215,8 +2239,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
pTcon = tlink_tcon(tlink);
rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args,
cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ cifs_remap(cifs_sb));
cifs_put_tlink(tlink);
}
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 252e672d5604..e3548f73bdea 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -626,8 +626,8 @@ cifs_hl_exit:
return rc;
}
-void *
-cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
+const char *
+cifs_follow_link(struct dentry *direntry, void **cookie)
{
struct inode *inode = d_inode(direntry);
int rc = -ENOMEM;
@@ -643,16 +643,18 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink)) {
- rc = PTR_ERR(tlink);
- tlink = NULL;
- goto out;
+ free_xid(xid);
+ return ERR_CAST(tlink);
}
tcon = tlink_tcon(tlink);
server = tcon->ses->server;
full_path = build_path_from_dentry(direntry);
- if (!full_path)
- goto out;
+ if (!full_path) {
+ free_xid(xid);
+ cifs_put_tlink(tlink);
+ return ERR_PTR(-ENOMEM);
+ }
cifs_dbg(FYI, "Full path: %s inode = 0x%p\n", full_path, inode);
@@ -670,17 +672,13 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
&target_path, cifs_sb);
kfree(full_path);
-out:
+ free_xid(xid);
+ cifs_put_tlink(tlink);
if (rc != 0) {
kfree(target_path);
- target_path = ERR_PTR(rc);
+ return ERR_PTR(rc);
}
-
- free_xid(xid);
- if (tlink)
- cifs_put_tlink(tlink);
- nd_set_link(nd, target_path);
- return NULL;
+ return *cookie = target_path;
}
int
@@ -717,7 +715,8 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
rc = create_mf_symlink(xid, pTcon, cifs_sb, full_path, symname);
else if (pTcon->unix_ext)
rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, symname,
- cifs_sb->local_nls);
+ cifs_sb->local_nls,
+ cifs_remap(cifs_sb));
/* else
rc = CIFSCreateReparseSymLink(xid, pTcon, fromName, toName,
cifs_sb_target->local_nls); */
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b4a47237486b..b1eede3678a9 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -90,6 +90,8 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
if (dentry) {
inode = d_inode(dentry);
if (inode) {
+ if (d_mountpoint(dentry))
+ goto out;
/*
* If we're generating inode numbers, then we don't
* want to clobber the existing one with the one that
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 7bfdd6066276..fc537c29044e 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -960,7 +960,8 @@ cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
/* Check for unix extensions */
if (cap_unix(tcon->ses)) {
rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path,
- cifs_sb->local_nls);
+ cifs_sb->local_nls,
+ cifs_remap(cifs_sb));
if (rc == -EREMOTE)
rc = cifs_unix_dfs_readlink(xid, tcon, full_path,
target_path,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 65cd7a84c8bc..54cbe19d9c08 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -110,7 +110,7 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
/* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */
/* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */
- if ((tcon->ses) &&
+ if ((tcon->ses) && (tcon->ses->server) &&
(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
hdr->CreditCharge = cpu_to_le16(1);
/* else CreditCharge MBZ */
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index cc9f2546ea4a..ec5c8325b503 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -279,36 +279,27 @@ static int configfs_getlink(struct dentry *dentry, char * path)
}
-static void *configfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *configfs_follow_link(struct dentry *dentry, void **cookie)
{
- int error = -ENOMEM;
unsigned long page = get_zeroed_page(GFP_KERNEL);
+ int error;
- if (page) {
- error = configfs_getlink(dentry, (char *)page);
- if (!error) {
- nd_set_link(nd, (char *)page);
- return (void *)page;
- }
- }
-
- nd_set_link(nd, ERR_PTR(error));
- return NULL;
-}
+ if (!page)
+ return ERR_PTR(-ENOMEM);
-static void configfs_put_link(struct dentry *dentry, struct nameidata *nd,
- void *cookie)
-{
- if (cookie) {
- unsigned long page = (unsigned long)cookie;
- free_page(page);
+ error = configfs_getlink(dentry, (char *)page);
+ if (!error) {
+ return *cookie = (void *)page;
}
+
+ free_page(page);
+ return ERR_PTR(error);
}
const struct inode_operations configfs_symlink_inode_operations = {
.follow_link = configfs_follow_link,
.readlink = generic_readlink,
- .put_link = configfs_put_link,
+ .put_link = free_page_put_link,
.setattr = configfs_setattr,
};
diff --git a/fs/dcache.c b/fs/dcache.c
index 656ce522a218..592c4b582495 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -322,17 +322,17 @@ static void dentry_free(struct dentry *dentry)
}
/**
- * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
+ * dentry_rcuwalk_invalidate - invalidate in-progress rcu-walk lookups
* @dentry: the target dentry
* After this call, in-progress rcu-walk path lookup will fail. This
* should be called after unhashing, and after changing d_inode (if
* the dentry has not already been unhashed).
*/
-static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
+static inline void dentry_rcuwalk_invalidate(struct dentry *dentry)
{
- assert_spin_locked(&dentry->d_lock);
- /* Go through a barrier */
- write_seqcount_barrier(&dentry->d_seq);
+ lockdep_assert_held(&dentry->d_lock);
+ /* Go through an invalidation barrier */
+ write_seqcount_invalidate(&dentry->d_seq);
}
/*
@@ -372,7 +372,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
struct inode *inode = dentry->d_inode;
__d_clear_type_and_inode(dentry);
hlist_del_init(&dentry->d_u.d_alias);
- dentry_rcuwalk_barrier(dentry);
+ dentry_rcuwalk_invalidate(dentry);
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
if (!inode->i_nlink)
@@ -494,7 +494,7 @@ void __d_drop(struct dentry *dentry)
__hlist_bl_del(&dentry->d_hash);
dentry->d_hash.pprev = NULL;
hlist_bl_unlock(b);
- dentry_rcuwalk_barrier(dentry);
+ dentry_rcuwalk_invalidate(dentry);
}
}
EXPORT_SYMBOL(__d_drop);
@@ -1239,13 +1239,13 @@ ascend:
/* might go back up the wrong parent if we have had a rename. */
if (need_seqretry(&rename_lock, seq))
goto rename_retry;
- next = child->d_child.next;
- while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) {
+ /* go into the first sibling still alive */
+ do {
+ next = child->d_child.next;
if (next == &this_parent->d_subdirs)
goto ascend;
child = list_entry(next, struct dentry, d_child);
- next = next->next;
- }
+ } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
rcu_read_unlock();
goto resume;
}
@@ -1752,7 +1752,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
if (inode)
hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
__d_set_inode_and_type(dentry, inode, add_flags);
- dentry_rcuwalk_barrier(dentry);
+ dentry_rcuwalk_invalidate(dentry);
spin_unlock(&dentry->d_lock);
fsnotify_d_instantiate(dentry, inode);
}
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 830a7e76f5c6..284f9aa0028b 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -17,7 +17,6 @@
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/pagemap.h>
-#include <linux/namei.h>
#include <linux/debugfs.h>
#include <linux/io.h>
#include <linux/slab.h>
@@ -43,17 +42,6 @@ const struct file_operations debugfs_file_operations = {
.llseek = noop_llseek,
};
-static void *debugfs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- nd_set_link(nd, d_inode(dentry)->i_private);
- return NULL;
-}
-
-const struct inode_operations debugfs_link_operations = {
- .readlink = generic_readlink,
- .follow_link = debugfs_follow_link,
-};
-
static int debugfs_u8_set(void *data, u64 val)
{
*(u8 *)data = val;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index c1e7ffb0dab6..7eaec88ea970 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -174,7 +174,7 @@ static void debugfs_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
if (S_ISLNK(inode->i_mode))
- kfree(inode->i_private);
+ kfree(inode->i_link);
}
static const struct super_operations debugfs_super_operations = {
@@ -511,8 +511,8 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
return failed_creating(dentry);
}
inode->i_mode = S_IFLNK | S_IRWXUGO;
- inode->i_op = &debugfs_link_operations;
- inode->i_private = link;
+ inode->i_op = &simple_symlink_inode_operations;
+ inode->i_link = link;
d_instantiate(dentry, inode);
return end_creating(dentry);
}
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index d08e079ea5d3..754fd6c0b747 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -921,8 +921,8 @@ static int tcp_accept_from_sock(struct connection *con)
mutex_unlock(&connections_lock);
memset(&peeraddr, 0, sizeof(peeraddr));
- result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
- IPPROTO_TCP, &newsock);
+ result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
+ SOCK_STREAM, IPPROTO_TCP, &newsock);
if (result < 0)
return -ENOMEM;
@@ -1173,8 +1173,8 @@ static void tcp_connect_to_sock(struct connection *con)
goto out;
/* Create a socket to communicate with */
- result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
- IPPROTO_TCP, &sock);
+ result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
+ SOCK_STREAM, IPPROTO_TCP, &sock);
if (result < 0)
goto out_err;
@@ -1258,8 +1258,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
addr_len = sizeof(struct sockaddr_in6);
/* Create a socket to communicate with */
- result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
- IPPROTO_TCP, &sock);
+ result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
+ SOCK_STREAM, IPPROTO_TCP, &sock);
if (result < 0) {
log_print("Can't create listening comms socket");
goto create_out;
@@ -1365,8 +1365,8 @@ static int sctp_listen_for_all(void)
log_print("Using SCTP for communications");
- result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET,
- IPPROTO_SCTP, &sock);
+ result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
+ SOCK_SEQPACKET, IPPROTO_SCTP, &sock);
if (result < 0) {
log_print("Can't create comms socket, check SCTP is loaded");
goto out;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index fc850b55db67..3c4db1172d22 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -170,7 +170,6 @@ out_unlock:
* @directory_inode: inode of the new file's dentry's parent in ecryptfs
* @ecryptfs_dentry: New file's dentry in ecryptfs
* @mode: The mode of the new file
- * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
*
* Creates the underlying file and the eCryptfs inode which will link to
* it. It will also update the eCryptfs directory inode to mimic the
@@ -384,7 +383,7 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
* ecryptfs_lookup
* @ecryptfs_dir_inode: The eCryptfs directory inode
* @ecryptfs_dentry: The eCryptfs dentry that we are looking up
- * @ecryptfs_nd: nameidata; may be NULL
+ * @flags: lookup flags
*
* Find a file on disk. If the file does not exist, then we'll add it to the
* dentry cache and continue on to read it from the disk.
@@ -675,18 +674,16 @@ out:
return rc ? ERR_PTR(rc) : buf;
}
-static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *ecryptfs_follow_link(struct dentry *dentry, void **cookie)
{
size_t len;
char *buf = ecryptfs_readlink_lower(dentry, &len);
if (IS_ERR(buf))
- goto out;
+ return buf;
fsstack_copy_attr_atime(d_inode(dentry),
d_inode(ecryptfs_dentry_to_lower(dentry)));
buf[len] = '\0';
-out:
- nd_set_link(nd, buf);
- return NULL;
+ return *cookie = buf;
}
/**
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index b47c7b8dc275..a364fd0965ec 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -16,5 +16,5 @@
libore-y := ore.o ore_raid.o
obj-$(CONFIG_ORE) += libore.o
-exofs-y := inode.o file.o symlink.o namei.o dir.o super.o sys.o
+exofs-y := inode.o file.o namei.o dir.o super.o sys.o
obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index ad9cac670a47..2e86086bc940 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -207,10 +207,6 @@ extern const struct address_space_operations exofs_aops;
extern const struct inode_operations exofs_dir_inode_operations;
extern const struct inode_operations exofs_special_inode_operations;
-/* symlink.c */
-extern const struct inode_operations exofs_symlink_inode_operations;
-extern const struct inode_operations exofs_fast_symlink_inode_operations;
-
/* exofs_init_comps will initialize an ore_components device array
* pointing to a single ore_comp struct, and a round-robin view
* of the device table.
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 786e4cc8c889..73c64daa0f55 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1222,10 +1222,11 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
inode->i_fop = &exofs_dir_operations;
inode->i_mapping->a_ops = &exofs_aops;
} else if (S_ISLNK(inode->i_mode)) {
- if (exofs_inode_is_fast_symlink(inode))
- inode->i_op = &exofs_fast_symlink_inode_operations;
- else {
- inode->i_op = &exofs_symlink_inode_operations;
+ if (exofs_inode_is_fast_symlink(inode)) {
+ inode->i_op = &simple_symlink_inode_operations;
+ inode->i_link = (char *)oi->i_data;
+ } else {
+ inode->i_op = &page_symlink_inode_operations;
inode->i_mapping->a_ops = &exofs_aops;
}
} else {
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 5ae25e431191..09a6bb1ad63c 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -113,7 +113,7 @@ static int exofs_symlink(struct inode *dir, struct dentry *dentry,
oi = exofs_i(inode);
if (l > sizeof(oi->i_data)) {
/* slow symlink */
- inode->i_op = &exofs_symlink_inode_operations;
+ inode->i_op = &page_symlink_inode_operations;
inode->i_mapping->a_ops = &exofs_aops;
memset(oi->i_data, 0, sizeof(oi->i_data));
@@ -122,7 +122,8 @@ static int exofs_symlink(struct inode *dir, struct dentry *dentry,
goto out_fail;
} else {
/* fast symlink */
- inode->i_op = &exofs_fast_symlink_inode_operations;
+ inode->i_op = &simple_symlink_inode_operations;
+ inode->i_link = (char *)oi->i_data;
memcpy(oi->i_data, symname, l);
inode->i_size = l-1;
}
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c
deleted file mode 100644
index 6f6f3a4c1365..000000000000
--- a/fs/exofs/symlink.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2005, 2006
- * Avishay Traeger (avishay@gmail.com)
- * Copyright (C) 2008, 2009
- * Boaz Harrosh <ooo@electrozaur.com>
- *
- * Copyrights for code taken from ext2:
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- * from
- * linux/fs/minix/inode.c
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * This file is part of exofs.
- *
- * exofs is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation. Since it is based on ext2, and the only
- * valid version of GPL for the Linux kernel is version 2, the only valid
- * version of GPL for exofs is version 2.
- *
- * exofs is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with exofs; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <linux/namei.h>
-
-#include "exofs.h"
-
-static void *exofs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct exofs_i_info *oi = exofs_i(d_inode(dentry));
-
- nd_set_link(nd, (char *)oi->i_data);
- return NULL;
-}
-
-const struct inode_operations exofs_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
-};
-
-const struct inode_operations exofs_fast_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = exofs_follow_link,
-};
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index f460ae36d5b7..5c09776d347f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1403,6 +1403,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
inode->i_mapping->a_ops = &ext2_aops;
} else if (S_ISLNK(inode->i_mode)) {
if (ext2_inode_is_fast_symlink(inode)) {
+ inode->i_link = (char *)ei->i_data;
inode->i_op = &ext2_fast_symlink_inode_operations;
nd_terminate_link(ei->i_data, inode->i_size,
sizeof(ei->i_data) - 1);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 3e074a9ccbe6..13ec54a99c96 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -189,7 +189,8 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
} else {
/* fast symlink */
inode->i_op = &ext2_fast_symlink_inode_operations;
- memcpy((char*)(EXT2_I(inode)->i_data),symname,l);
+ inode->i_link = (char*)EXT2_I(inode)->i_data;
+ memcpy(inode->i_link, symname, l);
inode->i_size = l-1;
}
mark_inode_dirty(inode);
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c
index 20608f17c2e5..ae17179f3810 100644
--- a/fs/ext2/symlink.c
+++ b/fs/ext2/symlink.c
@@ -19,14 +19,6 @@
#include "ext2.h"
#include "xattr.h"
-#include <linux/namei.h>
-
-static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct ext2_inode_info *ei = EXT2_I(d_inode(dentry));
- nd_set_link(nd, (char *)ei->i_data);
- return NULL;
-}
const struct inode_operations ext2_symlink_inode_operations = {
.readlink = generic_readlink,
@@ -43,7 +35,7 @@ const struct inode_operations ext2_symlink_inode_operations = {
const struct inode_operations ext2_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = ext2_follow_link,
+ .follow_link = simple_follow_link,
.setattr = ext2_setattr,
#ifdef CONFIG_EXT2_FS_XATTR
.setxattr = generic_setxattr,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2ee2dc4351d1..6c7e5468a2f8 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2999,6 +2999,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
inode->i_op = &ext3_fast_symlink_inode_operations;
nd_terminate_link(ei->i_data, inode->i_size,
sizeof(ei->i_data) - 1);
+ inode->i_link = (char *)ei->i_data;
} else {
inode->i_op = &ext3_symlink_inode_operations;
ext3_set_aops(inode);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 4264b9bd0002..c9e767cd4b67 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2308,7 +2308,8 @@ retry:
}
} else {
inode->i_op = &ext3_fast_symlink_inode_operations;
- memcpy((char*)&EXT3_I(inode)->i_data,symname,l);
+ inode->i_link = (char*)&EXT3_I(inode)->i_data;
+ memcpy(inode->i_link, symname, l);
inode->i_size = l-1;
}
EXT3_I(inode)->i_disksize = inode->i_size;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index a9312f0a54e5..5ed0044fbb37 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1908,7 +1908,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_mount_state = le16_to_cpu(es->s_state);
sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb));
sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb));
- for (i=0; i < 4; i++)
+ for (i = 0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
sbi->s_def_hash_version = es->s_def_hash_version;
i = le32_to_cpu(es->s_flags);
diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c
index ea96df3c58db..c08c59094ae6 100644
--- a/fs/ext3/symlink.c
+++ b/fs/ext3/symlink.c
@@ -17,17 +17,9 @@
* ext3 symlink handling code
*/
-#include <linux/namei.h>
#include "ext3.h"
#include "xattr.h"
-static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct ext3_inode_info *ei = EXT3_I(d_inode(dentry));
- nd_set_link(nd, (char*)ei->i_data);
- return NULL;
-}
-
const struct inode_operations ext3_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
@@ -43,7 +35,7 @@ const struct inode_operations ext3_symlink_inode_operations = {
const struct inode_operations ext3_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = ext3_follow_link,
+ .follow_link = simple_follow_link,
.setattr = ext3_setattr,
#ifdef CONFIG_EXT3_FS_XATTR
.setxattr = generic_setxattr,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9a83f149ac85..0a3b72d1d458 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2847,6 +2847,7 @@ extern int ext4_mpage_readpages(struct address_space *mapping,
unsigned nr_pages);
/* symlink.c */
+extern const struct inode_operations ext4_encrypted_symlink_inode_operations;
extern const struct inode_operations ext4_symlink_inode_operations;
extern const struct inode_operations ext4_fast_symlink_inode_operations;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0554b0b5957b..5168c9b56880 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4213,8 +4213,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
inode->i_op = &ext4_dir_inode_operations;
inode->i_fop = &ext4_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
- if (ext4_inode_is_fast_symlink(inode) &&
- !ext4_encrypted_inode(inode)) {
+ if (ext4_encrypted_inode(inode)) {
+ inode->i_op = &ext4_encrypted_symlink_inode_operations;
+ ext4_set_aops(inode);
+ } else if (ext4_inode_is_fast_symlink(inode)) {
+ inode->i_link = (char *)ei->i_data;
inode->i_op = &ext4_fast_symlink_inode_operations;
nd_terminate_link(ei->i_data, inode->i_size,
sizeof(ei->i_data) - 1);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 814f3beb4369..5fdb9f6aa869 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3206,10 +3206,12 @@ static int ext4_symlink(struct inode *dir,
goto err_drop_inode;
sd->len = cpu_to_le16(ostr.len);
disk_link.name = (char *) sd;
+ inode->i_op = &ext4_encrypted_symlink_inode_operations;
}
if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
- inode->i_op = &ext4_symlink_inode_operations;
+ if (!encryption_required)
+ inode->i_op = &ext4_symlink_inode_operations;
ext4_set_aops(inode);
/*
* We cannot call page_symlink() with transaction started
@@ -3249,9 +3251,10 @@ static int ext4_symlink(struct inode *dir,
} else {
/* clear the extent format for fast symlink */
ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
- inode->i_op = encryption_required ?
- &ext4_symlink_inode_operations :
- &ext4_fast_symlink_inode_operations;
+ if (!encryption_required) {
+ inode->i_op = &ext4_fast_symlink_inode_operations;
+ inode->i_link = (char *)&EXT4_I(inode)->i_data;
+ }
memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name,
disk_link.len);
inode->i_size = disk_link.len - 1;
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 187b78920314..ba5bd18a9825 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -23,7 +23,7 @@
#include "xattr.h"
#ifdef CONFIG_EXT4_FS_ENCRYPTION
-static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *ext4_follow_link(struct dentry *dentry, void **cookie)
{
struct page *cpage = NULL;
char *caddr, *paddr = NULL;
@@ -35,12 +35,9 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
int res;
u32 plen, max_size = inode->i_sb->s_blocksize;
- if (!ext4_encrypted_inode(inode))
- return page_follow_link_light(dentry, nd);
-
ctx = ext4_get_fname_crypto_ctx(inode, inode->i_sb->s_blocksize);
if (IS_ERR(ctx))
- return ctx;
+ return ERR_CAST(ctx);
if (ext4_inode_is_fast_symlink(inode)) {
caddr = (char *) EXT4_I(inode)->i_data;
@@ -49,7 +46,7 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
cpage = read_mapping_page(inode->i_mapping, 0, NULL);
if (IS_ERR(cpage)) {
ext4_put_fname_crypto_ctx(&ctx);
- return cpage;
+ return ERR_CAST(cpage);
}
caddr = kmap(cpage);
caddr[size] = 0;
@@ -80,13 +77,12 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
/* Null-terminate the name */
if (res <= plen)
paddr[res] = '\0';
- nd_set_link(nd, paddr);
ext4_put_fname_crypto_ctx(&ctx);
if (cpage) {
kunmap(cpage);
page_cache_release(cpage);
}
- return NULL;
+ return *cookie = paddr;
errout:
ext4_put_fname_crypto_ctx(&ctx);
if (cpage) {
@@ -97,36 +93,22 @@ errout:
return ERR_PTR(res);
}
-static void ext4_put_link(struct dentry *dentry, struct nameidata *nd,
- void *cookie)
-{
- struct page *page = cookie;
-
- if (!page) {
- kfree(nd_get_link(nd));
- } else {
- kunmap(page);
- page_cache_release(page);
- }
-}
+const struct inode_operations ext4_encrypted_symlink_inode_operations = {
+ .readlink = generic_readlink,
+ .follow_link = ext4_follow_link,
+ .put_link = kfree_put_link,
+ .setattr = ext4_setattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .listxattr = ext4_listxattr,
+ .removexattr = generic_removexattr,
+};
#endif
-static void *ext4_follow_fast_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct ext4_inode_info *ei = EXT4_I(d_inode(dentry));
- nd_set_link(nd, (char *) ei->i_data);
- return NULL;
-}
-
const struct inode_operations ext4_symlink_inode_operations = {
.readlink = generic_readlink,
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
- .follow_link = ext4_follow_link,
- .put_link = ext4_put_link,
-#else
.follow_link = page_follow_link_light,
.put_link = page_put_link,
-#endif
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
@@ -136,7 +118,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
const struct inode_operations ext4_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = ext4_follow_fast_link,
+ .follow_link = simple_follow_link,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 05f0f663f14c..c629762005bc 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -72,6 +72,25 @@ config F2FS_CHECK_FS
If you want to improve the performance, say N.
+config F2FS_FS_ENCRYPTION
+ bool "F2FS Encryption"
+ depends on F2FS_FS
+ depends on F2FS_FS_XATTR
+ select CRYPTO_AES
+ select CRYPTO_CBC
+ select CRYPTO_ECB
+ select CRYPTO_XTS
+ select CRYPTO_CTS
+ select CRYPTO_CTR
+ select CRYPTO_SHA256
+ select KEYS
+ select ENCRYPTED_KEYS
+ help
+ Enable encryption of f2fs files and directories. This
+ feature is similar to ecryptfs, but it is more memory
+ efficient since it avoids caching the encrypted and
+ decrypted pages in the page cache.
+
config F2FS_IO_TRACE
bool "F2FS IO tracer"
depends on F2FS_FS
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index d92397731db8..396be1a39e55 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -6,3 +6,5 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o
+f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o \
+ crypto_key.o crypto_fname.o
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 4320ffab3495..c8f25f7241f0 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -334,51 +334,45 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode,
struct page *dpage)
{
struct posix_acl *p;
+ struct posix_acl *clone;
int ret;
+ *acl = NULL;
+ *default_acl = NULL;
+
if (S_ISLNK(*mode) || !IS_POSIXACL(dir))
- goto no_acl;
+ return 0;
p = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage);
- if (IS_ERR(p)) {
- if (p == ERR_PTR(-EOPNOTSUPP))
- goto apply_umask;
- return PTR_ERR(p);
+ if (!p || p == ERR_PTR(-EOPNOTSUPP)) {
+ *mode &= ~current_umask();
+ return 0;
}
+ if (IS_ERR(p))
+ return PTR_ERR(p);
- if (!p)
- goto apply_umask;
-
- *acl = f2fs_acl_clone(p, GFP_NOFS);
- if (!*acl)
+ clone = f2fs_acl_clone(p, GFP_NOFS);
+ if (!clone)
goto no_mem;
- ret = f2fs_acl_create_masq(*acl, mode);
+ ret = f2fs_acl_create_masq(clone, mode);
if (ret < 0)
goto no_mem_clone;
- if (ret == 0) {
- posix_acl_release(*acl);
- *acl = NULL;
- }
+ if (ret == 0)
+ posix_acl_release(clone);
+ else
+ *acl = clone;
- if (!S_ISDIR(*mode)) {
+ if (!S_ISDIR(*mode))
posix_acl_release(p);
- *default_acl = NULL;
- } else {
+ else
*default_acl = p;
- }
- return 0;
-apply_umask:
- *mode &= ~current_umask();
-no_acl:
- *default_acl = NULL;
- *acl = NULL;
return 0;
no_mem_clone:
- posix_acl_release(*acl);
+ posix_acl_release(clone);
no_mem:
posix_acl_release(p);
return -ENOMEM;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index a5e17a2a0781..b70bbe1a6a8c 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -52,9 +52,11 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
struct address_space *mapping = META_MAPPING(sbi);
struct page *page;
struct f2fs_io_info fio = {
+ .sbi = sbi,
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO,
.blk_addr = index,
+ .encrypted_page = NULL,
};
repeat:
page = grab_cache_page(mapping, index);
@@ -65,7 +67,9 @@ repeat:
if (PageUptodate(page))
goto out;
- if (f2fs_submit_page_bio(sbi, page, &fio))
+ fio.page = page;
+
+ if (f2fs_submit_page_bio(&fio))
goto repeat;
lock_page(page);
@@ -77,8 +81,7 @@ out:
return page;
}
-static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi,
- block_t blkaddr, int type)
+bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
{
switch (type) {
case META_NAT:
@@ -118,8 +121,10 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
struct page *page;
block_t blkno = start;
struct f2fs_io_info fio = {
+ .sbi = sbi,
.type = META,
- .rw = READ_SYNC | REQ_META | REQ_PRIO
+ .rw = READ_SYNC | REQ_META | REQ_PRIO,
+ .encrypted_page = NULL,
};
for (; nrpages-- > 0; blkno++) {
@@ -161,7 +166,8 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
continue;
}
- f2fs_submit_page_mbio(sbi, page, &fio);
+ fio.page = page;
+ f2fs_submit_page_mbio(&fio);
f2fs_put_page(page, 0);
}
out:
@@ -510,7 +516,12 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
grab_meta_page(sbi, start_blk + index);
index = 1;
- spin_lock(&im->ino_lock);
+
+ /*
+ * we don't need to do spin_lock(&im->ino_lock) here, since all the
+ * orphan inode operations are covered under f2fs_lock_op().
+ * And, spin_lock should be avoided due to page operations below.
+ */
head = &im->ino_list;
/* loop for each orphan inode entry and write them in Jornal block */
@@ -550,8 +561,6 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
set_page_dirty(page);
f2fs_put_page(page, 1);
}
-
- spin_unlock(&im->ino_lock);
}
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -879,10 +888,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
nid_t last_nid = nm_i->next_scan_nid;
block_t start_blk;
- struct page *cp_page;
unsigned int data_sum_blocks, orphan_blocks;
__u32 crc32 = 0;
- void *kaddr;
int i;
int cp_payload_blks = __cp_payload(sbi);
@@ -979,19 +986,11 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk = __start_cp_addr(sbi);
/* write out checkpoint buffer at block 0 */
- cp_page = grab_meta_page(sbi, start_blk++);
- kaddr = page_address(cp_page);
- memcpy(kaddr, ckpt, F2FS_BLKSIZE);
- set_page_dirty(cp_page);
- f2fs_put_page(cp_page, 1);
-
- for (i = 1; i < 1 + cp_payload_blks; i++) {
- cp_page = grab_meta_page(sbi, start_blk++);
- kaddr = page_address(cp_page);
- memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, F2FS_BLKSIZE);
- set_page_dirty(cp_page);
- f2fs_put_page(cp_page, 1);
- }
+ update_meta_page(sbi, ckpt, start_blk++);
+
+ for (i = 1; i < 1 + cp_payload_blks; i++)
+ update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
+ start_blk++);
if (orphan_num) {
write_orphan_inodes(sbi, start_blk);
@@ -1006,11 +1005,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
/* writeout checkpoint block */
- cp_page = grab_meta_page(sbi, start_blk);
- kaddr = page_address(cp_page);
- memcpy(kaddr, ckpt, F2FS_BLKSIZE);
- set_page_dirty(cp_page);
- f2fs_put_page(cp_page, 1);
+ update_meta_page(sbi, ckpt, start_blk);
/* wait for previous submitted node/meta pages writeback */
wait_on_all_pages_writeback(sbi);
@@ -1036,7 +1031,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (unlikely(f2fs_cp_error(sbi)))
return;
- clear_prefree_segments(sbi);
+ clear_prefree_segments(sbi, cpc);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
}
@@ -1051,7 +1046,8 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
mutex_lock(&sbi->cp_mutex);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
- (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC))
+ (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
+ (cpc->reason == CP_DISCARD && !sbi->discard_blks)))
goto out;
if (unlikely(f2fs_cp_error(sbi)))
goto out;
diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c
new file mode 100644
index 000000000000..4a62ef14e932
--- /dev/null
+++ b/fs/f2fs/crypto.c
@@ -0,0 +1,491 @@
+/*
+ * linux/fs/f2fs/crypto.c
+ *
+ * Copied from linux/fs/ext4/crypto.c
+ *
+ * Copyright (C) 2015, Google, Inc.
+ * Copyright (C) 2015, Motorola Mobility
+ *
+ * This contains encryption functions for f2fs
+ *
+ * Written by Michael Halcrow, 2014.
+ *
+ * Filename encryption additions
+ * Uday Savagaonkar, 2014
+ * Encryption policy handling additions
+ * Ildar Muslukhov, 2014
+ * Remove ext4_encrypted_zeroout(),
+ * add f2fs_restore_and_release_control_page()
+ * Jaegeuk Kim, 2015.
+ *
+ * This has not yet undergone a rigorous security audit.
+ *
+ * The usage of AES-XTS should conform to recommendations in NIST
+ * Special Publication 800-38E and IEEE P1619/D16.
+ */
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <keys/user-type.h>
+#include <keys/encrypted-type.h>
+#include <linux/crypto.h>
+#include <linux/ecryptfs.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/key.h>
+#include <linux/list.h>
+#include <linux/mempool.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+#include <linux/spinlock_types.h>
+#include <linux/f2fs_fs.h>
+#include <linux/ratelimit.h>
+#include <linux/bio.h>
+
+#include "f2fs.h"
+#include "xattr.h"
+
+/* Encryption added and removed here! (L: */
+
+static unsigned int num_prealloc_crypto_pages = 32;
+static unsigned int num_prealloc_crypto_ctxs = 128;
+
+module_param(num_prealloc_crypto_pages, uint, 0444);
+MODULE_PARM_DESC(num_prealloc_crypto_pages,
+ "Number of crypto pages to preallocate");
+module_param(num_prealloc_crypto_ctxs, uint, 0444);
+MODULE_PARM_DESC(num_prealloc_crypto_ctxs,
+ "Number of crypto contexts to preallocate");
+
+static mempool_t *f2fs_bounce_page_pool;
+
+static LIST_HEAD(f2fs_free_crypto_ctxs);
+static DEFINE_SPINLOCK(f2fs_crypto_ctx_lock);
+
+static struct workqueue_struct *f2fs_read_workqueue;
+static DEFINE_MUTEX(crypto_init);
+
+static struct kmem_cache *f2fs_crypto_ctx_cachep;
+struct kmem_cache *f2fs_crypt_info_cachep;
+
+/**
+ * f2fs_release_crypto_ctx() - Releases an encryption context
+ * @ctx: The encryption context to release.
+ *
+ * If the encryption context was allocated from the pre-allocated pool, returns
+ * it to that pool. Else, frees it.
+ *
+ * If there's a bounce page in the context, this frees that.
+ */
+void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx)
+{
+ unsigned long flags;
+
+ if (ctx->flags & F2FS_WRITE_PATH_FL && ctx->w.bounce_page) {
+ mempool_free(ctx->w.bounce_page, f2fs_bounce_page_pool);
+ ctx->w.bounce_page = NULL;
+ }
+ ctx->w.control_page = NULL;
+ if (ctx->flags & F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL) {
+ kmem_cache_free(f2fs_crypto_ctx_cachep, ctx);
+ } else {
+ spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags);
+ list_add(&ctx->free_list, &f2fs_free_crypto_ctxs);
+ spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags);
+ }
+}
+
+/**
+ * f2fs_get_crypto_ctx() - Gets an encryption context
+ * @inode: The inode for which we are doing the crypto
+ *
+ * Allocates and initializes an encryption context.
+ *
+ * Return: An allocated and initialized encryption context on success; an
+ * ERR_PTR()-encoded error value otherwise (never NULL).
+ */
+struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode)
+{
+ struct f2fs_crypto_ctx *ctx = NULL;
+ unsigned long flags;
+ struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
+
+ if (ci == NULL)
+ return ERR_PTR(-ENOKEY);
+
+ /*
+ * We first try getting the ctx from a free list because in
+ * the common case the ctx will have an allocated and
+ * initialized crypto tfm, so it's probably a worthwhile
+ * optimization. For the bounce page, we first try getting it
+ * from the kernel allocator because that's just about as fast
+ * as getting it from a list and because a cache of free pages
+ * should generally be a "last resort" option for a filesystem
+ * to be able to do its job.
+ */
+ spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags);
+ ctx = list_first_entry_or_null(&f2fs_free_crypto_ctxs,
+ struct f2fs_crypto_ctx, free_list);
+ if (ctx)
+ list_del(&ctx->free_list);
+ spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags);
+ if (!ctx) {
+ ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_NOFS);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+ ctx->flags |= F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
+ } else {
+ ctx->flags &= ~F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
+ }
+ ctx->flags &= ~F2FS_WRITE_PATH_FL;
+ return ctx;
+}
+
+/*
+ * Call f2fs_decrypt on every single page, reusing the encryption
+ * context.
+ */
+static void completion_pages(struct work_struct *work)
+{
+ struct f2fs_crypto_ctx *ctx =
+ container_of(work, struct f2fs_crypto_ctx, r.work);
+ struct bio *bio = ctx->r.bio;
+ struct bio_vec *bv;
+ int i;
+
+ bio_for_each_segment_all(bv, bio, i) {
+ struct page *page = bv->bv_page;
+ int ret = f2fs_decrypt(ctx, page);
+
+ if (ret) {
+ WARN_ON_ONCE(1);
+ SetPageError(page);
+ } else
+ SetPageUptodate(page);
+ unlock_page(page);
+ }
+ f2fs_release_crypto_ctx(ctx);
+ bio_put(bio);
+}
+
+void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *ctx, struct bio *bio)
+{
+ INIT_WORK(&ctx->r.work, completion_pages);
+ ctx->r.bio = bio;
+ queue_work(f2fs_read_workqueue, &ctx->r.work);
+}
+
+static void f2fs_crypto_destroy(void)
+{
+ struct f2fs_crypto_ctx *pos, *n;
+
+ list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list)
+ kmem_cache_free(f2fs_crypto_ctx_cachep, pos);
+ INIT_LIST_HEAD(&f2fs_free_crypto_ctxs);
+ if (f2fs_bounce_page_pool)
+ mempool_destroy(f2fs_bounce_page_pool);
+ f2fs_bounce_page_pool = NULL;
+}
+
+/**
+ * f2fs_crypto_initialize() - Set up for f2fs encryption.
+ *
+ * We only call this when we start accessing encrypted files, since it
+ * results in memory getting allocated that wouldn't otherwise be used.
+ *
+ * Return: Zero on success, non-zero otherwise.
+ */
+int f2fs_crypto_initialize(void)
+{
+ int i, res = -ENOMEM;
+
+ if (f2fs_bounce_page_pool)
+ return 0;
+
+ mutex_lock(&crypto_init);
+ if (f2fs_bounce_page_pool)
+ goto already_initialized;
+
+ for (i = 0; i < num_prealloc_crypto_ctxs; i++) {
+ struct f2fs_crypto_ctx *ctx;
+
+ ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_KERNEL);
+ if (!ctx)
+ goto fail;
+ list_add(&ctx->free_list, &f2fs_free_crypto_ctxs);
+ }
+
+ /* must be allocated at the last step to avoid race condition above */
+ f2fs_bounce_page_pool =
+ mempool_create_page_pool(num_prealloc_crypto_pages, 0);
+ if (!f2fs_bounce_page_pool)
+ goto fail;
+
+already_initialized:
+ mutex_unlock(&crypto_init);
+ return 0;
+fail:
+ f2fs_crypto_destroy();
+ mutex_unlock(&crypto_init);
+ return res;
+}
+
+/**
+ * f2fs_exit_crypto() - Shutdown the f2fs encryption system
+ */
+void f2fs_exit_crypto(void)
+{
+ f2fs_crypto_destroy();
+
+ if (f2fs_read_workqueue)
+ destroy_workqueue(f2fs_read_workqueue);
+ if (f2fs_crypto_ctx_cachep)
+ kmem_cache_destroy(f2fs_crypto_ctx_cachep);
+ if (f2fs_crypt_info_cachep)
+ kmem_cache_destroy(f2fs_crypt_info_cachep);
+}
+
+int __init f2fs_init_crypto(void)
+{
+ int res = -ENOMEM;
+
+ f2fs_read_workqueue = alloc_workqueue("f2fs_crypto", WQ_HIGHPRI, 0);
+ if (!f2fs_read_workqueue)
+ goto fail;
+
+ f2fs_crypto_ctx_cachep = KMEM_CACHE(f2fs_crypto_ctx,
+ SLAB_RECLAIM_ACCOUNT);
+ if (!f2fs_crypto_ctx_cachep)
+ goto fail;
+
+ f2fs_crypt_info_cachep = KMEM_CACHE(f2fs_crypt_info,
+ SLAB_RECLAIM_ACCOUNT);
+ if (!f2fs_crypt_info_cachep)
+ goto fail;
+
+ return 0;
+fail:
+ f2fs_exit_crypto();
+ return res;
+}
+
+void f2fs_restore_and_release_control_page(struct page **page)
+{
+ struct f2fs_crypto_ctx *ctx;
+ struct page *bounce_page;
+
+ /* A page that still has a mapping is not a bounce page; leave it as is. */
+ if ((*page)->mapping)
+ return;
+
+ /* The bounce data page is unmapped. */
+ bounce_page = *page;
+ ctx = (struct f2fs_crypto_ctx *)page_private(bounce_page);
+
+ /* restore control page */
+ *page = ctx->w.control_page;
+
+ f2fs_restore_control_page(bounce_page);
+}
+
+void f2fs_restore_control_page(struct page *data_page)
+{
+ struct f2fs_crypto_ctx *ctx =
+ (struct f2fs_crypto_ctx *)page_private(data_page);
+
+ set_page_private(data_page, (unsigned long)NULL);
+ ClearPagePrivate(data_page);
+ unlock_page(data_page);
+ f2fs_release_crypto_ctx(ctx);
+}
+
+/**
+ * f2fs_crypt_complete() - The completion callback for page encryption
+ * @req: The asynchronous encryption request context
+ * @res: The result of the encryption operation
+ */
+static void f2fs_crypt_complete(struct crypto_async_request *req, int res)
+{
+ struct f2fs_completion_result *ecr = req->data;
+
+ if (res == -EINPROGRESS)
+ return;
+ ecr->res = res;
+ complete(&ecr->completion);
+}
+
+typedef enum {
+ F2FS_DECRYPT = 0,
+ F2FS_ENCRYPT,
+} f2fs_direction_t;
+
+static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx,
+ struct inode *inode,
+ f2fs_direction_t rw,
+ pgoff_t index,
+ struct page *src_page,
+ struct page *dest_page)
+{
+ u8 xts_tweak[F2FS_XTS_TWEAK_SIZE];
+ struct ablkcipher_request *req = NULL;
+ DECLARE_F2FS_COMPLETION_RESULT(ecr);
+ struct scatterlist dst, src;
+ struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
+ struct crypto_ablkcipher *tfm = ci->ci_ctfm;
+ int res = 0;
+
+ req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ if (!req) {
+ printk_ratelimited(KERN_ERR
+ "%s: crypto_request_alloc() failed\n",
+ __func__);
+ return -ENOMEM;
+ }
+ ablkcipher_request_set_callback(
+ req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ f2fs_crypt_complete, &ecr);
+
+ BUILD_BUG_ON(F2FS_XTS_TWEAK_SIZE < sizeof(index));
+ memcpy(xts_tweak, &index, sizeof(index));
+ memset(&xts_tweak[sizeof(index)], 0,
+ F2FS_XTS_TWEAK_SIZE - sizeof(index));
+
+ sg_init_table(&dst, 1);
+ sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0);
+ sg_init_table(&src, 1);
+ sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0);
+ ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE,
+ xts_tweak);
+ if (rw == F2FS_DECRYPT)
+ res = crypto_ablkcipher_decrypt(req);
+ else
+ res = crypto_ablkcipher_encrypt(req);
+ if (res == -EINPROGRESS || res == -EBUSY) {
+ BUG_ON(req->base.data != &ecr);
+ wait_for_completion(&ecr.completion);
+ res = ecr.res;
+ }
+ ablkcipher_request_free(req);
+ if (res) {
+ printk_ratelimited(KERN_ERR
+ "%s: crypto_ablkcipher_encrypt() returned %d\n",
+ __func__, res);
+ return res;
+ }
+ return 0;
+}
+
+static struct page *alloc_bounce_page(struct f2fs_crypto_ctx *ctx)
+{
+ ctx->w.bounce_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOWAIT);
+ if (ctx->w.bounce_page == NULL)
+ return ERR_PTR(-ENOMEM);
+ ctx->flags |= F2FS_WRITE_PATH_FL;
+ return ctx->w.bounce_page;
+}
+
+/**
+ * f2fs_encrypt() - Encrypts a page
+ * @inode: The inode for which the encryption should take place
+ * @plaintext_page: The page to encrypt. Must be locked.
+ *
+ * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx
+ * encryption context.
+ *
+ * Called on the page write path. The caller must call
+ * f2fs_restore_control_page() on the returned ciphertext page to
+ * release the bounce buffer and the encryption context.
+ *
+ * Return: An allocated page with the encrypted content on success. Else, an
+ * ERR_PTR()-encoded error value.
+ */
+struct page *f2fs_encrypt(struct inode *inode,
+ struct page *plaintext_page)
+{
+ struct f2fs_crypto_ctx *ctx;
+ struct page *ciphertext_page = NULL;
+ int err;
+
+ BUG_ON(!PageLocked(plaintext_page));
+
+ ctx = f2fs_get_crypto_ctx(inode);
+ if (IS_ERR(ctx))
+ return (struct page *)ctx;
+
+ /* The encryption operation will require a bounce page. */
+ ciphertext_page = alloc_bounce_page(ctx);
+ if (IS_ERR(ciphertext_page))
+ goto err_out;
+
+ ctx->w.control_page = plaintext_page;
+ err = f2fs_page_crypto(ctx, inode, F2FS_ENCRYPT, plaintext_page->index,
+ plaintext_page, ciphertext_page);
+ if (err) {
+ ciphertext_page = ERR_PTR(err);
+ goto err_out;
+ }
+
+ SetPagePrivate(ciphertext_page);
+ set_page_private(ciphertext_page, (unsigned long)ctx);
+ lock_page(ciphertext_page);
+ return ciphertext_page;
+
+err_out:
+ f2fs_release_crypto_ctx(ctx);
+ return ciphertext_page;
+}
+
+/**
+ * f2fs_decrypt() - Decrypts a page in-place
+ * @ctx: The encryption context.
+ * @page: The page to decrypt. Must be locked.
+ *
+ * Decrypts page in-place using the ctx encryption context.
+ *
+ * Called from the read completion callback.
+ *
+ * Return: Zero on success, non-zero otherwise.
+ */
+int f2fs_decrypt(struct f2fs_crypto_ctx *ctx, struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+
+ return f2fs_page_crypto(ctx, page->mapping->host,
+ F2FS_DECRYPT, page->index, page, page);
+}
+
+/*
+ * Convenience function which takes care of allocating and
+ * deallocating the encryption context
+ */
+int f2fs_decrypt_one(struct inode *inode, struct page *page)
+{
+ struct f2fs_crypto_ctx *ctx = f2fs_get_crypto_ctx(inode);
+ int ret;
+
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+ ret = f2fs_decrypt(ctx, page);
+ f2fs_release_crypto_ctx(ctx);
+ return ret;
+}
+
+bool f2fs_valid_contents_enc_mode(uint32_t mode)
+{
+ return (mode == F2FS_ENCRYPTION_MODE_AES_256_XTS);
+}
+
+/**
+ * f2fs_validate_encryption_key_size() - Validate the encryption key size
+ * @mode: The key mode.
+ * @size: The key size to validate.
+ *
+ * Return: The validated key size for @mode. Zero if invalid.
+ */
+uint32_t f2fs_validate_encryption_key_size(uint32_t mode, uint32_t size)
+{
+ if (size == f2fs_encryption_key_size(mode))
+ return size;
+ return 0;
+}
diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c
new file mode 100644
index 000000000000..ab377d496a39
--- /dev/null
+++ b/fs/f2fs/crypto_fname.c
@@ -0,0 +1,440 @@
+/*
+ * linux/fs/f2fs/crypto_fname.c
+ *
+ * Copied from linux/fs/ext4/crypto.c
+ *
+ * Copyright (C) 2015, Google, Inc.
+ * Copyright (C) 2015, Motorola Mobility
+ *
+ * This contains functions for filename crypto management in f2fs
+ *
+ * Written by Uday Savagaonkar, 2014.
+ *
+ * Adjust f2fs dentry structure
+ * Jaegeuk Kim, 2015.
+ *
+ * This has not yet undergone a rigorous security audit.
+ */
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <keys/encrypted-type.h>
+#include <keys/user-type.h>
+#include <linux/crypto.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/key.h>
+#include <linux/list.h>
+#include <linux/mempool.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+#include <linux/spinlock_types.h>
+#include <linux/f2fs_fs.h>
+#include <linux/ratelimit.h>
+
+#include "f2fs.h"
+#include "f2fs_crypto.h"
+#include "xattr.h"
+
+/**
+ * f2fs_dir_crypt_complete() -
+ */
+static void f2fs_dir_crypt_complete(struct crypto_async_request *req, int res)
+{
+ struct f2fs_completion_result *ecr = req->data;
+
+ if (res == -EINPROGRESS)
+ return;
+ ecr->res = res;
+ complete(&ecr->completion);
+}
+
+bool f2fs_valid_filenames_enc_mode(uint32_t mode)
+{
+ return (mode == F2FS_ENCRYPTION_MODE_AES_256_CTS);
+}
+
+static unsigned max_name_len(struct inode *inode)
+{
+ return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize :
+ F2FS_NAME_LEN;
+}
+
+/**
+ * f2fs_fname_encrypt() -
+ *
+ * This function encrypts the input filename and stores the length of the
+ * ciphertext in oname->len. It returns the result code of the cipher operation;
+ * errors are negative. We trust the caller to allocate sufficient memory to oname.
+ */
+static int f2fs_fname_encrypt(struct inode *inode,
+ const struct qstr *iname, struct f2fs_str *oname)
+{
+ u32 ciphertext_len;
+ struct ablkcipher_request *req = NULL;
+ DECLARE_F2FS_COMPLETION_RESULT(ecr);
+ struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
+ struct crypto_ablkcipher *tfm = ci->ci_ctfm;
+ int res = 0;
+ char iv[F2FS_CRYPTO_BLOCK_SIZE];
+ struct scatterlist src_sg, dst_sg;
+ int padding = 4 << (ci->ci_flags & F2FS_POLICY_FLAGS_PAD_MASK);
+ char *workbuf, buf[32], *alloc_buf = NULL;
+ unsigned lim = max_name_len(inode);
+
+ if (iname->len <= 0 || iname->len > lim)
+ return -EIO;
+
+ ciphertext_len = (iname->len < F2FS_CRYPTO_BLOCK_SIZE) ?
+ F2FS_CRYPTO_BLOCK_SIZE : iname->len;
+ ciphertext_len = f2fs_fname_crypto_round_up(ciphertext_len, padding);
+ ciphertext_len = (ciphertext_len > lim) ? lim : ciphertext_len;
+
+ if (ciphertext_len <= sizeof(buf)) {
+ workbuf = buf;
+ } else {
+ alloc_buf = kmalloc(ciphertext_len, GFP_NOFS);
+ if (!alloc_buf)
+ return -ENOMEM;
+ workbuf = alloc_buf;
+ }
+
+ /* Allocate request */
+ req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ if (!req) {
+ printk_ratelimited(KERN_ERR
+ "%s: crypto_request_alloc() failed\n", __func__);
+ kfree(alloc_buf);
+ return -ENOMEM;
+ }
+ ablkcipher_request_set_callback(req,
+ CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ f2fs_dir_crypt_complete, &ecr);
+
+ /* Copy the input */
+ memcpy(workbuf, iname->name, iname->len);
+ if (iname->len < ciphertext_len)
+ memset(workbuf + iname->len, 0, ciphertext_len - iname->len);
+
+ /* Initialize IV */
+ memset(iv, 0, F2FS_CRYPTO_BLOCK_SIZE);
+
+ /* Create encryption request */
+ sg_init_one(&src_sg, workbuf, ciphertext_len);
+ sg_init_one(&dst_sg, oname->name, ciphertext_len);
+ ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
+ res = crypto_ablkcipher_encrypt(req);
+ if (res == -EINPROGRESS || res == -EBUSY) {
+ BUG_ON(req->base.data != &ecr);
+ wait_for_completion(&ecr.completion);
+ res = ecr.res;
+ }
+ kfree(alloc_buf);
+ ablkcipher_request_free(req);
+ if (res < 0) {
+ printk_ratelimited(KERN_ERR
+ "%s: Error (error code %d)\n", __func__, res);
+ }
+ oname->len = ciphertext_len;
+ return res;
+}
+
+/*
+ * f2fs_fname_decrypt()
+ * This function decrypts the input filename, and returns
+ * the length of the plaintext.
+ * Errors are returned as negative numbers.
+ * We trust the caller to allocate sufficient memory to oname string.
+ */
+static int f2fs_fname_decrypt(struct inode *inode,
+ const struct f2fs_str *iname, struct f2fs_str *oname)
+{
+ struct ablkcipher_request *req = NULL;
+ DECLARE_F2FS_COMPLETION_RESULT(ecr);
+ struct scatterlist src_sg, dst_sg;
+ struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
+ struct crypto_ablkcipher *tfm = ci->ci_ctfm;
+ int res = 0;
+ char iv[F2FS_CRYPTO_BLOCK_SIZE];
+ unsigned lim = max_name_len(inode);
+
+ if (iname->len <= 0 || iname->len > lim)
+ return -EIO;
+
+ /* Allocate request */
+ req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ if (!req) {
+ printk_ratelimited(KERN_ERR
+ "%s: crypto_request_alloc() failed\n", __func__);
+ return -ENOMEM;
+ }
+ ablkcipher_request_set_callback(req,
+ CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ f2fs_dir_crypt_complete, &ecr);
+
+ /* Initialize IV */
+ memset(iv, 0, F2FS_CRYPTO_BLOCK_SIZE);
+
+ /* Create decryption request */
+ sg_init_one(&src_sg, iname->name, iname->len);
+ sg_init_one(&dst_sg, oname->name, oname->len);
+ ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
+ res = crypto_ablkcipher_decrypt(req);
+ if (res == -EINPROGRESS || res == -EBUSY) {
+ BUG_ON(req->base.data != &ecr);
+ wait_for_completion(&ecr.completion);
+ res = ecr.res;
+ }
+ ablkcipher_request_free(req);
+ if (res < 0) {
+ printk_ratelimited(KERN_ERR
+ "%s: Error in f2fs_fname_decrypt (error code %d)\n",
+ __func__, res);
+ return res;
+ }
+
+ oname->len = strnlen(oname->name, iname->len);
+ return oname->len;
+}
+
+static const char *lookup_table =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+
+/**
+ * digest_encode() -
+ *
+ * Encodes the input digest using characters from the set [a-zA-Z0-9+,].
+ * The encoded string is roughly 4/3 times the size of the input string.
+ */
+static int digest_encode(const char *src, int len, char *dst)
+{
+ int i = 0, bits = 0, ac = 0;
+ char *cp = dst;
+
+ while (i < len) {
+ ac += (((unsigned char) src[i]) << bits);
+ bits += 8;
+ do {
+ *cp++ = lookup_table[ac & 0x3f];
+ ac >>= 6;
+ bits -= 6;
+ } while (bits >= 6);
+ i++;
+ }
+ if (bits)
+ *cp++ = lookup_table[ac & 0x3f];
+ return cp - dst;
+}
+
+static int digest_decode(const char *src, int len, char *dst)
+{
+ int i = 0, bits = 0, ac = 0;
+ const char *p;
+ char *cp = dst;
+
+ while (i < len) {
+ p = strchr(lookup_table, src[i]);
+ if (p == NULL || src[i] == 0)
+ return -2;
+ ac += (p - lookup_table) << bits;
+ bits += 6;
+ if (bits >= 8) {
+ *cp++ = ac & 0xff;
+ ac >>= 8;
+ bits -= 8;
+ }
+ i++;
+ }
+ if (ac)
+ return -1;
+ return cp - dst;
+}
+
+/**
+ * f2fs_fname_crypto_round_up() -
+ *
+ * Return: The next multiple of block size
+ */
+u32 f2fs_fname_crypto_round_up(u32 size, u32 blksize)
+{
+ return ((size + blksize - 1) / blksize) * blksize;
+}
+
+/**
+ * f2fs_fname_crypto_alloc_buffer() -
+ *
+ * Allocates an output buffer that is sufficient for the crypto operation
+ * specified by the context and the direction.
+ */
+int f2fs_fname_crypto_alloc_buffer(struct inode *inode,
+ u32 ilen, struct f2fs_str *crypto_str)
+{
+ unsigned int olen;
+ int padding = 16;
+ struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
+
+ if (ci)
+ padding = 4 << (ci->ci_flags & F2FS_POLICY_FLAGS_PAD_MASK);
+ if (padding < F2FS_CRYPTO_BLOCK_SIZE)
+ padding = F2FS_CRYPTO_BLOCK_SIZE;
+ olen = f2fs_fname_crypto_round_up(ilen, padding);
+ crypto_str->len = olen;
+ if (olen < F2FS_FNAME_CRYPTO_DIGEST_SIZE * 2)
+ olen = F2FS_FNAME_CRYPTO_DIGEST_SIZE * 2;
+ /* Allocated buffer can hold one more character to null-terminate the
+ * string */
+ crypto_str->name = kmalloc(olen + 1, GFP_NOFS);
+ if (!(crypto_str->name))
+ return -ENOMEM;
+ return 0;
+}
+
+/**
+ * f2fs_fname_crypto_free_buffer() -
+ *
+ * Frees the buffer allocated for crypto operation.
+ */
+void f2fs_fname_crypto_free_buffer(struct f2fs_str *crypto_str)
+{
+ if (!crypto_str)
+ return;
+ kfree(crypto_str->name);
+ crypto_str->name = NULL;
+}
+
+/**
+ * f2fs_fname_disk_to_usr() - converts a filename from disk space to user space
+ */
+int f2fs_fname_disk_to_usr(struct inode *inode,
+ f2fs_hash_t *hash,
+ const struct f2fs_str *iname,
+ struct f2fs_str *oname)
+{
+ const struct qstr qname = FSTR_TO_QSTR(iname);
+ char buf[24];
+ int ret;
+
+ if (is_dot_dotdot(&qname)) {
+ oname->name[0] = '.';
+ oname->name[iname->len - 1] = '.';
+ oname->len = iname->len;
+ return oname->len;
+ }
+
+ if (F2FS_I(inode)->i_crypt_info)
+ return f2fs_fname_decrypt(inode, iname, oname);
+
+ if (iname->len <= F2FS_FNAME_CRYPTO_DIGEST_SIZE) {
+ ret = digest_encode(iname->name, iname->len, oname->name);
+ oname->len = ret;
+ return ret;
+ }
+ if (hash) {
+ memcpy(buf, hash, 4);
+ memset(buf + 4, 0, 4);
+ } else
+ memset(buf, 0, 8);
+ memcpy(buf + 8, iname->name + iname->len - 16, 16);
+ oname->name[0] = '_';
+ ret = digest_encode(buf, 24, oname->name + 1);
+ oname->len = ret + 1;
+ return ret + 1;
+}
+
+/**
+ * f2fs_fname_usr_to_disk() - converts a filename from user space to disk space
+ */
+int f2fs_fname_usr_to_disk(struct inode *inode,
+ const struct qstr *iname,
+ struct f2fs_str *oname)
+{
+ int res;
+ struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
+
+ if (is_dot_dotdot(iname)) {
+ oname->name[0] = '.';
+ oname->name[iname->len - 1] = '.';
+ oname->len = iname->len;
+ return oname->len;
+ }
+
+ if (ci) {
+ res = f2fs_fname_encrypt(inode, iname, oname);
+ return res;
+ }
+ /* Without a proper key, a user is not allowed to modify the filenames
+ * in a directory. Consequently, a user space name cannot be mapped to
+ * a disk-space name */
+ return -EACCES;
+}
+
+int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname,
+ int lookup, struct f2fs_filename *fname)
+{
+ struct f2fs_crypt_info *ci;
+ int ret = 0, bigname = 0;
+
+ memset(fname, 0, sizeof(struct f2fs_filename));
+ fname->usr_fname = iname;
+
+ if (!f2fs_encrypted_inode(dir) || is_dot_dotdot(iname)) {
+ fname->disk_name.name = (unsigned char *)iname->name;
+ fname->disk_name.len = iname->len;
+ return 0;
+ }
+ ret = f2fs_get_encryption_info(dir);
+ if (ret)
+ return ret;
+ ci = F2FS_I(dir)->i_crypt_info;
+ if (ci) {
+ ret = f2fs_fname_crypto_alloc_buffer(dir, iname->len,
+ &fname->crypto_buf);
+ if (ret < 0)
+ return ret;
+ ret = f2fs_fname_encrypt(dir, iname, &fname->crypto_buf);
+ if (ret < 0)
+ goto errout;
+ fname->disk_name.name = fname->crypto_buf.name;
+ fname->disk_name.len = fname->crypto_buf.len;
+ return 0;
+ }
+ if (!lookup)
+ return -EACCES;
+
+ /* We don't have the key and we are doing a lookup; decode the
+ * user-supplied name
+ */
+ if (iname->name[0] == '_')
+ bigname = 1;
+ if ((bigname && (iname->len != 33)) ||
+ (!bigname && (iname->len > 43)))
+ return -ENOENT;
+
+ fname->crypto_buf.name = kmalloc(32, GFP_KERNEL);
+ if (fname->crypto_buf.name == NULL)
+ return -ENOMEM;
+ ret = digest_decode(iname->name + bigname, iname->len - bigname,
+ fname->crypto_buf.name);
+ if (ret < 0) {
+ ret = -ENOENT;
+ goto errout;
+ }
+ fname->crypto_buf.len = ret;
+ if (bigname) {
+ memcpy(&fname->hash, fname->crypto_buf.name, 4);
+ } else {
+ fname->disk_name.name = fname->crypto_buf.name;
+ fname->disk_name.len = fname->crypto_buf.len;
+ }
+ return 0;
+errout:
+ f2fs_fname_crypto_free_buffer(&fname->crypto_buf);
+ return ret;
+}
+
+void f2fs_fname_free_filename(struct f2fs_filename *fname)
+{
+ kfree(fname->crypto_buf.name);
+ fname->crypto_buf.name = NULL;
+ fname->usr_fname = NULL;
+ fname->disk_name.name = NULL;
+}
diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c
new file mode 100644
index 000000000000..95b8f936f00b
--- /dev/null
+++ b/fs/f2fs/crypto_key.c
@@ -0,0 +1,255 @@
+/*
+ * linux/fs/f2fs/crypto_key.c
+ *
+ * Copied from linux/fs/ext4/crypto_key.c
+ *
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * This contains encryption key functions for f2fs
+ *
+ * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
+ */
+#include <keys/encrypted-type.h>
+#include <keys/user-type.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+#include <uapi/linux/keyctl.h>
+#include <crypto/hash.h>
+#include <linux/f2fs_fs.h>
+
+#include "f2fs.h"
+#include "xattr.h"
+
+/*
+ * Completion callback for the key-derivation cipher request: records the
+ * final status and wakes the waiter. -EINPROGRESS notifications are ignored.
+ */
+static void derive_crypt_complete(struct crypto_async_request *req, int rc)
+{
+	struct f2fs_completion_result *result;
+
+	if (rc != -EINPROGRESS) {
+		result = req->data;
+		result->res = rc;
+		complete(&result->completion);
+	}
+}
+
+/**
+ * f2fs_derive_key_aes() - Derive a key using AES-128-ECB
+ * @deriving_key: Encryption key used for derivation.
+ * @source_key: Source key to which to apply derivation.
+ * @derived_key: Derived key.
+ *
+ * Encrypts F2FS_AES_256_XTS_KEY_SIZE bytes of @source_key with "ecb(aes)"
+ * keyed by @deriving_key and writes the result to @derived_key. The transform
+ * and request are allocated and freed within this call.
+ *
+ * Return: Zero on success; non-zero otherwise.
+ */
+static int f2fs_derive_key_aes(char deriving_key[F2FS_AES_128_ECB_KEY_SIZE],
+ char source_key[F2FS_AES_256_XTS_KEY_SIZE],
+ char derived_key[F2FS_AES_256_XTS_KEY_SIZE])
+{
+ int res = 0;
+ struct ablkcipher_request *req = NULL;
+ DECLARE_F2FS_COMPLETION_RESULT(ecr);
+ struct scatterlist src_sg, dst_sg;
+ struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0,
+ 0);
+
+ if (IS_ERR(tfm)) {
+ res = PTR_ERR(tfm);
+ /* NULL tells the cleanup code at out: there is nothing to free. */
+ tfm = NULL;
+ goto out;
+ }
+ crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ if (!req) {
+ res = -ENOMEM;
+ goto out;
+ }
+ /* derive_crypt_complete() stores the final status in ecr. */
+ ablkcipher_request_set_callback(req,
+ CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ derive_crypt_complete, &ecr);
+ res = crypto_ablkcipher_setkey(tfm, deriving_key,
+ F2FS_AES_128_ECB_KEY_SIZE);
+ if (res < 0)
+ goto out;
+
+ sg_init_one(&src_sg, source_key, F2FS_AES_256_XTS_KEY_SIZE);
+ sg_init_one(&dst_sg, derived_key, F2FS_AES_256_XTS_KEY_SIZE);
+ ablkcipher_request_set_crypt(req, &src_sg, &dst_sg,
+ F2FS_AES_256_XTS_KEY_SIZE, NULL);
+ res = crypto_ablkcipher_encrypt(req);
+ /* Request was queued: wait for the callback and take its status. */
+ if (res == -EINPROGRESS || res == -EBUSY) {
+ BUG_ON(req->base.data != &ecr);
+ wait_for_completion(&ecr.completion);
+ res = ecr.res;
+ }
+out:
+ if (req)
+ ablkcipher_request_free(req);
+ if (tfm)
+ crypto_free_ablkcipher(tfm);
+ return res;
+}
+
+/*
+ * Tear down a crypt info object: drop the keyring key reference (if one is
+ * held), free the cipher transform and return the object to its slab cache.
+ * A NULL @ci is accepted and ignored.
+ */
+static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci)
+{
+	struct key *keyring_key;
+
+	if (ci == NULL)
+		return;
+
+	keyring_key = ci->ci_keyring_key;
+	if (keyring_key != NULL)
+		key_put(keyring_key);
+
+	crypto_free_ablkcipher(ci->ci_ctfm);
+	kmem_cache_free(f2fs_crypt_info_cachep, ci);
+}
+
+/*
+ * Detach and free the crypt info attached to @inode.
+ *
+ * If @ci is NULL the inode's current crypt info is used. The object is freed
+ * only if this caller is the one that swaps it out of the inode; if the
+ * inode's pointer no longer equals @ci, nothing is done.
+ */
+void f2fs_free_encryption_info(struct inode *inode, struct f2fs_crypt_info *ci)
+{
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+
+	if (!ci) {
+		ci = ACCESS_ONCE(fi->i_crypt_info);
+		if (!ci)
+			return;
+	}
+
+	/* free only when this call swapped @ci out of the inode */
+	if (cmpxchg(&fi->i_crypt_info, ci, NULL) == ci)
+		f2fs_free_crypt_info(ci);
+}
+
+/*
+ * _f2fs_get_encryption_info() - set up the crypt info for an encrypted inode
+ * @inode: inode whose encryption context xattr should be loaded
+ *
+ * If the inode already has crypt info whose keyring key is still valid (or
+ * which has no keyring key), nothing is done. Otherwise the encryption
+ * context is read from the xattr, the master key is requested from the
+ * keyring as a "logon" key, a per-inode key is derived from it using the
+ * context nonce, and a cipher transform keyed with the result is installed
+ * in the inode with cmpxchg().
+ *
+ * A key of the wrong type or with an unexpected size is reported as -ENOKEY
+ * rather than crashing the kernel: the payload contents come from the
+ * keyring and must not be trusted.
+ *
+ * Return: 0 on success or a negative errno. Note that -ENOKEY is converted
+ * to 0 for anything but regular files, in which case the inode is left
+ * without crypt info.
+ */
+int _f2fs_get_encryption_info(struct inode *inode)
+{
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	struct f2fs_crypt_info *crypt_info;
+	char full_key_descriptor[F2FS_KEY_DESC_PREFIX_SIZE +
+				(F2FS_KEY_DESCRIPTOR_SIZE * 2) + 1];
+	struct key *keyring_key = NULL;
+	struct f2fs_encryption_key *master_key;
+	struct f2fs_encryption_context ctx;
+	struct user_key_payload *ukp;
+	struct crypto_ablkcipher *ctfm;
+	const char *cipher_str;
+	char raw_key[F2FS_MAX_KEY_SIZE];
+	char mode;
+	int res;
+
+	res = f2fs_crypto_initialize();
+	if (res)
+		return res;
+retry:
+	crypt_info = ACCESS_ONCE(fi->i_crypt_info);
+	if (crypt_info) {
+		if (!crypt_info->ci_keyring_key ||
+				key_validate(crypt_info->ci_keyring_key) == 0)
+			return 0;
+		/* cached key is no longer valid: drop it and start over */
+		f2fs_free_encryption_info(inode, crypt_info);
+		goto retry;
+	}
+
+	res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
+				F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
+				&ctx, sizeof(ctx), NULL);
+	if (res < 0)
+		return res;
+	else if (res != sizeof(ctx))
+		return -EINVAL;
+	res = 0;
+
+	crypt_info = kmem_cache_alloc(f2fs_crypt_info_cachep, GFP_NOFS);
+	if (!crypt_info)
+		return -ENOMEM;
+
+	crypt_info->ci_flags = ctx.flags;
+	crypt_info->ci_data_mode = ctx.contents_encryption_mode;
+	crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
+	crypt_info->ci_ctfm = NULL;
+	crypt_info->ci_keyring_key = NULL;
+	memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
+				sizeof(crypt_info->ci_master_key));
+	if (S_ISREG(inode->i_mode))
+		mode = crypt_info->ci_data_mode;
+	else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+		mode = crypt_info->ci_filename_mode;
+	else
+		BUG();
+
+	switch (mode) {
+	case F2FS_ENCRYPTION_MODE_AES_256_XTS:
+		cipher_str = "xts(aes)";
+		break;
+	case F2FS_ENCRYPTION_MODE_AES_256_CTS:
+		cipher_str = "cts(cbc(aes))";
+		break;
+	default:
+		printk_once(KERN_WARNING
+			    "f2fs: unsupported key mode %d (ino %u)\n",
+			    mode, (unsigned) inode->i_ino);
+		res = -ENOKEY;
+		goto out;
+	}
+
+	/* keyring description: prefix followed by the hex key descriptor */
+	memcpy(full_key_descriptor, F2FS_KEY_DESC_PREFIX,
+					F2FS_KEY_DESC_PREFIX_SIZE);
+	sprintf(full_key_descriptor + F2FS_KEY_DESC_PREFIX_SIZE,
+					"%*phN", F2FS_KEY_DESCRIPTOR_SIZE,
+					ctx.master_key_descriptor);
+	full_key_descriptor[F2FS_KEY_DESC_PREFIX_SIZE +
+					(2 * F2FS_KEY_DESCRIPTOR_SIZE)] = '\0';
+	keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
+	if (IS_ERR(keyring_key)) {
+		res = PTR_ERR(keyring_key);
+		keyring_key = NULL;
+		goto out;
+	}
+	/* from here on the key reference is dropped by f2fs_free_crypt_info() */
+	crypt_info->ci_keyring_key = keyring_key;
+	if (keyring_key->type != &key_type_logon) {
+		printk_once(KERN_WARNING "f2fs: key type must be logon\n");
+		res = -ENOKEY;
+		goto out;
+	}
+
+	/* hold the key semaphore while the payload is being read */
+	down_read(&keyring_key->sem);
+	ukp = ((struct user_key_payload *)keyring_key->payload.data);
+	if (ukp->datalen != sizeof(struct f2fs_encryption_key)) {
+		res = -EINVAL;
+		up_read(&keyring_key->sem);
+		goto out;
+	}
+	master_key = (struct f2fs_encryption_key *)ukp->data;
+	BUILD_BUG_ON(F2FS_AES_128_ECB_KEY_SIZE !=
+				F2FS_KEY_DERIVATION_NONCE_SIZE);
+	/* the size field comes from the key payload: fail, don't BUG */
+	if (master_key->size != F2FS_AES_256_XTS_KEY_SIZE) {
+		printk_once(KERN_WARNING
+			    "f2fs: key size incorrect: %u\n",
+			    (unsigned) master_key->size);
+		res = -ENOKEY;
+		up_read(&keyring_key->sem);
+		goto out;
+	}
+	res = f2fs_derive_key_aes(ctx.nonce, master_key->raw,
+				  raw_key);
+	up_read(&keyring_key->sem);
+	if (res)
+		goto out;
+
+	ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
+	if (!ctfm || IS_ERR(ctfm)) {
+		res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
+		printk(KERN_DEBUG
+		       "%s: error %d (inode %u) allocating crypto tfm\n",
+		       __func__, res, (unsigned) inode->i_ino);
+		goto out;
+	}
+	crypt_info->ci_ctfm = ctfm;
+	crypto_ablkcipher_clear_flags(ctfm, ~0);
+	crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
+			     CRYPTO_TFM_REQ_WEAK_KEY);
+	res = crypto_ablkcipher_setkey(ctfm, raw_key,
+					f2fs_encryption_key_size(mode));
+	if (res)
+		goto out;
+
+	/* the derived key now lives in the tfm; wipe the stack copy */
+	memzero_explicit(raw_key, sizeof(raw_key));
+	if (cmpxchg(&fi->i_crypt_info, NULL, crypt_info) != NULL) {
+		/* lost the race against another installer: re-check theirs */
+		f2fs_free_crypt_info(crypt_info);
+		goto retry;
+	}
+	return 0;
+
+out:
+	/* a missing key is only an error for regular files */
+	if (res == -ENOKEY && !S_ISREG(inode->i_mode))
+		res = 0;
+
+	f2fs_free_crypt_info(crypt_info);
+	memzero_explicit(raw_key, sizeof(raw_key));
+	return res;
+}
+
+/* Returns 1 when @inode currently has crypt info attached, 0 otherwise. */
+int f2fs_has_encryption_key(struct inode *inode)
+{
+	return F2FS_I(inode)->i_crypt_info != NULL;
+}
diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c
new file mode 100644
index 000000000000..d4a96af513c2
--- /dev/null
+++ b/fs/f2fs/crypto_policy.c
@@ -0,0 +1,209 @@
+/*
+ * copied from linux/fs/ext4/crypto_policy.c
+ *
+ * Copyright (C) 2015, Google, Inc.
+ * Copyright (C) 2015, Motorola Mobility.
+ *
+ * This contains encryption policy functions for f2fs with some modifications
+ * to support f2fs-specific xattr APIs.
+ *
+ * Written by Michael Halcrow, 2015.
+ * Modified by Jaegeuk Kim, 2015.
+ */
+#include <linux/random.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/f2fs_fs.h>
+
+#include "f2fs.h"
+#include "xattr.h"
+
+/*
+ * Returns 1 if @inode has a non-empty encryption context xattr. The xattr is
+ * only probed (NULL buffer, size 0); any error counts as "no context".
+ */
+static int f2fs_inode_has_encryption_context(struct inode *inode)
+{
+	int xattr_len;
+
+	xattr_len = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
+			F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, NULL, 0, NULL);
+	return xattr_len > 0;
+}
+
+/*
+ * Compare the encryption context stored on @inode with @policy.
+ *
+ * Returns 1 when the master key descriptor, flags and both encryption modes
+ * all match; returns 0 on any mismatch or when the context xattr cannot be
+ * read at its full size.
+ */
+static int f2fs_is_encryption_context_consistent_with_policy(
+	struct inode *inode, const struct f2fs_encryption_policy *policy)
+{
+	struct f2fs_encryption_context ctx;
+	int res;
+
+	res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
+			F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
+			sizeof(ctx), NULL);
+	if (res != sizeof(ctx))
+		return 0;
+
+	if (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor,
+			F2FS_KEY_DESCRIPTOR_SIZE) != 0)
+		return 0;
+	if (ctx.flags != policy->flags)
+		return 0;
+	if (ctx.contents_encryption_mode != policy->contents_encryption_mode)
+		return 0;
+	if (ctx.filenames_encryption_mode != policy->filenames_encryption_mode)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Validate @policy and, if it is acceptable, store a new V1 encryption
+ * context with a freshly generated random nonce in @inode's encryption
+ * xattr (XATTR_CREATE).
+ *
+ * Returns -EINVAL for an invalid mode or unknown flag bits, otherwise the
+ * result of f2fs_setxattr().
+ */
+static int f2fs_create_encryption_context_from_policy(
+	struct inode *inode, const struct f2fs_encryption_policy *policy)
+{
+	struct f2fs_encryption_context ctx;
+
+	/* reject the policy before building anything from it */
+	if (!f2fs_valid_contents_enc_mode(policy->contents_encryption_mode)) {
+		printk(KERN_WARNING
+		       "%s: Invalid contents encryption mode %d\n", __func__,
+		       policy->contents_encryption_mode);
+		return -EINVAL;
+	}
+
+	if (!f2fs_valid_filenames_enc_mode(policy->filenames_encryption_mode)) {
+		printk(KERN_WARNING
+		       "%s: Invalid filenames encryption mode %d\n", __func__,
+		       policy->filenames_encryption_mode);
+		return -EINVAL;
+	}
+
+	if (policy->flags & ~F2FS_POLICY_FLAGS_VALID)
+		return -EINVAL;
+
+	BUILD_BUG_ON(sizeof(ctx.nonce) != F2FS_KEY_DERIVATION_NONCE_SIZE);
+
+	ctx.format = F2FS_ENCRYPTION_CONTEXT_FORMAT_V1;
+	ctx.flags = policy->flags;
+	ctx.contents_encryption_mode = policy->contents_encryption_mode;
+	ctx.filenames_encryption_mode = policy->filenames_encryption_mode;
+	memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
+			F2FS_KEY_DESCRIPTOR_SIZE);
+	get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE);
+
+	return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
+			F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
+			sizeof(ctx), NULL, XATTR_CREATE);
+}
+
+/*
+ * Apply an encryption policy to a directory.
+ *
+ * Only policy version 0 on a directory is accepted. A directory without an
+ * encryption context must be empty and gets a new context created from
+ * @policy; a directory that already has one must match @policy exactly.
+ *
+ * Returns 0 on success, -EINVAL or -ENOTEMPTY on the checks above, or the
+ * error from creating the context.
+ */
+int f2fs_process_policy(const struct f2fs_encryption_policy *policy,
+			struct inode *inode)
+{
+	if (policy->version != 0 || !S_ISDIR(inode->i_mode))
+		return -EINVAL;
+
+	if (f2fs_inode_has_encryption_context(inode)) {
+		if (f2fs_is_encryption_context_consistent_with_policy(inode,
+								policy))
+			return 0;
+
+		printk(KERN_WARNING
+		       "%s: Policy inconsistent with encryption context\n",
+		       __func__);
+		return -EINVAL;
+	}
+
+	/* no context yet: only an empty directory may receive one */
+	if (!f2fs_empty_dir(inode))
+		return -ENOTEMPTY;
+
+	return f2fs_create_encryption_context_from_policy(inode, policy);
+}
+
+/*
+ * Fill @policy from the encryption context stored on @inode.
+ *
+ * Returns -ENODATA if the inode is not encrypted or its context xattr cannot
+ * be read at full size, -EINVAL if the context is not format V1, and 0 on
+ * success.
+ */
+int f2fs_get_policy(struct inode *inode, struct f2fs_encryption_policy *policy)
+{
+	struct f2fs_encryption_context ctx;
+	int len;
+
+	if (!f2fs_encrypted_inode(inode))
+		return -ENODATA;
+
+	len = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
+			F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
+			&ctx, sizeof(ctx), NULL);
+	if (len != sizeof(ctx))
+		return -ENODATA;
+
+	if (ctx.format != F2FS_ENCRYPTION_CONTEXT_FORMAT_V1)
+		return -EINVAL;
+
+	policy->version = 0;
+	policy->flags = ctx.flags;
+	policy->contents_encryption_mode = ctx.contents_encryption_mode;
+	policy->filenames_encryption_mode = ctx.filenames_encryption_mode;
+	memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor,
+			F2FS_KEY_DESCRIPTOR_SIZE);
+
+	return 0;
+}
+
+/*
+ * Check whether @child may live in @parent as far as encryption goes.
+ *
+ * Returns 1 if the parent is unencrypted, or if both are encrypted with the
+ * same master key descriptor, modes and flags (or neither has crypt info
+ * loaded). Returns 0 otherwise, including when crypt info cannot be obtained
+ * for either inode.
+ */
+int f2fs_is_child_context_consistent_with_parent(struct inode *parent,
+						struct inode *child)
+{
+	struct f2fs_crypt_info *parent_ci, *child_ci;
+
+	if (!parent || !child) {
+		pr_err("parent %p child %p\n", parent, child);
+		BUG_ON(1);
+	}
+
+	/* an unencrypted parent imposes no restrictions */
+	if (!f2fs_encrypted_inode(parent))
+		return 1;
+	/* an unencrypted child under an encrypted parent is never allowed */
+	if (!f2fs_encrypted_inode(child))
+		return 0;
+
+	if (f2fs_get_encryption_info(parent) ||
+			f2fs_get_encryption_info(child))
+		return 0;
+
+	parent_ci = F2FS_I(parent)->i_crypt_info;
+	child_ci = F2FS_I(child)->i_crypt_info;
+
+	/* with missing crypt info they only agree if both lack it */
+	if (!parent_ci || !child_ci)
+		return !parent_ci && !child_ci;
+
+	if (memcmp(parent_ci->ci_master_key, child_ci->ci_master_key,
+			F2FS_KEY_DESCRIPTOR_SIZE) != 0)
+		return 0;
+	if (parent_ci->ci_data_mode != child_ci->ci_data_mode)
+		return 0;
+	if (parent_ci->ci_filename_mode != child_ci->ci_filename_mode)
+		return 0;
+
+	return parent_ci->ci_flags == child_ci->ci_flags;
+}
+
+/**
+ * f2fs_inherit_context() - Sets a child context from its parent
+ * @parent: Parent inode from which the context is inherited.
+ * @child: Child inode that inherits the context from @parent.
+ * @ipage: Passed through unchanged to f2fs_setxattr() for @child.
+ *
+ * The child receives the parent's modes, flags and master key descriptor
+ * together with a freshly generated random nonce.
+ *
+ * Return: Zero on success, non-zero otherwise. -ENOKEY is returned when the
+ * parent has no crypt info loaded.
+ */
+int f2fs_inherit_context(struct inode *parent, struct inode *child,
+						struct page *ipage)
+{
+	struct f2fs_encryption_context ctx;
+	struct f2fs_crypt_info *ci;
+	int res;
+
+	res = f2fs_get_encryption_info(parent);
+	if (res < 0)
+		return res;
+
+	/*
+	 * Obtaining the encryption info can succeed without attaching crypt
+	 * info (a missing key is not an error for directories), so a NULL
+	 * pointer here is a normal runtime condition and must not BUG.
+	 */
+	ci = F2FS_I(parent)->i_crypt_info;
+	if (ci == NULL)
+		return -ENOKEY;
+
+	ctx.format = F2FS_ENCRYPTION_CONTEXT_FORMAT_V1;
+
+	ctx.contents_encryption_mode = ci->ci_data_mode;
+	ctx.filenames_encryption_mode = ci->ci_filename_mode;
+	ctx.flags = ci->ci_flags;
+	memcpy(ctx.master_key_descriptor, ci->ci_master_key,
+			F2FS_KEY_DESCRIPTOR_SIZE);
+
+	/* every inode gets its own nonce for key derivation */
+	get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE);
+	return f2fs_setxattr(child, F2FS_XATTR_INDEX_ENCRYPTION,
+				F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
+				sizeof(ctx), ipage, XATTR_CREATE);
+}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 1e1aae669fa8..9bedfa8dd3a5 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -18,6 +18,7 @@
#include <linux/bio.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
+#include <linux/cleancache.h>
#include "f2fs.h"
#include "node.h"
@@ -33,6 +34,15 @@ static void f2fs_read_end_io(struct bio *bio, int err)
struct bio_vec *bvec;
int i;
+ if (f2fs_bio_encrypted(bio)) {
+ if (err) {
+ f2fs_release_crypto_ctx(bio->bi_private);
+ } else {
+ f2fs_end_io_crypto_work(bio->bi_private, bio);
+ return;
+ }
+ }
+
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
@@ -56,6 +66,8 @@ static void f2fs_write_end_io(struct bio *bio, int err)
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
+ f2fs_restore_and_release_control_page(&page);
+
if (unlikely(err)) {
set_page_dirty(page);
set_bit(AS_EIO, &page->mapping->flags);
@@ -86,7 +98,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
bio->bi_bdev = sbi->sb->s_bdev;
bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
- bio->bi_private = sbi;
+ bio->bi_private = is_read ? NULL : sbi;
return bio;
}
@@ -133,16 +145,16 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
* Fill the locked page with data located in the block address.
* Return unlocked page.
*/
-int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
- struct f2fs_io_info *fio)
+int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
struct bio *bio;
+ struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page;
trace_f2fs_submit_page_bio(page, fio);
- f2fs_trace_ios(page, fio, 0);
+ f2fs_trace_ios(fio, 0);
/* Allocate a new bio */
- bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw));
+ bio = __bio_alloc(fio->sbi, fio->blk_addr, 1, is_read_io(fio->rw));
if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
bio_put(bio);
@@ -154,12 +166,13 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
return 0;
}
-void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
- struct f2fs_io_info *fio)
+void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
{
+ struct f2fs_sb_info *sbi = fio->sbi;
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io;
bool is_read = is_read_io(fio->rw);
+ struct page *bio_page;
io = is_read ? &sbi->read_io : &sbi->write_io[btype];
@@ -181,17 +194,19 @@ alloc_new:
io->fio = *fio;
}
- if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
+ bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
+
+ if (bio_add_page(io->bio, bio_page, PAGE_CACHE_SIZE, 0) <
PAGE_CACHE_SIZE) {
__submit_merged_bio(io);
goto alloc_new;
}
io->last_block_in_bio = fio->blk_addr;
- f2fs_trace_ios(page, fio, 0);
+ f2fs_trace_ios(fio, 0);
up_write(&io->io_rwsem);
- trace_f2fs_submit_page_mbio(page, fio);
+ trace_f2fs_submit_page_mbio(fio->page, fio);
}
/*
@@ -251,19 +266,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
return err;
}
-static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs,
- struct extent_info *ei, struct buffer_head *bh_result)
-{
- unsigned int blkbits = sb->s_blocksize_bits;
- size_t max_size = bh_result->b_size;
- size_t mapped_size;
-
- clear_buffer_new(bh_result);
- map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs);
- mapped_size = (ei->fofs + ei->len - pgofs) << blkbits;
- bh_result->b_size = min(max_size, mapped_size);
-}
-
static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs,
struct extent_info *ei)
{
@@ -905,7 +907,7 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn)
sync_inode_page(dn);
}
-struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
+struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)
{
struct address_space *mapping = inode->i_mapping;
struct dnode_of_data dn;
@@ -913,83 +915,15 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
struct extent_info ei;
int err;
struct f2fs_io_info fio = {
+ .sbi = F2FS_I_SB(inode),
.type = DATA,
- .rw = sync ? READ_SYNC : READA,
+ .rw = rw,
+ .encrypted_page = NULL,
};
- /*
- * If sync is false, it needs to check its block allocation.
- * This is need and triggered by two flows:
- * gc and truncate_partial_data_page.
- */
- if (!sync)
- goto search;
-
- page = find_get_page(mapping, index);
- if (page && PageUptodate(page))
- return page;
- f2fs_put_page(page, 0);
-search:
- if (f2fs_lookup_extent_cache(inode, index, &ei)) {
- dn.data_blkaddr = ei.blk + index - ei.fofs;
- goto got_it;
- }
-
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
- if (err)
- return ERR_PTR(err);
- f2fs_put_dnode(&dn);
+ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
+ return read_mapping_page(mapping, index, NULL);
- if (dn.data_blkaddr == NULL_ADDR)
- return ERR_PTR(-ENOENT);
-
- /* By fallocate(), there is no cached page, but with NEW_ADDR */
- if (unlikely(dn.data_blkaddr == NEW_ADDR))
- return ERR_PTR(-EINVAL);
-
-got_it:
- page = grab_cache_page(mapping, index);
- if (!page)
- return ERR_PTR(-ENOMEM);
-
- if (PageUptodate(page)) {
- unlock_page(page);
- return page;
- }
-
- fio.blk_addr = dn.data_blkaddr;
- err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
- if (err)
- return ERR_PTR(err);
-
- if (sync) {
- wait_on_page_locked(page);
- if (unlikely(!PageUptodate(page))) {
- f2fs_put_page(page, 0);
- return ERR_PTR(-EIO);
- }
- }
- return page;
-}
-
-/*
- * If it tries to access a hole, return an error.
- * Because, the callers, functions in dir.c and GC, should be able to know
- * whether this page exists or not.
- */
-struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
-{
- struct address_space *mapping = inode->i_mapping;
- struct dnode_of_data dn;
- struct page *page;
- struct extent_info ei;
- int err;
- struct f2fs_io_info fio = {
- .type = DATA,
- .rw = READ_SYNC,
- };
-repeat:
page = grab_cache_page(mapping, index);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -1011,10 +945,11 @@ repeat:
f2fs_put_page(page, 1);
return ERR_PTR(-ENOENT);
}
-
got_it:
- if (PageUptodate(page))
+ if (PageUptodate(page)) {
+ unlock_page(page);
return page;
+ }
/*
* A new dentry page is allocated but not able to be written, since its
@@ -1025,14 +960,58 @@ got_it:
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
SetPageUptodate(page);
+ unlock_page(page);
return page;
}
fio.blk_addr = dn.data_blkaddr;
- err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
+ fio.page = page;
+ err = f2fs_submit_page_bio(&fio);
if (err)
return ERR_PTR(err);
+ return page;
+}
+
+/*
+ * Return an up-to-date data page of @inode at @index with a reference held.
+ * A cached up-to-date page is returned directly; otherwise the page is read
+ * synchronously. Returns an ERR_PTR() on failure, -EIO if the page is still
+ * not up to date once it has been unlocked.
+ */
+struct page *find_data_page(struct inode *inode, pgoff_t index)
+{
+	struct page *page;
+
+	/* fast path: already cached and up to date */
+	page = find_get_page(inode->i_mapping, index);
+	if (page && PageUptodate(page))
+		return page;
+	f2fs_put_page(page, 0);
+
+	page = get_read_data_page(inode, index, READ_SYNC);
+	if (IS_ERR(page) || PageUptodate(page))
+		return page;
+
+	/* a read is in flight; it finishes when the page gets unlocked */
+	wait_on_page_locked(page);
+	if (unlikely(!PageUptodate(page))) {
+		f2fs_put_page(page, 0);
+		return ERR_PTR(-EIO);
+	}
+	return page;
+}
+
+/*
+ * If it tries to access a hole, return an error.
+ * Because, the callers, functions in dir.c and GC, should be able to know
+ * whether this page exists or not.
+ */
+struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page;
+repeat:
+ page = get_read_data_page(inode, index, READ_SYNC);
+ if (IS_ERR(page))
+ return page;
+
+ /* wait for read completion */
lock_page(page);
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
@@ -1060,46 +1039,37 @@ struct page *get_new_data_page(struct inode *inode,
struct page *page;
struct dnode_of_data dn;
int err;
+repeat:
+ page = grab_cache_page(mapping, index);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
set_new_dnode(&dn, inode, ipage, NULL, 0);
err = f2fs_reserve_block(&dn, index);
- if (err)
+ if (err) {
+ f2fs_put_page(page, 1);
return ERR_PTR(err);
-repeat:
- page = grab_cache_page(mapping, index);
- if (!page) {
- err = -ENOMEM;
- goto put_err;
}
+ if (!ipage)
+ f2fs_put_dnode(&dn);
if (PageUptodate(page))
- return page;
+ goto got_it;
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
SetPageUptodate(page);
} else {
- struct f2fs_io_info fio = {
- .type = DATA,
- .rw = READ_SYNC,
- .blk_addr = dn.data_blkaddr,
- };
- err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
- if (err)
- goto put_err;
+ f2fs_put_page(page, 1);
- lock_page(page);
- if (unlikely(!PageUptodate(page))) {
- f2fs_put_page(page, 1);
- err = -EIO;
- goto put_err;
- }
- if (unlikely(page->mapping != mapping)) {
- f2fs_put_page(page, 1);
+ page = get_read_data_page(inode, index, READ_SYNC);
+ if (IS_ERR(page))
goto repeat;
- }
- }
+ /* wait for read completion */
+ lock_page(page);
+ }
+got_it:
if (new_i_size &&
i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
@@ -1107,10 +1077,6 @@ repeat:
set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
}
return page;
-
-put_err:
- f2fs_put_dnode(&dn);
- return ERR_PTR(err);
}
static int __allocate_data_block(struct dnode_of_data *dn)
@@ -1208,18 +1174,18 @@ out:
}
/*
- * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh.
+ * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
+ * f2fs_map_blocks structure.
* If original data blocks are allocated, then give them to blockdev.
* Otherwise,
* a. preallocate requested block addresses
* b. do not use extent cache for better performance
* c. give the block addresses to blockdev
*/
-static int __get_data_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create, bool fiemap)
+static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
+ int create, bool fiemap)
{
- unsigned int blkbits = inode->i_sb->s_blocksize_bits;
- unsigned maxblocks = bh_result->b_size >> blkbits;
+ unsigned int maxblocks = map->m_len;
struct dnode_of_data dn;
int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
pgoff_t pgofs, end_offset;
@@ -1227,11 +1193,16 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
struct extent_info ei;
bool allocated = false;
- /* Get the page offset from the block offset(iblock) */
- pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
+ map->m_len = 0;
+ map->m_flags = 0;
+
+ /* it only supports block size == page size */
+ pgofs = (pgoff_t)map->m_lblk;
if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
- f2fs_map_bh(inode->i_sb, pgofs, &ei, bh_result);
+ map->m_pblk = ei.blk + pgofs - ei.fofs;
+ map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
+ map->m_flags = F2FS_MAP_MAPPED;
goto out;
}
@@ -1250,21 +1221,23 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
goto put_out;
if (dn.data_blkaddr != NULL_ADDR) {
- clear_buffer_new(bh_result);
- map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+ map->m_flags = F2FS_MAP_MAPPED;
+ map->m_pblk = dn.data_blkaddr;
+ if (dn.data_blkaddr == NEW_ADDR)
+ map->m_flags |= F2FS_MAP_UNWRITTEN;
} else if (create) {
err = __allocate_data_block(&dn);
if (err)
goto put_out;
allocated = true;
- set_buffer_new(bh_result);
- map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+ map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED;
+ map->m_pblk = dn.data_blkaddr;
} else {
goto put_out;
}
end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
- bh_result->b_size = (((size_t)1) << blkbits);
+ map->m_len = 1;
dn.ofs_in_node++;
pgofs++;
@@ -1288,22 +1261,25 @@ get_next:
end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
}
- if (maxblocks > (bh_result->b_size >> blkbits)) {
+ if (maxblocks > map->m_len) {
block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
if (blkaddr == NULL_ADDR && create) {
err = __allocate_data_block(&dn);
if (err)
goto sync_out;
allocated = true;
- set_buffer_new(bh_result);
+ map->m_flags |= F2FS_MAP_NEW;
blkaddr = dn.data_blkaddr;
}
/* Give more consecutive addresses for the readahead */
- if (blkaddr == (bh_result->b_blocknr + ofs)) {
+ if ((map->m_pblk != NEW_ADDR &&
+ blkaddr == (map->m_pblk + ofs)) ||
+ (map->m_pblk == NEW_ADDR &&
+ blkaddr == NEW_ADDR)) {
ofs++;
dn.ofs_in_node++;
pgofs++;
- bh_result->b_size += (((size_t)1) << blkbits);
+ map->m_len++;
goto get_next;
}
}
@@ -1316,10 +1292,28 @@ unlock_out:
if (create)
f2fs_unlock_op(F2FS_I_SB(inode));
out:
- trace_f2fs_get_data_block(inode, iblock, bh_result, err);
+ trace_f2fs_map_blocks(inode, map, err);
return err;
}
+/*
+ * buffer_head front end for f2fs_map_blocks(): maps up to bh->b_size bytes
+ * starting at block @iblock and, on success, copies the resulting block
+ * number, mapping flags and mapped length back into @bh.
+ */
+static int __get_data_block(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh, int create, bool fiemap)
+{
+	struct f2fs_map_blocks map;
+	int err;
+
+	map.m_lblk = iblock;
+	map.m_len = bh->b_size >> inode->i_blkbits;
+
+	err = f2fs_map_blocks(inode, &map, create, fiemap);
+	if (err)
+		return err;
+
+	map_bh(bh, inode->i_sb, map.m_pblk);
+	/* replace the mapping-related state bits with those just reported */
+	bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
+	bh->b_size = map.m_len << inode->i_blkbits;
+	return 0;
+}
+
static int get_data_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
@@ -1332,11 +1326,268 @@ static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
return __get_data_block(inode, iblock, bh_result, create, true);
}
+/* Convert a byte offset in @inode to a block index (offset >> i_blkbits). */
+static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
+{
+	sector_t blk = offset >> inode->i_blkbits;
+
+	return blk;
+}
+
+/* Convert a block index in @inode to a byte offset (blk << i_blkbits). */
+static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
+{
+	loff_t offset = blk << inode->i_blkbits;
+
+	return offset;
+}
+
+/*
+ * f2fs_fiemap() - report the extents of @inode in [@start, @start + @len)
+ *
+ * Only FIEMAP_FLAG_SYNC is accepted. The walk runs under i_mutex and calls
+ * get_data_block_fiemap() (create == 0) repeatedly. One pending extent is
+ * kept in (logical, phys, size, flags) and handed to
+ * fiemap_fill_next_extent() when the next extent or a hole is found.
+ *
+ * Returns 0 on success or a negative errno (-EINTR on a fatal signal). A
+ * return of 1 from fiemap_fill_next_extent() is turned into 0.
+ */
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
- return generic_block_fiemap(inode, fieinfo,
- start, len, get_data_block_fiemap);
+ struct buffer_head map_bh;
+ sector_t start_blk, last_blk;
+ loff_t isize = i_size_read(inode);
+ u64 logical = 0, phys = 0, size = 0;
+ u32 flags = 0;
+ bool past_eof = false, whole_file = false;
+ int ret = 0;
+
+ ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+ if (ret)
+ return ret;
+
+ mutex_lock(&inode->i_mutex);
+
+ /* A request at least as long as the file is clamped to i_size. */
+ if (len >= isize) {
+ whole_file = true;
+ len = isize;
+ }
+
+ /* Always look at one block at least. */
+ if (logical_to_blk(inode, len) == 0)
+ len = blk_to_logical(inode, 1);
+
+ start_blk = logical_to_blk(inode, start);
+ last_blk = logical_to_blk(inode, start + len - 1);
+next:
+ memset(&map_bh, 0, sizeof(struct buffer_head));
+ map_bh.b_size = len;
+
+ ret = get_data_block_fiemap(inode, start_blk, &map_bh, 0);
+ if (ret)
+ goto out;
+
+ /* HOLE */
+ if (!buffer_mapped(&map_bh)) {
+ start_blk++;
+
+ if (!past_eof && blk_to_logical(inode, start_blk) >= isize)
+ past_eof = 1;
+
+ /* Emit the pending extent; at EOF it is flagged as the last one. */
+ if (past_eof && size) {
+ flags |= FIEMAP_EXTENT_LAST;
+ ret = fiemap_fill_next_extent(fieinfo, logical,
+ phys, size, flags);
+ } else if (size) {
+ ret = fiemap_fill_next_extent(fieinfo, logical,
+ phys, size, flags);
+ size = 0;
+ }
+
+ /* if we have holes up to/past EOF then we're done */
+ if (start_blk > last_blk || past_eof || ret)
+ goto out;
+ } else {
+ if (start_blk > last_blk && !whole_file) {
+ ret = fiemap_fill_next_extent(fieinfo, logical,
+ phys, size, flags);
+ goto out;
+ }
+
+ /*
+ * if size != 0 then we know we already have an extent
+ * to add, so add it.
+ */
+ if (size) {
+ ret = fiemap_fill_next_extent(fieinfo, logical,
+ phys, size, flags);
+ if (ret)
+ goto out;
+ }
+
+ /* Remember this mapping as the new pending extent. */
+ logical = blk_to_logical(inode, start_blk);
+ phys = blk_to_logical(inode, map_bh.b_blocknr);
+ size = map_bh.b_size;
+ flags = 0;
+ if (buffer_unwritten(&map_bh))
+ flags = FIEMAP_EXTENT_UNWRITTEN;
+
+ start_blk += logical_to_blk(inode, size);
+
+ /*
+ * If we are past the EOF, then we need to make sure as
+ * soon as we find a hole that the last extent we found
+ * is marked with FIEMAP_EXTENT_LAST
+ */
+ if (!past_eof && logical + size >= isize)
+ past_eof = true;
+ }
+ cond_resched();
+ if (fatal_signal_pending(current))
+ ret = -EINTR;
+ else
+ goto next;
+out:
+ /* fiemap_fill_next_extent() returning 1 is not reported as an error. */
+ if (ret == 1)
+ ret = 0;
+
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
+
+/*
+ * This function was originally taken from fs/mpage.c, and customized for f2fs.
+ * Major change was from block_size == page_size in f2fs by default.
+ *
+ * Reads @nr_pages pages of @mapping. With @pages non-NULL, pages are taken
+ * from that list and added to the page cache; with @pages NULL the single
+ * page @page is read (see the two callers in this file). Block addresses come
+ * from f2fs_map_blocks(); pages with consecutive block numbers share one bio.
+ * For encrypted regular files each new bio gets a crypto context in
+ * bi_private. Always returns 0; failures are recorded per page with
+ * SetPageError().
+ */
+static int f2fs_mpage_readpages(struct address_space *mapping,
+ struct list_head *pages, struct page *page,
+ unsigned nr_pages)
+{
+ struct bio *bio = NULL;
+ unsigned page_idx;
+ sector_t last_block_in_bio = 0;
+ struct inode *inode = mapping->host;
+ const unsigned blkbits = inode->i_blkbits;
+ const unsigned blocksize = 1 << blkbits;
+ sector_t block_in_file;
+ sector_t last_block;
+ sector_t last_block_in_file;
+ sector_t block_nr;
+ struct block_device *bdev = inode->i_sb->s_bdev;
+ struct f2fs_map_blocks map;
+
+ map.m_pblk = 0;
+ map.m_lblk = 0;
+ map.m_len = 0;
+ map.m_flags = 0;
+
+ for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
+
+ /*
+ * NOTE(review): this runs before page is taken from the list, so in
+ * list mode it touches the previous iteration's page (NULL on the
+ * first pass) - confirm this is intended.
+ */
+ prefetchw(&page->flags);
+ if (pages) {
+ page = list_entry(pages->prev, struct page, lru);
+ list_del(&page->lru);
+ if (add_to_page_cache_lru(page, mapping,
+ page->index, GFP_KERNEL))
+ goto next_page;
+ }
+
+ /* Clamp the mapping request to the last block within i_size. */
+ block_in_file = (sector_t)page->index;
+ last_block = block_in_file + nr_pages;
+ last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
+ blkbits;
+ if (last_block > last_block_in_file)
+ last_block = last_block_in_file;
+
+ /*
+ * Map blocks using the previous result first.
+ */
+ if ((map.m_flags & F2FS_MAP_MAPPED) &&
+ block_in_file > map.m_lblk &&
+ block_in_file < (map.m_lblk + map.m_len))
+ goto got_it;
+
+ /*
+ * Then do more f2fs_map_blocks() calls until we are
+ * done with this page.
+ */
+ map.m_flags = 0;
+
+ if (block_in_file < last_block) {
+ map.m_lblk = block_in_file;
+ map.m_len = last_block - block_in_file;
+
+ if (f2fs_map_blocks(inode, &map, 0, false))
+ goto set_error_page;
+ }
+got_it:
+ if ((map.m_flags & F2FS_MAP_MAPPED)) {
+ block_nr = map.m_pblk + block_in_file - map.m_lblk;
+ SetPageMappedToDisk(page);
+
+ /* A cleancache hit fills the page, so no disk read is needed. */
+ if (!PageUptodate(page) && !cleancache_get_page(page)) {
+ SetPageUptodate(page);
+ goto confused;
+ }
+ } else {
+ /* Unmapped block: hand back a zero-filled, up-to-date page. */
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
+ unlock_page(page);
+ goto next_page;
+ }
+
+ /*
+ * This page will go to BIO. Do we need to send this
+ * BIO off first?
+ */
+ if (bio && (last_block_in_bio != block_nr - 1)) {
+submit_and_realloc:
+ submit_bio(READ, bio);
+ bio = NULL;
+ }
+ if (bio == NULL) {
+ struct f2fs_crypto_ctx *ctx = NULL;
+
+ if (f2fs_encrypted_inode(inode) &&
+ S_ISREG(inode->i_mode)) {
+ struct page *cpage;
+
+ ctx = f2fs_get_crypto_ctx(inode);
+ if (IS_ERR(ctx))
+ goto set_error_page;
+
+ /* wait the page to be moved by cleaning */
+ cpage = find_lock_page(
+ META_MAPPING(F2FS_I_SB(inode)),
+ block_nr);
+ if (cpage) {
+ f2fs_wait_on_page_writeback(cpage,
+ DATA);
+ f2fs_put_page(cpage, 1);
+ }
+ }
+
+ bio = bio_alloc(GFP_KERNEL,
+ min_t(int, nr_pages, bio_get_nr_vecs(bdev)));
+ if (!bio) {
+ if (ctx)
+ f2fs_release_crypto_ctx(ctx);
+ goto set_error_page;
+ }
+ bio->bi_bdev = bdev;
+ bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr);
+ bio->bi_end_io = f2fs_read_end_io;
+ /* NULL unless the inode is an encrypted regular file. */
+ bio->bi_private = ctx;
+ }
+
+ /* If the page does not fit, submit this bio and retry with a new one. */
+ if (bio_add_page(bio, page, blocksize, 0) < blocksize)
+ goto submit_and_realloc;
+
+ last_block_in_bio = block_nr;
+ goto next_page;
+set_error_page:
+ SetPageError(page);
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ unlock_page(page);
+ goto next_page;
+confused:
+ if (bio) {
+ submit_bio(READ, bio);
+ bio = NULL;
+ }
+ unlock_page(page);
+next_page:
+ /* Drop the reference on a page that was taken from the list. */
+ if (pages)
+ page_cache_release(page);
+ }
+ BUG_ON(pages && !list_empty(pages));
+ if (bio)
+ submit_bio(READ, bio);
+ return 0;
}
static int f2fs_read_data_page(struct file *file, struct page *page)
@@ -1350,8 +1601,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
if (ret == -EAGAIN)
- ret = mpage_readpage(page, get_data_block);
-
+ ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
return ret;
}
@@ -1365,11 +1615,12 @@ static int f2fs_read_data_pages(struct file *file,
if (f2fs_has_inline_data(inode))
return 0;
- return mpage_readpages(mapping, pages, nr_pages, get_data_block);
+ return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
}
-int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
+int do_write_data_page(struct f2fs_io_info *fio)
{
+ struct page *page = fio->page;
struct inode *inode = page->mapping->host;
struct dnode_of_data dn;
int err = 0;
@@ -1387,6 +1638,14 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
goto out_writepage;
}
+ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
+ fio->encrypted_page = f2fs_encrypt(inode, fio->page);
+ if (IS_ERR(fio->encrypted_page)) {
+ err = PTR_ERR(fio->encrypted_page);
+ goto out_writepage;
+ }
+ }
+
set_page_writeback(page);
/*
@@ -1396,11 +1655,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
if (unlikely(fio->blk_addr != NEW_ADDR &&
!is_cold_data(page) &&
need_inplace_update(inode))) {
- rewrite_data_page(page, fio);
+ rewrite_data_page(fio);
set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
trace_f2fs_do_write_data_page(page, IPU);
} else {
- write_data_page(page, &dn, fio);
+ write_data_page(&dn, fio);
set_data_blkaddr(&dn);
f2fs_update_extent_cache(&dn);
trace_f2fs_do_write_data_page(page, OPU);
@@ -1425,8 +1684,11 @@ static int f2fs_write_data_page(struct page *page,
bool need_balance_fs = false;
int err = 0;
struct f2fs_io_info fio = {
+ .sbi = sbi,
.type = DATA,
.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+ .page = page,
+ .encrypted_page = NULL,
};
trace_f2fs_writepage(page, DATA);
@@ -1456,7 +1718,7 @@ write:
if (S_ISDIR(inode->i_mode)) {
if (unlikely(f2fs_cp_error(sbi)))
goto redirty_out;
- err = do_write_data_page(page, &fio);
+ err = do_write_data_page(&fio);
goto done;
}
@@ -1476,7 +1738,7 @@ write:
if (f2fs_has_inline_data(inode))
err = f2fs_write_inline_data(inode, page);
if (err == -EAGAIN)
- err = do_write_data_page(page, &fio);
+ err = do_write_data_page(&fio);
f2fs_unlock_op(sbi);
done:
if (err && err != -ENOENT)
@@ -1645,11 +1907,14 @@ put_next:
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
struct f2fs_io_info fio = {
+ .sbi = sbi,
.type = DATA,
.rw = READ_SYNC,
.blk_addr = dn.data_blkaddr,
+ .page = page,
+ .encrypted_page = NULL,
};
- err = f2fs_submit_page_bio(sbi, page, &fio);
+ err = f2fs_submit_page_bio(&fio);
if (err)
goto fail;
@@ -1663,6 +1928,15 @@ put_next:
f2fs_put_page(page, 1);
goto repeat;
}
+
+ /* avoid symlink page */
+ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
+ err = f2fs_decrypt_one(inode, page);
+ if (err) {
+ f2fs_put_page(page, 1);
+ goto fail;
+ }
+ }
}
out:
SetPageUptodate(page);
@@ -1733,6 +2007,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
return err;
}
+ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
+ return 0;
+
if (check_direct_IO(inode, iter, offset))
return 0;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index f5388f37217e..75176e0dd6c8 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -94,7 +94,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
static void update_sit_info(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
- unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist;
+ unsigned long long blks_per_sec, hblks_per_sec, total_vblocks;
+ unsigned long long bimodal, dist;
unsigned int segno, vblocks;
int ndirty = 0;
@@ -112,10 +113,10 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
ndirty++;
}
}
- dist = MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
- si->bimodal = bimodal / dist;
+ dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100);
+ si->bimodal = div_u64(bimodal, dist);
if (si->dirty_count)
- si->avg_vblocks = total_vblocks / ndirty;
+ si->avg_vblocks = div_u64(total_vblocks, ndirty);
else
si->avg_vblocks = 0;
}
@@ -143,7 +144,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += sizeof(struct sit_info);
si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry);
si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
- si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
+ si->base_mem += 3 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
si->base_mem += SIT_VBLOCK_MAP_SIZE;
if (sbi->segs_per_sec > 1)
si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 3a3302ab7871..a34ebd8312ab 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -76,20 +76,10 @@ static unsigned long dir_block_index(unsigned int level,
return bidx;
}
-static bool early_match_name(size_t namelen, f2fs_hash_t namehash,
- struct f2fs_dir_entry *de)
-{
- if (le16_to_cpu(de->name_len) != namelen)
- return false;
-
- if (de->hash_code != namehash)
- return false;
-
- return true;
-}
-
static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
- struct qstr *name, int *max_slots,
+ struct f2fs_filename *fname,
+ f2fs_hash_t namehash,
+ int *max_slots,
struct page **res_page)
{
struct f2fs_dentry_block *dentry_blk;
@@ -98,9 +88,8 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page);
- make_dentry_ptr(&d, (void *)dentry_blk, 1);
- de = find_target_dentry(name, max_slots, &d);
-
+ make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
+ de = find_target_dentry(fname, namehash, max_slots, &d);
if (de)
*res_page = dentry_page;
else
@@ -114,13 +103,15 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
return de;
}
-struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots,
- struct f2fs_dentry_ptr *d)
+struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *fname,
+ f2fs_hash_t namehash, int *max_slots,
+ struct f2fs_dentry_ptr *d)
{
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
- f2fs_hash_t namehash = f2fs_dentry_hash(name);
int max_len = 0;
+ struct f2fs_str de_name = FSTR_INIT(NULL, 0);
+ struct f2fs_str *name = &fname->disk_name;
if (max_slots)
*max_slots = 0;
@@ -132,8 +123,18 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots,
}
de = &d->dentry[bit_pos];
- if (early_match_name(name->len, namehash, de) &&
- !memcmp(d->filename[bit_pos], name->name, name->len))
+
+ /* encrypted case */
+ de_name.name = d->filename[bit_pos];
+ de_name.len = le16_to_cpu(de->name_len);
+
+ /* show encrypted name */
+ if (fname->hash) {
+ if (de->hash_code == fname->hash)
+ goto found;
+ } else if (de_name.len == name->len &&
+ de->hash_code == namehash &&
+ !memcmp(de_name.name, name->name, name->len))
goto found;
if (max_slots && max_len > *max_slots)
@@ -155,16 +156,21 @@ found:
}
static struct f2fs_dir_entry *find_in_level(struct inode *dir,
- unsigned int level, struct qstr *name,
- f2fs_hash_t namehash, struct page **res_page)
+ unsigned int level,
+ struct f2fs_filename *fname,
+ struct page **res_page)
{
- int s = GET_DENTRY_SLOTS(name->len);
+ struct qstr name = FSTR_TO_QSTR(&fname->disk_name);
+ int s = GET_DENTRY_SLOTS(name.len);
unsigned int nbucket, nblock;
unsigned int bidx, end_block;
struct page *dentry_page;
struct f2fs_dir_entry *de = NULL;
bool room = false;
int max_slots;
+ f2fs_hash_t namehash;
+
+ namehash = f2fs_dentry_hash(&name);
f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
@@ -177,13 +183,14 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
for (; bidx < end_block; bidx++) {
/* no need to allocate new dentry pages to all the indices */
- dentry_page = find_data_page(dir, bidx, true);
+ dentry_page = find_data_page(dir, bidx);
if (IS_ERR(dentry_page)) {
room = true;
continue;
}
- de = find_in_block(dentry_page, name, &max_slots, res_page);
+ de = find_in_block(dentry_page, fname, namehash, &max_slots,
+ res_page);
if (de)
break;
@@ -211,30 +218,34 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
{
unsigned long npages = dir_blocks(dir);
struct f2fs_dir_entry *de = NULL;
- f2fs_hash_t name_hash;
unsigned int max_depth;
unsigned int level;
+ struct f2fs_filename fname;
+ int err;
*res_page = NULL;
- if (f2fs_has_inline_dentry(dir))
- return find_in_inline_dir(dir, child, res_page);
+ err = f2fs_fname_setup_filename(dir, child, 1, &fname);
+ if (err)
+ return NULL;
+
+ if (f2fs_has_inline_dentry(dir)) {
+ de = find_in_inline_dir(dir, &fname, res_page);
+ goto out;
+ }
if (npages == 0)
- return NULL;
+ goto out;
- name_hash = f2fs_dentry_hash(child);
max_depth = F2FS_I(dir)->i_current_depth;
for (level = 0; level < max_depth; level++) {
- de = find_in_level(dir, level, child, name_hash, res_page);
+ de = find_in_level(dir, level, &fname, res_page);
if (de)
break;
}
- if (!de && F2FS_I(dir)->chash != name_hash) {
- F2FS_I(dir)->chash = name_hash;
- F2FS_I(dir)->clevel = level - 1;
- }
+out:
+ f2fs_fname_free_filename(&fname);
return de;
}
@@ -303,10 +314,14 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
set_page_dirty(ipage);
}
-int update_dent_inode(struct inode *inode, const struct qstr *name)
+int update_dent_inode(struct inode *inode, struct inode *to,
+ const struct qstr *name)
{
struct page *page;
+ if (file_enc_name(to))
+ return 0;
+
page = get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(page))
return PTR_ERR(page);
@@ -356,7 +371,7 @@ static int make_empty_dir(struct inode *inode,
dentry_blk = kmap_atomic(dentry_page);
- make_dentry_ptr(&d, (void *)dentry_blk, 1);
+ make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
do_make_empty_dir(inode, parent, &d);
kunmap_atomic(dentry_blk);
@@ -390,6 +405,12 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
err = f2fs_init_security(inode, dir, name, page);
if (err)
goto put_error;
+
+ if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) {
+ err = f2fs_inherit_context(dir, inode, page);
+ if (err)
+ goto put_error;
+ }
} else {
page = get_node_page(F2FS_I_SB(dir), inode->i_ino);
if (IS_ERR(page))
@@ -501,24 +522,33 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
unsigned long bidx, block;
f2fs_hash_t dentry_hash;
unsigned int nbucket, nblock;
- size_t namelen = name->len;
struct page *dentry_page = NULL;
struct f2fs_dentry_block *dentry_blk = NULL;
struct f2fs_dentry_ptr d;
- int slots = GET_DENTRY_SLOTS(namelen);
struct page *page = NULL;
- int err = 0;
+ struct f2fs_filename fname;
+ struct qstr new_name;
+ int slots, err;
+
+ err = f2fs_fname_setup_filename(dir, name, 0, &fname);
+ if (err)
+ return err;
+
+ new_name.name = fname_name(&fname);
+ new_name.len = fname_len(&fname);
if (f2fs_has_inline_dentry(dir)) {
- err = f2fs_add_inline_entry(dir, name, inode, ino, mode);
+ err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode);
if (!err || err != -EAGAIN)
- return err;
+ goto out;
else
err = 0;
}
- dentry_hash = f2fs_dentry_hash(name);
level = 0;
+ slots = GET_DENTRY_SLOTS(new_name.len);
+ dentry_hash = f2fs_dentry_hash(&new_name);
+
current_depth = F2FS_I(dir)->i_current_depth;
if (F2FS_I(dir)->chash == dentry_hash) {
level = F2FS_I(dir)->clevel;
@@ -526,8 +556,10 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
}
start:
- if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
- return -ENOSPC;
+ if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) {
+ err = -ENOSPC;
+ goto out;
+ }
/* Increase the depth, if required */
if (level == current_depth)
@@ -541,8 +573,10 @@ start:
for (block = bidx; block <= (bidx + nblock - 1); block++) {
dentry_page = get_new_data_page(dir, NULL, block, true);
- if (IS_ERR(dentry_page))
- return PTR_ERR(dentry_page);
+ if (IS_ERR(dentry_page)) {
+ err = PTR_ERR(dentry_page);
+ goto out;
+ }
dentry_blk = kmap(dentry_page);
bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
@@ -562,15 +596,17 @@ add_dentry:
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, name, NULL);
+ page = init_inode_metadata(inode, dir, &new_name, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
}
+ if (f2fs_encrypted_inode(dir))
+ file_set_enc_name(inode);
}
- make_dentry_ptr(&d, (void *)dentry_blk, 1);
- f2fs_update_dentry(ino, mode, &d, name, dentry_hash, bit_pos);
+ make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
+ f2fs_update_dentry(ino, mode, &d, &new_name, dentry_hash, bit_pos);
set_page_dirty(dentry_page);
@@ -592,6 +628,8 @@ fail:
}
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
+out:
+ f2fs_fname_free_filename(&fname);
return err;
}
@@ -729,11 +767,12 @@ bool f2fs_empty_dir(struct inode *dir)
}
bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
- unsigned int start_pos)
+ unsigned int start_pos, struct f2fs_str *fstr)
{
unsigned char d_type = DT_UNKNOWN;
unsigned int bit_pos;
struct f2fs_dir_entry *de = NULL;
+ struct f2fs_str de_name = FSTR_INIT(NULL, 0);
bit_pos = ((unsigned long)ctx->pos % d->max);
@@ -747,8 +786,24 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
d_type = f2fs_filetype_table[de->file_type];
else
d_type = DT_UNKNOWN;
- if (!dir_emit(ctx, d->filename[bit_pos],
- le16_to_cpu(de->name_len),
+
+ /* encrypted case */
+ de_name.name = d->filename[bit_pos];
+ de_name.len = le16_to_cpu(de->name_len);
+
+ if (f2fs_encrypted_inode(d->inode)) {
+ int save_len = fstr->len;
+ int ret;
+
+ ret = f2fs_fname_disk_to_usr(d->inode, &de->hash_code,
+ &de_name, fstr);
+ de_name = *fstr;
+ fstr->len = save_len;
+ if (ret < 0)
+ return true;
+ }
+
+ if (!dir_emit(ctx, de_name.name, de_name.len,
le32_to_cpu(de->ino), d_type))
return true;
@@ -767,9 +822,24 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
struct file_ra_state *ra = &file->f_ra;
unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
struct f2fs_dentry_ptr d;
+ struct f2fs_str fstr = FSTR_INIT(NULL, 0);
+ int err = 0;
- if (f2fs_has_inline_dentry(inode))
- return f2fs_read_inline_dir(file, ctx);
+ if (f2fs_encrypted_inode(inode)) {
+ err = f2fs_get_encryption_info(inode);
+ if (err)
+ return err;
+
+ err = f2fs_fname_crypto_alloc_buffer(inode, F2FS_NAME_LEN,
+ &fstr);
+ if (err < 0)
+ return err;
+ }
+
+ if (f2fs_has_inline_dentry(inode)) {
+ err = f2fs_read_inline_dir(file, ctx, &fstr);
+ goto out;
+ }
/* readahead for multi pages of dir */
if (npages - n > 1 && !ra_has_index(ra, n))
@@ -783,9 +853,9 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
dentry_blk = kmap(dentry_page);
- make_dentry_ptr(&d, (void *)dentry_blk, 1);
+ make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
- if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK))
+ if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr))
goto stop;
ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
@@ -798,8 +868,9 @@ stop:
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
}
-
- return 0;
+out:
+ f2fs_fname_crypto_free_buffer(&fstr);
+ return err;
}
const struct file_operations f2fs_dir_operations = {
@@ -808,4 +879,7 @@ const struct file_operations f2fs_dir_operations = {
.iterate = f2fs_readdir,
.fsync = f2fs_sync_file,
.unlocked_ioctl = f2fs_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = f2fs_compat_ioctl,
+#endif
};
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8de34ab6d5b1..a8327ed73898 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -70,6 +70,15 @@ struct f2fs_mount_info {
unsigned int opt;
};
+#define F2FS_FEATURE_ENCRYPT 0x0001
+
+#define F2FS_HAS_FEATURE(sb, mask) \
+ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
+#define F2FS_SET_FEATURE(sb, mask) \
+ F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask)
+#define F2FS_CLEAR_FEATURE(sb, mask) \
+ F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask)
+
#define CRCPOLY_LE 0xedb88320
static inline __u32 f2fs_crc32(void *buf, size_t len)
@@ -110,6 +119,8 @@ enum {
#define DEF_BATCHED_TRIM_SECTIONS 32
#define BATCHED_TRIM_SEGMENTS(sbi) \
(SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
+#define BATCHED_TRIM_BLOCKS(sbi) \
+ (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
struct cp_control {
int reason;
@@ -218,6 +229,13 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
+#define F2FS_IOC_SET_ENCRYPTION_POLICY \
+ _IOR('f', 19, struct f2fs_encryption_policy)
+#define F2FS_IOC_GET_ENCRYPTION_PWSALT \
+ _IOW('f', 20, __u8[16])
+#define F2FS_IOC_GET_ENCRYPTION_POLICY \
+ _IOW('f', 21, struct f2fs_encryption_policy)
+
/*
* should be same as XFS_IOC_GOINGDOWN.
* Flags for going down operation used by FS_IOC_GOINGDOWN
@@ -239,16 +257,38 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
* For INODE and NODE manager
*/
/* for directory operations */
+struct f2fs_str {
+ unsigned char *name;
+ u32 len;
+};
+
+struct f2fs_filename {
+ const struct qstr *usr_fname;
+ struct f2fs_str disk_name;
+ f2fs_hash_t hash;
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+ struct f2fs_str crypto_buf;
+#endif
+};
+
+#define FSTR_INIT(n, l) { .name = n, .len = l }
+#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len)
+#define fname_name(p) ((p)->disk_name.name)
+#define fname_len(p) ((p)->disk_name.len)
+
struct f2fs_dentry_ptr {
+ struct inode *inode;
const void *bitmap;
struct f2fs_dir_entry *dentry;
__u8 (*filename)[F2FS_SLOT_LEN];
int max;
};
-static inline void make_dentry_ptr(struct f2fs_dentry_ptr *d,
- void *src, int type)
+static inline void make_dentry_ptr(struct inode *inode,
+ struct f2fs_dentry_ptr *d, void *src, int type)
{
+ d->inode = inode;
+
if (type == 1) {
struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src;
d->max = NR_DENTRY_IN_BLOCK;
@@ -315,10 +355,51 @@ struct extent_tree {
};
/*
+ * This structure is taken from ext4_map_blocks.
+ *
+ * Note, however, that f2fs uses the NEW and MAPPED flags for f2fs_map_blocks().
+ */
+#define F2FS_MAP_NEW (1 << BH_New)
+#define F2FS_MAP_MAPPED (1 << BH_Mapped)
+#define F2FS_MAP_UNWRITTEN (1 << BH_Unwritten)
+#define F2FS_MAP_FLAGS (F2FS_MAP_NEW | F2FS_MAP_MAPPED |\
+ F2FS_MAP_UNWRITTEN)
+
+struct f2fs_map_blocks {
+ block_t m_pblk;
+ block_t m_lblk;
+ unsigned int m_len;
+ unsigned int m_flags;
+};
+
+/*
* i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
*/
#define FADVISE_COLD_BIT 0x01
#define FADVISE_LOST_PINO_BIT 0x02
+#define FADVISE_ENCRYPT_BIT 0x04
+#define FADVISE_ENC_NAME_BIT 0x08
+
+#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
+#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
+#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT)
+#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT)
+#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT)
+#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT)
+#define file_is_encrypt(inode) is_file(inode, FADVISE_ENCRYPT_BIT)
+#define file_set_encrypt(inode) set_file(inode, FADVISE_ENCRYPT_BIT)
+#define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT)
+#define file_enc_name(inode) is_file(inode, FADVISE_ENC_NAME_BIT)
+#define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT)
+
+/* Encryption algorithms */
+#define F2FS_ENCRYPTION_MODE_INVALID 0
+#define F2FS_ENCRYPTION_MODE_AES_256_XTS 1
+#define F2FS_ENCRYPTION_MODE_AES_256_GCM 2
+#define F2FS_ENCRYPTION_MODE_AES_256_CBC 3
+#define F2FS_ENCRYPTION_MODE_AES_256_CTS 4
+
+#include "f2fs_crypto.h"
#define DEF_DIR_LEVEL 0
@@ -346,6 +427,11 @@ struct f2fs_inode_info {
struct radix_tree_root inmem_root; /* radix tree for inmem pages */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct mutex inmem_lock; /* lock for inmemory pages */
+
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+ /* Encryption params */
+ struct f2fs_crypt_info *i_crypt_info;
+#endif
};
static inline void get_extent_info(struct extent_info *ext,
@@ -571,9 +657,12 @@ enum page_type {
};
struct f2fs_io_info {
+ struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */
enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */
block_t blk_addr; /* block address to be written */
+ struct page *page; /* page to be written */
+ struct page *encrypted_page; /* encrypted page */
};
#define is_read_io(rw) (((rw) & 1) == READ)
@@ -666,6 +755,7 @@ struct f2fs_sb_info {
block_t user_block_count; /* # of user blocks */
block_t total_valid_block_count; /* # of valid blocks */
block_t alloc_valid_block_count; /* # of allocated blocks */
+ block_t discard_blks; /* discard command candidates */
block_t last_valid_block_count; /* for recovery */
u32 s_next_generation; /* for NFS support */
atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */
@@ -1193,6 +1283,24 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr)
return mask & *addr;
}
+static inline void f2fs_set_bit(unsigned int nr, char *addr)
+{
+ int mask;
+
+ addr += (nr >> 3);
+ mask = 1 << (7 - (nr & 0x07));
+ *addr |= mask;
+}
+
+static inline void f2fs_clear_bit(unsigned int nr, char *addr)
+{
+ int mask;
+
+ addr += (nr >> 3);
+ mask = 1 << (7 - (nr & 0x07));
+ *addr &= ~mask;
+}
+
static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr)
{
int mask;
@@ -1391,6 +1499,21 @@ static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page)
kunmap(page);
}
+static inline int is_file(struct inode *inode, int type)
+{
+ return F2FS_I(inode)->i_advise & type;
+}
+
+static inline void set_file(struct inode *inode, int type)
+{
+ F2FS_I(inode)->i_advise |= type;
+}
+
+static inline void clear_file(struct inode *inode, int type)
+{
+ F2FS_I(inode)->i_advise &= ~type;
+}
+
static inline int f2fs_readonly(struct super_block *sb)
{
return sb->s_flags & MS_RDONLY;
@@ -1407,6 +1530,17 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
sbi->sb->s_flags |= MS_RDONLY;
}
+static inline bool is_dot_dotdot(const struct qstr *str)
+{
+ if (str->len == 1 && str->name[0] == '.')
+ return true;
+
+ if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
+ return true;
+
+ return false;
+}
+
#define get_inode_mode(i) \
((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -1453,10 +1587,11 @@ struct dentry *f2fs_get_parent(struct dentry *child);
*/
extern unsigned char f2fs_filetype_table[F2FS_FT_MAX];
void set_de_type(struct f2fs_dir_entry *, umode_t);
-struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *,
- struct f2fs_dentry_ptr *);
+
+struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *,
+ f2fs_hash_t, int *, struct f2fs_dentry_ptr *);
bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
- unsigned int);
+ unsigned int, struct f2fs_str *);
void do_make_empty_dir(struct inode *, struct inode *,
struct f2fs_dentry_ptr *);
struct page *init_inode_metadata(struct inode *, struct inode *,
@@ -1470,7 +1605,7 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
ino_t f2fs_inode_by_name(struct inode *, struct qstr *);
void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
struct page *, struct inode *);
-int update_dent_inode(struct inode *, const struct qstr *);
+int update_dent_inode(struct inode *, struct inode *, const struct qstr *);
void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *,
const struct qstr *, f2fs_hash_t , unsigned int);
int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t,
@@ -1478,7 +1613,6 @@ int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t,
void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *,
struct inode *);
int f2fs_do_tmpfile(struct inode *, struct inode *);
-int f2fs_make_empty(struct inode *, struct inode *);
bool f2fs_empty_dir(struct inode *);
static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
@@ -1490,6 +1624,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
/*
* super.c
*/
+int f2fs_commit_super(struct f2fs_sb_info *, bool);
int f2fs_sync_fs(struct super_block *, int);
extern __printf(3, 4)
void f2fs_msg(struct super_block *, const char *, const char *, ...);
@@ -1506,8 +1641,8 @@ struct dnode_of_data;
struct node_info;
bool available_free_memory(struct f2fs_sb_info *, int);
+int need_dentry_mark(struct f2fs_sb_info *, nid_t);
bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
-bool has_fsynced_inode(struct f2fs_sb_info *, nid_t);
bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
@@ -1548,21 +1683,20 @@ int create_flush_cmd_control(struct f2fs_sb_info *);
void destroy_flush_cmd_control(struct f2fs_sb_info *);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
-void clear_prefree_segments(struct f2fs_sb_info *);
+void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
void release_discard_addrs(struct f2fs_sb_info *);
void discard_next_dnode(struct f2fs_sb_info *, block_t);
int npages_for_summary_flush(struct f2fs_sb_info *, bool);
void allocate_new_segments(struct f2fs_sb_info *);
int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
+void update_meta_page(struct f2fs_sb_info *, void *, block_t);
void write_meta_page(struct f2fs_sb_info *, struct page *);
-void write_node_page(struct f2fs_sb_info *, struct page *,
- unsigned int, struct f2fs_io_info *);
-void write_data_page(struct page *, struct dnode_of_data *,
- struct f2fs_io_info *);
-void rewrite_data_page(struct page *, struct f2fs_io_info *);
-void recover_data_page(struct f2fs_sb_info *, struct page *,
- struct f2fs_summary *, block_t, block_t);
+void write_node_page(unsigned int, struct f2fs_io_info *);
+void write_data_page(struct dnode_of_data *, struct f2fs_io_info *);
+void rewrite_data_page(struct f2fs_io_info *);
+void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *,
+ block_t, block_t, unsigned char, bool);
void allocate_data_block(struct f2fs_sb_info *, struct page *,
block_t, block_t *, struct f2fs_summary *, int);
void f2fs_wait_on_page_writeback(struct page *, enum page_type);
@@ -1581,6 +1715,7 @@ void destroy_segment_manager_caches(void);
*/
struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
+bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int);
int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int);
void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
@@ -1607,10 +1742,8 @@ void destroy_checkpoint_caches(void);
* data.c
*/
void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int);
-int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *,
- struct f2fs_io_info *);
-void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *,
- struct f2fs_io_info *);
+int f2fs_submit_page_bio(struct f2fs_io_info *);
+void f2fs_submit_page_mbio(struct f2fs_io_info *);
void set_data_blkaddr(struct dnode_of_data *);
int reserve_new_block(struct dnode_of_data *);
int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
@@ -1619,10 +1752,11 @@ void f2fs_destroy_extent_tree(struct inode *);
void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *);
void f2fs_update_extent_cache(struct dnode_of_data *);
void f2fs_preserve_extent_tree(struct inode *);
-struct page *find_data_page(struct inode *, pgoff_t, bool);
+struct page *get_read_data_page(struct inode *, pgoff_t, int);
+struct page *find_data_page(struct inode *, pgoff_t);
struct page *get_lock_data_page(struct inode *, pgoff_t);
struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
-int do_write_data_page(struct page *, struct f2fs_io_info *);
+int do_write_data_page(struct f2fs_io_info *);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
void init_extent_cache_info(struct f2fs_sb_info *);
int __init create_extent_cache(void);
@@ -1787,13 +1921,15 @@ extern const struct address_space_operations f2fs_node_aops;
extern const struct address_space_operations f2fs_meta_aops;
extern const struct inode_operations f2fs_dir_inode_operations;
extern const struct inode_operations f2fs_symlink_inode_operations;
+extern const struct inode_operations f2fs_encrypted_symlink_inode_operations;
extern const struct inode_operations f2fs_special_inode_operations;
extern struct kmem_cache *inode_entry_slab;
/*
* inline.c
*/
-bool f2fs_may_inline(struct inode *);
+bool f2fs_may_inline_data(struct inode *);
+bool f2fs_may_inline_dentry(struct inode *);
void read_inline_data(struct page *, struct page *);
bool truncate_inline_inode(struct page *, u64);
int f2fs_read_inline_data(struct inode *, struct page *);
@@ -1801,8 +1937,8 @@ int f2fs_convert_inline_page(struct dnode_of_data *, struct page *);
int f2fs_convert_inline_inode(struct inode *);
int f2fs_write_inline_data(struct inode *, struct page *);
bool recover_inline_data(struct inode *, struct page *);
-struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *,
- struct page **);
+struct f2fs_dir_entry *find_in_inline_dir(struct inode *,
+ struct f2fs_filename *, struct page **);
struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **);
int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *);
int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *,
@@ -1810,5 +1946,137 @@ int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *,
void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
struct inode *, struct inode *);
bool f2fs_empty_inline_dir(struct inode *);
-int f2fs_read_inline_dir(struct file *, struct dir_context *);
+int f2fs_read_inline_dir(struct file *, struct dir_context *,
+ struct f2fs_str *);
+
+/*
+ * crypto support
+ */
+/*
+ * Returns the result of file_is_encrypt() for @inode, or 0 unconditionally
+ * when the kernel is built without CONFIG_F2FS_FS_ENCRYPTION.
+ */
+static inline int f2fs_encrypted_inode(struct inode *inode)
+{
+#ifndef CONFIG_F2FS_FS_ENCRYPTION
+	return 0;
+#else
+	return file_is_encrypt(inode);
+#endif
+}
+
+/*
+ * Applies file_set_encrypt() to @inode; expands to an empty body when
+ * CONFIG_F2FS_FS_ENCRYPTION is not defined.
+ */
+static inline void f2fs_set_encrypted_inode(struct inode *inode)
+{
+#if defined(CONFIG_F2FS_FS_ENCRYPTION)
+	file_set_encrypt(inode);
+#endif
+}
+
+/*
+ * Reports whether @bio has a non-NULL bi_private (hinted as the unlikely
+ * case).  Always false without CONFIG_F2FS_FS_ENCRYPTION.
+ * NOTE(review): presumably bi_private carries the crypto context of an
+ * encrypted read -- confirm against the bio setup code in data.c.
+ */
+static inline bool f2fs_bio_encrypted(struct bio *bio)
+{
+#ifndef CONFIG_F2FS_FS_ENCRYPTION
+	return false;
+#else
+	return unlikely(bio->bi_private != NULL);
+#endif
+}
+
+/*
+ * Tests the F2FS_FEATURE_ENCRYPT feature bit of @sb through
+ * F2FS_HAS_FEATURE(); evaluates to 0 when CONFIG_F2FS_FS_ENCRYPTION is off.
+ */
+static inline int f2fs_sb_has_crypto(struct super_block *sb)
+{
+#ifndef CONFIG_F2FS_FS_ENCRYPTION
+	return 0;
+#else
+	return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT);
+#endif
+}
+
+/*
+ * Tells whether @inode is of a type that may be encrypted: regular files,
+ * directories and symlinks qualify.  Always false when the kernel is built
+ * without CONFIG_F2FS_FS_ENCRYPTION.
+ */
+static inline bool f2fs_may_encrypt(struct inode *inode)
+{
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+	/* i_mode is a umode_t inside the kernel; keep the local the same type */
+	umode_t mode = inode->i_mode;
+
+	return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode));
+#else
+	return false;
+#endif
+}
+
+/* crypto_policy.c */
+int f2fs_is_child_context_consistent_with_parent(struct inode *,
+ struct inode *);
+int f2fs_inherit_context(struct inode *, struct inode *, struct page *);
+int f2fs_process_policy(const struct f2fs_encryption_policy *, struct inode *);
+int f2fs_get_policy(struct inode *, struct f2fs_encryption_policy *);
+
+/* crypto.c */
+extern struct kmem_cache *f2fs_crypt_info_cachep;
+bool f2fs_valid_contents_enc_mode(uint32_t);
+uint32_t f2fs_validate_encryption_key_size(uint32_t, uint32_t);
+struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *);
+void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *);
+struct page *f2fs_encrypt(struct inode *, struct page *);
+int f2fs_decrypt(struct f2fs_crypto_ctx *, struct page *);
+int f2fs_decrypt_one(struct inode *, struct page *);
+void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *);
+
+/* crypto_key.c */
+void f2fs_free_encryption_info(struct inode *, struct f2fs_crypt_info *);
+int _f2fs_get_encryption_info(struct inode *inode);
+
+/* crypto_fname.c */
+bool f2fs_valid_filenames_enc_mode(uint32_t);
+u32 f2fs_fname_crypto_round_up(u32, u32);
+int f2fs_fname_crypto_alloc_buffer(struct inode *, u32, struct f2fs_str *);
+int f2fs_fname_disk_to_usr(struct inode *, f2fs_hash_t *,
+ const struct f2fs_str *, struct f2fs_str *);
+int f2fs_fname_usr_to_disk(struct inode *, const struct qstr *,
+ struct f2fs_str *);
+
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+void f2fs_restore_and_release_control_page(struct page **);
+void f2fs_restore_control_page(struct page *);
+
+int __init f2fs_init_crypto(void);
+int f2fs_crypto_initialize(void);
+void f2fs_exit_crypto(void);
+
+int f2fs_has_encryption_key(struct inode *);
+
+/*
+ * Make sure crypt info is available for @inode.
+ *
+ * The cached F2FS_I(inode)->i_crypt_info is kept (and 0 returned) unless it
+ * is missing, or its keyring key has any of the INVALIDATED, REVOKED or DEAD
+ * flag bits set.  In those cases the return value of
+ * _f2fs_get_encryption_info() is passed back to the caller.
+ */
+static inline int f2fs_get_encryption_info(struct inode *inode)
+{
+	const unsigned long stale_bits = (1 << KEY_FLAG_INVALIDATED) |
+					 (1 << KEY_FLAG_REVOKED) |
+					 (1 << KEY_FLAG_DEAD);
+	struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
+	struct key *key;
+
+	if (!ci)
+		return _f2fs_get_encryption_info(inode);
+
+	key = ci->ci_keyring_key;
+	if (key && (key->flags & stale_bits))
+		return _f2fs_get_encryption_info(inode);
+
+	return 0;
+}
+
+void f2fs_fname_crypto_free_buffer(struct f2fs_str *);
+int f2fs_fname_setup_filename(struct inode *, const struct qstr *,
+ int lookup, struct f2fs_filename *);
+void f2fs_fname_free_filename(struct f2fs_filename *);
+#else
+static inline void f2fs_restore_and_release_control_page(struct page **p) { }
+static inline void f2fs_restore_control_page(struct page *p) { }
+
+static inline int __init f2fs_init_crypto(void) { return 0; }
+static inline void f2fs_exit_crypto(void) { }
+
+static inline int f2fs_has_encryption_key(struct inode *i) { return 0; }
+static inline int f2fs_get_encryption_info(struct inode *i) { return 0; }
+static inline void f2fs_fname_crypto_free_buffer(struct f2fs_str *p) { }
+
+/*
+ * Variant used when CONFIG_F2FS_FS_ENCRYPTION is off: @fname is cleared and
+ * then made to alias the caller's qstr, so the on-disk name is the user name
+ * itself.  @dir and @lookup are not consulted.  Always returns 0.
+ */
+static inline int f2fs_fname_setup_filename(struct inode *dir,
+					const struct qstr *iname,
+					int lookup, struct f2fs_filename *fname)
+{
+	memset(fname, 0, sizeof(*fname));
+
+	fname->disk_name.len = iname->len;
+	fname->disk_name.name = (unsigned char *)iname->name;
+	fname->usr_fname = iname;
+
+	return 0;
+}
+
+static inline void f2fs_fname_free_filename(struct f2fs_filename *fname) { }
+#endif
#endif
diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h
new file mode 100644
index 000000000000..c2c1c2b63b25
--- /dev/null
+++ b/fs/f2fs/f2fs_crypto.h
@@ -0,0 +1,151 @@
+/*
+ * linux/fs/f2fs/f2fs_crypto.h
+ *
+ * Copied from linux/fs/ext4/ext4_crypto.h
+ *
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * This contains encryption header content for f2fs
+ *
+ * Written by Michael Halcrow, 2015.
+ * Modified by Jaegeuk Kim, 2015.
+ */
+#ifndef _F2FS_CRYPTO_H
+#define _F2FS_CRYPTO_H
+
+#include <linux/fs.h>
+
+#define F2FS_KEY_DESCRIPTOR_SIZE 8
+
+/* Policy provided via an ioctl on the topmost directory */
+struct f2fs_encryption_policy {
+ char version;
+ char contents_encryption_mode;
+ char filenames_encryption_mode;
+ char flags;
+ char master_key_descriptor[F2FS_KEY_DESCRIPTOR_SIZE];
+} __attribute__((__packed__));
+
+#define F2FS_ENCRYPTION_CONTEXT_FORMAT_V1 1
+#define F2FS_KEY_DERIVATION_NONCE_SIZE 16
+
+#define F2FS_POLICY_FLAGS_PAD_4 0x00
+#define F2FS_POLICY_FLAGS_PAD_8 0x01
+#define F2FS_POLICY_FLAGS_PAD_16 0x02
+#define F2FS_POLICY_FLAGS_PAD_32 0x03
+#define F2FS_POLICY_FLAGS_PAD_MASK 0x03
+#define F2FS_POLICY_FLAGS_VALID 0x03
+
+/**
+ * Encryption context for inode
+ *
+ * Protector format:
+ * 1 byte: Protector format (1 = this version)
+ * 1 byte: File contents encryption mode
+ * 1 byte: File names encryption mode
+ * 1 byte: Flags
+ * 8 bytes: Master Key descriptor
+ * 16 bytes: Encryption Key derivation nonce
+ */
+struct f2fs_encryption_context {
+ char format;
+ char contents_encryption_mode;
+ char filenames_encryption_mode;
+ char flags;
+ char master_key_descriptor[F2FS_KEY_DESCRIPTOR_SIZE];
+ char nonce[F2FS_KEY_DERIVATION_NONCE_SIZE];
+} __attribute__((__packed__));
+
+/* Encryption parameters */
+#define F2FS_XTS_TWEAK_SIZE 16
+#define F2FS_AES_128_ECB_KEY_SIZE 16
+#define F2FS_AES_256_GCM_KEY_SIZE 32
+#define F2FS_AES_256_CBC_KEY_SIZE 32
+#define F2FS_AES_256_CTS_KEY_SIZE 32
+#define F2FS_AES_256_XTS_KEY_SIZE 64
+#define F2FS_MAX_KEY_SIZE 64
+
+#define F2FS_KEY_DESC_PREFIX "f2fs:"
+#define F2FS_KEY_DESC_PREFIX_SIZE 5
+
+/*
+ * Packed key container: a mode word, a raw buffer of F2FS_MAX_KEY_SIZE bytes
+ * and a size word.
+ * NOTE(review): presumably @mode is one of F2FS_ENCRYPTION_MODE_* and @size
+ * counts the valid bytes in @raw -- confirm against crypto_key.c.
+ */
+struct f2fs_encryption_key {
+ __u32 mode;
+ char raw[F2FS_MAX_KEY_SIZE];
+ __u32 size;
+} __attribute__((__packed__));
+
+/*
+ * Crypto state hung off an inode; f2fs_get_encryption_info() in f2fs.h reads
+ * it through F2FS_I(inode)->i_crypt_info and inspects ci_keyring_key->flags
+ * to decide whether it must be refreshed.
+ * NOTE(review): the mode/flags/master-key fields look like copies of the
+ * matching struct f2fs_encryption_context fields -- confirm in crypto_key.c.
+ */
+struct f2fs_crypt_info {
+ char ci_data_mode;
+ char ci_filename_mode;
+ char ci_flags;
+ struct crypto_ablkcipher *ci_ctfm;
+ struct key *ci_keyring_key;
+ char ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE];
+};
+
+#define F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
+#define F2FS_WRITE_PATH_FL 0x00000002
+
+/*
+ * Crypto context.  The anonymous union overlays three layouts that share the
+ * same storage: the page pair "w", the bio plus work item "r", and a list
+ * link "free_list".
+ * NOTE(review): presumably "w" is used on the write path, "r" on the read
+ * path, and @flags holds the F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL /
+ * F2FS_WRITE_PATH_FL bits defined just above -- confirm in crypto.c.
+ */
+struct f2fs_crypto_ctx {
+ union {
+ struct {
+ struct page *bounce_page; /* Ciphertext page */
+ struct page *control_page; /* Original page */
+ } w;
+ struct {
+ struct bio *bio;
+ struct work_struct work;
+ } r;
+ struct list_head free_list; /* Free list */
+ };
+ char flags; /* Flags */
+};
+
+/*
+ * A completion paired with an integer result slot.  Instances declared with
+ * DECLARE_F2FS_COMPLETION_RESULT() below start with res == 0.
+ */
+struct f2fs_completion_result {
+ struct completion completion;
+ int res;
+};
+
+#define DECLARE_F2FS_COMPLETION_RESULT(ecr) \
+ struct f2fs_completion_result ecr = { \
+ COMPLETION_INITIALIZER((ecr).completion), 0 }
+
+/*
+ * Maps an F2FS_ENCRYPTION_MODE_* value to the matching F2FS_AES_256_*_KEY_SIZE
+ * constant.  Only the XTS, GCM, CBC and CTS modes are handled; any other
+ * value hits BUG(), after which 0 is returned.
+ */
+static inline int f2fs_encryption_key_size(int mode)
+{
+	int size = 0;
+
+	switch (mode) {
+	case F2FS_ENCRYPTION_MODE_AES_256_XTS:
+		size = F2FS_AES_256_XTS_KEY_SIZE;
+		break;
+	case F2FS_ENCRYPTION_MODE_AES_256_GCM:
+		size = F2FS_AES_256_GCM_KEY_SIZE;
+		break;
+	case F2FS_ENCRYPTION_MODE_AES_256_CBC:
+		size = F2FS_AES_256_CBC_KEY_SIZE;
+		break;
+	case F2FS_ENCRYPTION_MODE_AES_256_CTS:
+		size = F2FS_AES_256_CTS_KEY_SIZE;
+		break;
+	default:
+		BUG();
+	}
+
+	return size;
+}
+
+#define F2FS_FNAME_NUM_SCATTER_ENTRIES 4
+#define F2FS_CRYPTO_BLOCK_SIZE 16
+#define F2FS_FNAME_CRYPTO_DIGEST_SIZE 32
+
+/**
+ * For encrypted symlinks, the ciphertext length is stored at the beginning
+ * of the string in little-endian format.
+ */
+struct f2fs_encrypted_symlink_data {
+ __le16 len;
+ char encrypted_path[1];
+} __attribute__((__packed__));
+
+/**
+ * This function is used to calculate the disk space required to
+ * store a filename of length l in encrypted symlink format.
+ */
+static inline u32 encrypted_symlink_data_len(u32 l)
+{
+	/* struct size minus the one-byte encrypted_path[1] placeholder */
+	const u32 overhead = sizeof(struct f2fs_encrypted_symlink_data) - 1;
+
+	return l + overhead;
+}
+#endif /* _F2FS_CRYPTO_H */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 2b52e48d7482..ada2a3dd701a 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -20,6 +20,7 @@
#include <linux/uaccess.h>
#include <linux/mount.h>
#include <linux/pagevec.h>
+#include <linux/random.h>
#include "f2fs.h"
#include "node.h"
@@ -105,7 +106,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
if (!dentry)
return 0;
- if (update_dent_inode(inode, &dentry->d_name)) {
+ if (update_dent_inode(inode, inode, &dentry->d_name)) {
dput(dentry);
return 0;
}
@@ -122,6 +123,8 @@ static inline bool need_do_checkpoint(struct inode *inode)
if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
need_cp = true;
+ else if (file_enc_name(inode) && need_dentry_mark(sbi, inode->i_ino))
+ need_cp = true;
else if (file_wrong_pino(inode))
need_cp = true;
else if (!space_for_roll_forward(sbi))
@@ -271,7 +274,7 @@ flush_out:
ret = f2fs_issue_flush(sbi);
out:
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
- f2fs_trace_ios(NULL, NULL, 1);
+ f2fs_trace_ios(NULL, 1);
return ret;
}
@@ -407,6 +410,12 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
+ if (f2fs_encrypted_inode(inode)) {
+ int err = f2fs_get_encryption_info(inode);
+ if (err)
+ return 0;
+ }
+
/* we don't need to use inline_data strictly */
if (f2fs_has_inline_data(inode)) {
int err = f2fs_convert_inline_inode(inode);
@@ -419,6 +428,18 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
+/*
+ * ->open for regular f2fs files.  Runs generic_file_open() first and returns
+ * its error unchanged.  For an encrypted inode the encryption info is then
+ * looked up; any failure of that lookup is reported as -EACCES.
+ */
+static int f2fs_file_open(struct inode *inode, struct file *filp)
+{
+	int err = generic_file_open(inode, filp);
+
+	if (err)
+		return err;
+
+	if (!f2fs_encrypted_inode(inode))
+		return 0;
+
+	return f2fs_get_encryption_info(inode) ? -EACCES : 0;
+}
+
int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
{
int nr_free = 0, ofs = dn->ofs_in_node;
@@ -461,28 +482,32 @@ void truncate_data_blocks(struct dnode_of_data *dn)
}
static int truncate_partial_data_page(struct inode *inode, u64 from,
- bool force)
+ bool cache_only)
{
unsigned offset = from & (PAGE_CACHE_SIZE - 1);
+ pgoff_t index = from >> PAGE_CACHE_SHIFT;
+ struct address_space *mapping = inode->i_mapping;
struct page *page;
- if (!offset && !force)
+ if (!offset && !cache_only)
return 0;
- page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, force);
- if (IS_ERR(page))
+ if (cache_only) {
+ page = grab_cache_page(mapping, index);
+ if (page && PageUptodate(page))
+ goto truncate_out;
+ f2fs_put_page(page, 1);
return 0;
+ }
- lock_page(page);
- if (unlikely(!PageUptodate(page) ||
- page->mapping != inode->i_mapping))
- goto out;
-
+ page = get_lock_data_page(inode, index);
+ if (IS_ERR(page))
+ return 0;
+truncate_out:
f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, offset, PAGE_CACHE_SIZE - offset);
- if (!force)
+ if (!cache_only || !f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode))
set_page_dirty(page);
-out:
f2fs_put_page(page, 1);
return 0;
}
@@ -560,7 +585,7 @@ void f2fs_truncate(struct inode *inode)
trace_f2fs_truncate(inode);
/* we should check inline_data size */
- if (f2fs_has_inline_data(inode) && !f2fs_may_inline(inode)) {
+ if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) {
if (f2fs_convert_inline_inode(inode))
return;
}
@@ -622,16 +647,20 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
return err;
if (attr->ia_valid & ATTR_SIZE) {
- if (attr->ia_size != i_size_read(inode)) {
+ if (f2fs_encrypted_inode(inode) &&
+ f2fs_get_encryption_info(inode))
+ return -EACCES;
+
+ if (attr->ia_size <= i_size_read(inode)) {
truncate_setsize(inode, attr->ia_size);
f2fs_truncate(inode);
f2fs_balance_fs(F2FS_I_SB(inode));
} else {
/*
- * giving a chance to truncate blocks past EOF which
- * are fallocated with FALLOC_FL_KEEP_SIZE.
+ * do not trim all blocks after i_size if target size is
+ * larger than i_size.
*/
- f2fs_truncate(inode);
+ truncate_setsize(inode, attr->ia_size);
}
}
@@ -718,10 +747,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (!S_ISREG(inode->i_mode))
return -EOPNOTSUPP;
- /* skip punching hole beyond i_size */
- if (offset >= inode->i_size)
- return ret;
-
if (f2fs_has_inline_data(inode)) {
ret = f2fs_convert_inline_inode(inode);
if (ret)
@@ -765,6 +790,320 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
return ret;
}
+/*
+ * Shift block mappings of @inode downwards: for every page index from @end up
+ * to the last page covered by i_size, the block found at that index is
+ * detached and re-attached at the index walking up from @start.  The whole
+ * walk runs between f2fs_lock_op() and f2fs_unlock_op().
+ *
+ * Returns 0 on success or the first negative error met; -ENOENT from the
+ * dnode lookups is treated as "no block here", not as a failure.
+ */
+static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct dnode_of_data dn;
+ /* number of pages needed to hold i_size bytes, rounded up */
+ pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ int ret = 0;
+
+ f2fs_lock_op(sbi);
+
+ for (; end < nrpages; start++, end++) {
+ block_t new_addr, old_addr;
+
+ /* step 1: pick up the source block at index "end" and unmap it */
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ret = get_dnode_of_data(&dn, end, LOOKUP_NODE_RA);
+ if (ret && ret != -ENOENT) {
+ goto out;
+ } else if (ret == -ENOENT) {
+ new_addr = NULL_ADDR;
+ } else {
+ new_addr = dn.data_blkaddr;
+ truncate_data_blocks_range(&dn, 1);
+ f2fs_put_dnode(&dn);
+ }
+
+ if (new_addr == NULL_ADDR) {
+ /* source has no block: make sure the destination has none either */
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ret = get_dnode_of_data(&dn, start, LOOKUP_NODE_RA);
+ if (ret && ret != -ENOENT)
+ goto out;
+ else if (ret == -ENOENT)
+ continue;
+
+ if (dn.data_blkaddr == NULL_ADDR) {
+ f2fs_put_dnode(&dn);
+ continue;
+ } else {
+ truncate_data_blocks_range(&dn, 1);
+ }
+
+ f2fs_put_dnode(&dn);
+ } else {
+ struct page *ipage;
+
+ /* step 2: reserve a slot at index "start" and point it at the source */
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ ret = PTR_ERR(ipage);
+ goto out;
+ }
+
+ set_new_dnode(&dn, inode, ipage, NULL, 0);
+ ret = f2fs_reserve_block(&dn, start);
+ if (ret)
+ goto out;
+
+ old_addr = dn.data_blkaddr;
+ if (old_addr != NEW_ADDR && new_addr == NEW_ADDR) {
+ /* destination held a real address, source is NEW_ADDR: drop the old block */
+ dn.data_blkaddr = NULL_ADDR;
+ f2fs_update_extent_cache(&dn);
+ invalidate_blocks(sbi, old_addr);
+
+ dn.data_blkaddr = new_addr;
+ set_data_blkaddr(&dn);
+ } else if (new_addr != NEW_ADDR) {
+ struct node_info ni;
+
+ /* source has a real address: swap it in via f2fs_replace_block() */
+ get_node_info(sbi, dn.nid, &ni);
+ f2fs_replace_block(sbi, &dn, old_addr, new_addr,
+ ni.version, true);
+ }
+
+ f2fs_put_dnode(&dn);
+ }
+ }
+ /* the last lookup may have left -ENOENT in ret; that is not an error */
+ ret = 0;
+out:
+ f2fs_unlock_op(sbi);
+ return ret;
+}
+
+/*
+ * Remove the byte range [@offset, @offset + @len) from @inode and close the
+ * gap, shrinking i_size by @len.
+ *
+ * The request is refused with -EINVAL unless @inode is a regular file, the
+ * range ends strictly before i_size, and both @offset and @len are multiples
+ * of F2FS_BLKSIZE.  Dirty pages from @offset onwards are written back and the
+ * page cache beyond @offset is dropped before blocks are remapped by
+ * f2fs_do_collapse().  Returns 0 or a negative errno.
+ */
+static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+{
+	const loff_t blk_mask = F2FS_BLKSIZE - 1;
+	pgoff_t first_pg, last_pg;
+	loff_t shrunk_size;
+	int err;
+
+	/* regular file, range inside i_size, block-aligned offset and length */
+	if (!S_ISREG(inode->i_mode) ||
+	    offset + len >= i_size_read(inode) ||
+	    (offset & blk_mask) || (len & blk_mask))
+		return -EINVAL;
+
+	first_pg = offset >> PAGE_CACHE_SHIFT;
+	last_pg = (offset + len) >> PAGE_CACHE_SHIFT;
+
+	/* flush everything from offset to the end of the file */
+	err = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
+	if (err)
+		return err;
+
+	truncate_pagecache(inode, offset);
+
+	err = f2fs_do_collapse(inode, first_pg, last_pg);
+	if (err)
+		return err;
+
+	shrunk_size = i_size_read(inode) - len;
+
+	err = truncate_blocks(inode, shrunk_size, true);
+	if (err)
+		return err;
+
+	i_size_write(inode, shrunk_size);
+	return 0;
+}
+
+/*
+ * f2fs_zero_range - zero the byte range [@offset, @offset + @len) of @inode
+ * @inode:  file to operate on; anything but a regular file gets -EINVAL
+ * @offset: first byte of the range
+ * @len:    length of the range in bytes
+ * @mode:   fallocate mode bits; only FALLOC_FL_KEEP_SIZE is examined here
+ *
+ * Partial pages at either edge are zeroed through fill_zero().  Every whole
+ * page in between gets a reserved block whose address is reset to NEW_ADDR
+ * after the block previously mapped there has been invalidated.  Unless
+ * FALLOC_FL_KEEP_SIZE is set, i_size is raised to cover what was processed;
+ * this also happens when the loop bails out early with an error.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
+								int mode)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct address_space *mapping = inode->i_mapping;
+	pgoff_t index, pg_start, pg_end;
+	loff_t new_size = i_size_read(inode);
+	loff_t off_start, off_end;
+	int ret = 0;
+
+	if (!S_ISREG(inode->i_mode))
+		return -EINVAL;
+
+	ret = inode_newsize_ok(inode, (len + offset));
+	if (ret)
+		return ret;
+
+	f2fs_balance_fs(sbi);
+
+	if (f2fs_has_inline_data(inode)) {
+		ret = f2fs_convert_inline_inode(inode);
+		if (ret)
+			return ret;
+	}
+
+	/* push dirty data out, then drop cached pages covering the range */
+	ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
+	if (ret)
+		return ret;
+
+	truncate_pagecache_range(inode, offset, offset + len - 1);
+
+	pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
+	pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
+
+	off_start = offset & (PAGE_CACHE_SIZE - 1);
+	off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+
+	if (pg_start == pg_end) {
+		/* the whole range sits inside a single page */
+		fill_zero(inode, pg_start, off_start, off_end - off_start);
+		new_size = max_t(loff_t, new_size, offset + len);
+	} else {
+		if (off_start) {
+			/* leading partial page */
+			fill_zero(inode, pg_start++, off_start,
+					PAGE_CACHE_SIZE - off_start);
+			/*
+			 * Widen to loff_t before shifting: pgoff_t is only
+			 * 32 bits wide on 32-bit kernels and the byte offset
+			 * would otherwise wrap.
+			 */
+			new_size = max_t(loff_t, new_size,
+					(loff_t)pg_start << PAGE_CACHE_SHIFT);
+		}
+
+		for (index = pg_start; index < pg_end; index++) {
+			struct dnode_of_data dn;
+			struct page *ipage;
+
+			f2fs_lock_op(sbi);
+
+			ipage = get_node_page(sbi, inode->i_ino);
+			if (IS_ERR(ipage)) {
+				ret = PTR_ERR(ipage);
+				f2fs_unlock_op(sbi);
+				goto out;
+			}
+
+			set_new_dnode(&dn, inode, ipage, NULL, 0);
+			ret = f2fs_reserve_block(&dn, index);
+			if (ret) {
+				f2fs_unlock_op(sbi);
+				goto out;
+			}
+
+			if (dn.data_blkaddr != NEW_ADDR) {
+				/* drop the old block and leave the slot at NEW_ADDR */
+				invalidate_blocks(sbi, dn.data_blkaddr);
+
+				dn.data_blkaddr = NEW_ADDR;
+				set_data_blkaddr(&dn);
+
+				dn.data_blkaddr = NULL_ADDR;
+				f2fs_update_extent_cache(&dn);
+			}
+			f2fs_put_dnode(&dn);
+			f2fs_unlock_op(sbi);
+
+			/* same widening as above, see the leading-page case */
+			new_size = max_t(loff_t, new_size,
+				(loff_t)(index + 1) << PAGE_CACHE_SHIFT);
+		}
+
+		if (off_end) {
+			/* trailing partial page */
+			fill_zero(inode, pg_end, 0, off_end);
+			new_size = max_t(loff_t, new_size, offset + len);
+		}
+	}
+
+out:
+	if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) {
+		i_size_write(inode, new_size);
+		mark_inode_dirty(inode);
+		update_inode_page(inode);
+	}
+
+	return ret;
+}
+
+/*
+ * Open a gap of @len bytes at @offset in @inode by moving every mapped block
+ * from @offset onwards up by (@len in pages), then grow i_size by @len.
+ *
+ * Rejected with -EINVAL unless @inode is a regular file, @offset lies inside
+ * i_size and both @offset and @len are multiples of F2FS_BLKSIZE; -EFBIG if
+ * the resulting size would exceed s_maxbytes.  Returns 0 or a negative errno.
+ */
+static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ pgoff_t pg_start, pg_end, delta, nrpages, idx;
+ loff_t new_size;
+ int ret;
+
+ if (!S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ new_size = i_size_read(inode) + len;
+ if (new_size > inode->i_sb->s_maxbytes)
+ return -EFBIG;
+
+ if (offset >= i_size_read(inode))
+ return -EINVAL;
+
+ /* insert range should be aligned to block size of f2fs. */
+ if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
+ return -EINVAL;
+
+ f2fs_balance_fs(sbi);
+
+ /* truncate at the current i_size before any block is moved */
+ ret = truncate_blocks(inode, i_size_read(inode), true);
+ if (ret)
+ return ret;
+
+ /* write out all dirty pages from offset */
+ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
+ if (ret)
+ return ret;
+
+ truncate_pagecache(inode, offset);
+
+ pg_start = offset >> PAGE_CACHE_SHIFT;
+ pg_end = (offset + len) >> PAGE_CACHE_SHIFT;
+ delta = pg_end - pg_start;
+ nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+
+ /*
+ * Walk from the last page down to pg_start so a block is never moved onto
+ * an index that still has to be processed.  idx is unsigned, so the
+ * "idx != -1" test stops the loop once it wraps below zero.
+ */
+ for (idx = nrpages - 1; idx >= pg_start && idx != -1; idx--) {
+ struct dnode_of_data dn;
+ struct page *ipage;
+ block_t new_addr, old_addr;
+
+ f2fs_lock_op(sbi);
+
+ /* detach the block currently mapped at idx, if there is one */
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ret = get_dnode_of_data(&dn, idx, LOOKUP_NODE_RA);
+ if (ret && ret != -ENOENT) {
+ goto out;
+ } else if (ret == -ENOENT) {
+ goto next;
+ } else if (dn.data_blkaddr == NULL_ADDR) {
+ f2fs_put_dnode(&dn);
+ goto next;
+ } else {
+ new_addr = dn.data_blkaddr;
+ truncate_data_blocks_range(&dn, 1);
+ f2fs_put_dnode(&dn);
+ }
+
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ ret = PTR_ERR(ipage);
+ goto out;
+ }
+
+ /* reserve the destination slot, delta pages further up */
+ set_new_dnode(&dn, inode, ipage, NULL, 0);
+ ret = f2fs_reserve_block(&dn, idx + delta);
+ if (ret)
+ goto out;
+
+ /* the destination is expected to be freshly reserved */
+ old_addr = dn.data_blkaddr;
+ f2fs_bug_on(sbi, old_addr != NEW_ADDR);
+
+ if (new_addr != NEW_ADDR) {
+ struct node_info ni;
+
+ get_node_info(sbi, dn.nid, &ni);
+ f2fs_replace_block(sbi, &dn, old_addr, new_addr,
+ ni.version, true);
+ }
+ f2fs_put_dnode(&dn);
+next:
+ f2fs_unlock_op(sbi);
+ }
+
+ i_size_write(inode, new_size);
+ return 0;
+out:
+ /* error inside the loop: the op lock is still held at this point */
+ f2fs_unlock_op(sbi);
+ return ret;
+}
+
static int expand_inode_data(struct inode *inode, loff_t offset,
loff_t len, int mode)
{
@@ -830,23 +1169,40 @@ static long f2fs_fallocate(struct file *file, int mode,
loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
- long ret;
+ long ret = 0;
+
+ if (f2fs_encrypted_inode(inode) &&
+ (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
+ return -EOPNOTSUPP;
- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
+ FALLOC_FL_INSERT_RANGE))
return -EOPNOTSUPP;
mutex_lock(&inode->i_mutex);
- if (mode & FALLOC_FL_PUNCH_HOLE)
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ if (offset >= inode->i_size)
+ goto out;
+
ret = punch_hole(inode, offset, len);
- else
+ } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
+ ret = f2fs_collapse_range(inode, offset, len);
+ } else if (mode & FALLOC_FL_ZERO_RANGE) {
+ ret = f2fs_zero_range(inode, offset, len, mode);
+ } else if (mode & FALLOC_FL_INSERT_RANGE) {
+ ret = f2fs_insert_range(inode, offset, len);
+ } else {
ret = expand_inode_data(inode, offset, len, mode);
+ }
if (!ret) {
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
}
+out:
mutex_unlock(&inode->i_mutex);
trace_f2fs_fallocate(inode, mode, offset, len, ret);
@@ -1035,11 +1391,9 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
}
- if (f2fs_is_volatile_file(inode)) {
+ if (f2fs_is_volatile_file(inode))
clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
- filemap_fdatawrite(inode->i_mapping);
- set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
- }
+
mnt_drop_write_file(filp);
return ret;
}
@@ -1109,6 +1463,86 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
return 0;
}
+/* Returns true as soon as one of the 16 bytes of @u is non-zero. */
+static bool uuid_is_nonzero(__u8 u[16])
+{
+	const __u8 *p = u;
+	const __u8 *end = u + 16;
+
+	while (p < end) {
+		if (*p++)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * F2FS_IOC_SET_ENCRYPTION_POLICY handler: copies a struct
+ * f2fs_encryption_policy from the user pointer in @arg and passes it, with
+ * the inode behind @filp, to f2fs_process_policy().
+ *
+ * Returns -EFAULT if the copy fails, otherwise whatever
+ * f2fs_process_policy() returns; -EOPNOTSUPP without
+ * CONFIG_F2FS_FS_ENCRYPTION.
+ */
+static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
+{
+#ifndef CONFIG_F2FS_FS_ENCRYPTION
+	return -EOPNOTSUPP;
+#else
+	struct f2fs_encryption_policy __user *upolicy =
+				(struct f2fs_encryption_policy __user *)arg;
+	struct f2fs_encryption_policy policy;
+
+	if (copy_from_user(&policy, upolicy, sizeof(policy)))
+		return -EFAULT;
+
+	return f2fs_process_policy(&policy, file_inode(filp));
+#endif
+}
+
+/*
+ * F2FS_IOC_GET_ENCRYPTION_POLICY handler: obtains the policy of the inode
+ * behind @filp with f2fs_get_policy() and copies it to the user pointer in
+ * @arg.
+ *
+ * Returns the error from f2fs_get_policy() if any, -EFAULT if the copy to
+ * user space fails, 0 on success; -EOPNOTSUPP without
+ * CONFIG_F2FS_FS_ENCRYPTION.
+ */
+static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
+{
+#ifndef CONFIG_F2FS_FS_ENCRYPTION
+	return -EOPNOTSUPP;
+#else
+	struct f2fs_encryption_policy __user *upolicy =
+				(struct f2fs_encryption_policy __user *)arg;
+	struct f2fs_encryption_policy policy;
+	int ret;
+
+	ret = f2fs_get_policy(file_inode(filp), &policy);
+	if (ret)
+		return ret;
+
+	return copy_to_user(upolicy, &policy, sizeof(policy)) ? -EFAULT : 0;
+#endif
+}
+
+/*
+ * F2FS_IOC_GET_ENCRYPTION_PWSALT handler: hands the 16-byte encrypt_pw_salt
+ * of the superblock to user space at @arg.
+ *
+ * If the salt is still all zeroes, a random UUID is generated into it and the
+ * superblock is committed under mnt_want_write_file(); should the commit
+ * fail, the salt is zeroed again and the error returned.
+ *
+ * Returns -EOPNOTSUPP when f2fs_sb_has_crypto() is false, -EFAULT when the
+ * copy to user space fails, 0 on success.
+ */
+static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	__u8 *salt;
+	int err;
+
+	if (!f2fs_sb_has_crypto(inode->i_sb))
+		return -EOPNOTSUPP;
+
+	salt = sbi->raw_super->encrypt_pw_salt;
+
+	if (!uuid_is_nonzero(salt)) {
+		err = mnt_want_write_file(filp);
+		if (err)
+			return err;
+
+		/* update superblock with uuid */
+		generate_random_uuid(salt);
+
+		err = f2fs_commit_super(sbi, false);
+
+		mnt_drop_write_file(filp);
+		if (err) {
+			/* undo new data */
+			memset(salt, 0, 16);
+			return err;
+		}
+	}
+
+	if (copy_to_user((__u8 __user *)arg, salt, 16))
+		return -EFAULT;
+	return 0;
+}
+
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
@@ -1132,11 +1566,29 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_shutdown(filp, arg);
case FITRIM:
return f2fs_ioc_fitrim(filp, arg);
+ case F2FS_IOC_SET_ENCRYPTION_POLICY:
+ return f2fs_ioc_set_encryption_policy(filp, arg);
+ case F2FS_IOC_GET_ENCRYPTION_POLICY:
+ return f2fs_ioc_get_encryption_policy(filp, arg);
+ case F2FS_IOC_GET_ENCRYPTION_PWSALT:
+ return f2fs_ioc_get_encryption_pwsalt(filp, arg);
default:
return -ENOTTY;
}
}
+/*
+ * ->write_iter for regular f2fs files.  For an encrypted inode for which
+ * f2fs_has_encryption_key() reports no key, the encryption info is fetched
+ * first and a failure there ends the write with -EACCES.  Everything else is
+ * passed straight to generic_file_write_iter().
+ */
+static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (f2fs_encrypted_inode(inode)) {
+		if (!f2fs_has_encryption_key(inode) &&
+		    f2fs_get_encryption_info(inode))
+			return -EACCES;
+	}
+
+	return generic_file_write_iter(iocb, from);
+}
+
#ifdef CONFIG_COMPAT
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
@@ -1157,8 +1609,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
const struct file_operations f2fs_file_operations = {
.llseek = f2fs_llseek,
.read_iter = generic_file_read_iter,
- .write_iter = generic_file_write_iter,
- .open = generic_file_open,
+ .write_iter = f2fs_file_write_iter,
+ .open = f2fs_file_open,
.release = f2fs_release_file,
.mmap = f2fs_file_mmap,
.fsync = f2fs_sync_file,
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ed58211fe79b..e1e73617d13b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -518,12 +518,79 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return 1;
}
-static void move_data_page(struct inode *inode, struct page *page, int gc_type)
+static void move_encrypted_block(struct inode *inode, block_t bidx)
{
struct f2fs_io_info fio = {
+ .sbi = F2FS_I_SB(inode),
.type = DATA,
- .rw = WRITE_SYNC,
+ .rw = READ_SYNC,
+ .encrypted_page = NULL,
};
+ struct dnode_of_data dn;
+ struct f2fs_summary sum;
+ struct node_info ni;
+ struct page *page;
+ int err;
+
+ /* do not read out */
+ page = grab_cache_page(inode->i_mapping, bidx);
+ if (!page)
+ return;
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
+ if (err)
+ goto out;
+
+ if (unlikely(dn.data_blkaddr == NULL_ADDR))
+ goto put_out;
+
+ get_node_info(fio.sbi, dn.nid, &ni);
+ set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
+
+ /* read page */
+ fio.page = page;
+ fio.blk_addr = dn.data_blkaddr;
+
+ fio.encrypted_page = grab_cache_page(META_MAPPING(fio.sbi), fio.blk_addr);
+ if (!fio.encrypted_page)
+ goto put_out;
+
+ f2fs_submit_page_bio(&fio);
+
+ /* allocate block address */
+ f2fs_wait_on_page_writeback(dn.node_page, NODE);
+
+ allocate_data_block(fio.sbi, NULL, fio.blk_addr,
+ &fio.blk_addr, &sum, CURSEG_COLD_DATA);
+ dn.data_blkaddr = fio.blk_addr;
+
+ /* write page */
+ lock_page(fio.encrypted_page);
+ set_page_writeback(fio.encrypted_page);
+ fio.rw = WRITE_SYNC;
+ f2fs_submit_page_mbio(&fio);
+
+ set_data_blkaddr(&dn);
+ f2fs_update_extent_cache(&dn);
+ set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
+ if (page->index == 0)
+ set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
+
+ f2fs_put_page(fio.encrypted_page, 1);
+put_out:
+ f2fs_put_dnode(&dn);
+out:
+ f2fs_put_page(page, 1);
+}
+
+static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
+{
+ struct page *page;
+
+ page = get_lock_data_page(inode, bidx);
+ if (IS_ERR(page))
+ return;
if (gc_type == BG_GC) {
if (PageWriteback(page))
@@ -531,12 +598,19 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
set_page_dirty(page);
set_cold_data(page);
} else {
+ struct f2fs_io_info fio = {
+ .sbi = F2FS_I_SB(inode),
+ .type = DATA,
+ .rw = WRITE_SYNC,
+ .page = page,
+ .encrypted_page = NULL,
+ };
f2fs_wait_on_page_writeback(page, DATA);
if (clear_page_dirty_for_io(page))
inode_dec_dirty_pages(inode);
set_cold_data(page);
- do_write_data_page(page, &fio);
+ do_write_data_page(&fio);
clear_cold_data(page);
}
out:
@@ -599,10 +673,16 @@ next_step:
if (IS_ERR(inode) || is_bad_inode(inode))
continue;
- start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
+ /* if encrypted inode, let's go phase 3 */
+ if (f2fs_encrypted_inode(inode) &&
+ S_ISREG(inode->i_mode)) {
+ add_gc_inode(gc_list, inode);
+ continue;
+ }
- data_page = find_data_page(inode,
- start_bidx + ofs_in_node, false);
+ start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
+ data_page = get_read_data_page(inode,
+ start_bidx + ofs_in_node, READA);
if (IS_ERR(data_page)) {
iput(inode);
continue;
@@ -616,12 +696,12 @@ next_step:
/* phase 3 */
inode = find_gc_inode(gc_list, dni.ino);
if (inode) {
- start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
- data_page = get_lock_data_page(inode,
- start_bidx + ofs_in_node);
- if (IS_ERR(data_page))
- continue;
- move_data_page(inode, data_page, gc_type);
+ start_bidx = start_bidx_of_node(nofs, F2FS_I(inode))
+ + ofs_in_node;
+ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
+ move_encrypted_block(inode, start_bidx);
+ else
+ move_data_page(inode, start_bidx, gc_type);
stat_inc_data_blk_count(sbi, 1, gc_type);
}
}
@@ -670,6 +750,15 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
sum = page_address(sum_page);
+ /*
+ * this is to avoid deadlock:
+ * - lock_page(sum_page) - f2fs_replace_block
+ * - check_valid_map() - mutex_lock(sentry_lock)
+ * - mutex_lock(sentry_lock) - change_curseg()
+ * - lock_page(sum_page)
+ */
+ unlock_page(sum_page);
+
switch (GET_SUM_TYPE((&sum->footer))) {
case SUM_TYPE_NODE:
gc_node_segment(sbi, sum->entries, segno, gc_type);
@@ -683,7 +772,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type);
stat_inc_call_count(sbi->stat_info);
- f2fs_put_page(sum_page, 1);
+ f2fs_put_page(sum_page, 0);
}
int f2fs_gc(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index a844fcfb9a8d..71b7206c431e 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -79,8 +79,7 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
const unsigned char *name = name_info->name;
size_t len = name_info->len;
- if ((len <= 2) && (name[0] == '.') &&
- (name[1] == '.' || name[1] == '\0'))
+ if (is_dot_dotdot(name_info))
return 0;
/* Initialize the default seed for the hash checksum functions */
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 8140e4f0e538..38e75fb1e488 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -13,7 +13,7 @@
#include "f2fs.h"
-bool f2fs_may_inline(struct inode *inode)
+bool f2fs_may_inline_data(struct inode *inode)
{
if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
return false;
@@ -27,6 +27,20 @@ bool f2fs_may_inline(struct inode *inode)
if (i_size_read(inode) > MAX_INLINE_DATA)
return false;
+ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
+ return false;
+
+ return true;
+}
+
+bool f2fs_may_inline_dentry(struct inode *inode)
+{
+ if (!test_opt(F2FS_I_SB(inode), INLINE_DENTRY))
+ return false;
+
+ if (!S_ISDIR(inode->i_mode))
+ return false;
+
return true;
}
@@ -95,8 +109,11 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
{
void *src_addr, *dst_addr;
struct f2fs_io_info fio = {
+ .sbi = F2FS_I_SB(dn->inode),
.type = DATA,
.rw = WRITE_SYNC | REQ_PRIO,
+ .page = page,
+ .encrypted_page = NULL,
};
int dirty, err;
@@ -130,7 +147,7 @@ no_update:
/* write data page to try to make data consistent */
set_page_writeback(page);
fio.blk_addr = dn->data_blkaddr;
- write_data_page(page, dn, &fio);
+ write_data_page(dn, &fio);
set_data_blkaddr(dn);
f2fs_update_extent_cache(dn);
f2fs_wait_on_page_writeback(page, DATA);
@@ -267,23 +284,26 @@ process_inline:
}
struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
- struct qstr *name, struct page **res_page)
+ struct f2fs_filename *fname, struct page **res_page)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct f2fs_inline_dentry *inline_dentry;
+ struct qstr name = FSTR_TO_QSTR(&fname->disk_name);
struct f2fs_dir_entry *de;
struct f2fs_dentry_ptr d;
struct page *ipage;
+ f2fs_hash_t namehash;
ipage = get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage))
return NULL;
- inline_dentry = inline_data_addr(ipage);
+ namehash = f2fs_dentry_hash(&name);
- make_dentry_ptr(&d, (void *)inline_dentry, 2);
- de = find_target_dentry(name, NULL, &d);
+ inline_dentry = inline_data_addr(ipage);
+ make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2);
+ de = find_target_dentry(fname, namehash, NULL, &d);
unlock_page(ipage);
if (de)
*res_page = ipage;
@@ -325,7 +345,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent,
dentry_blk = inline_data_addr(ipage);
- make_dentry_ptr(&d, (void *)dentry_blk, 2);
+ make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
do_make_empty_dir(inode, parent, &d);
set_page_dirty(ipage);
@@ -429,7 +449,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
f2fs_wait_on_page_writeback(ipage, NODE);
name_hash = f2fs_dentry_hash(name);
- make_dentry_ptr(&d, (void *)dentry_blk, 2);
+ make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos);
set_page_dirty(ipage);
@@ -506,7 +526,8 @@ bool f2fs_empty_inline_dir(struct inode *dir)
return true;
}
-int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx)
+int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
+ struct f2fs_str *fstr)
{
struct inode *inode = file_inode(file);
struct f2fs_inline_dentry *inline_dentry = NULL;
@@ -522,9 +543,9 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx)
inline_dentry = inline_data_addr(ipage);
- make_dentry_ptr(&d, (void *)inline_dentry, 2);
+ make_dentry_ptr(inode, &d, (void *)inline_dentry, 2);
- if (!f2fs_fill_dentries(ctx, &d, 0))
+ if (!f2fs_fill_dentries(ctx, &d, 0, fstr))
ctx->pos = NR_INLINE_DENTRY;
f2fs_put_page(ipage, 1);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index e622ec95409e..2550868dc651 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -198,7 +198,10 @@ make_now:
inode->i_mapping->a_ops = &f2fs_dblock_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
} else if (S_ISLNK(inode->i_mode)) {
- inode->i_op = &f2fs_symlink_inode_operations;
+ if (f2fs_encrypted_inode(inode))
+ inode->i_op = &f2fs_encrypted_symlink_inode_operations;
+ else
+ inode->i_op = &f2fs_symlink_inode_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
@@ -359,6 +362,10 @@ no_delete:
if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE))
add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
out_clear:
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+ if (F2FS_I(inode)->i_crypt_info)
+ f2fs_free_encryption_info(inode, F2FS_I(inode)->i_crypt_info);
+#endif
clear_inode(inode);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 658e8079aaf9..fdbae21ee8fb 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -56,11 +56,18 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
goto out;
}
- if (f2fs_may_inline(inode))
+ /* If the directory is encrypted, then we should encrypt the inode. */
+ if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
+ f2fs_set_encrypted_inode(inode);
+
+ if (f2fs_may_inline_data(inode))
set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
- if (test_opt(sbi, INLINE_DENTRY) && S_ISDIR(inode->i_mode))
+ if (f2fs_may_inline_dentry(inode))
set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
+ stat_inc_inline_inode(inode);
+ stat_inc_inline_dir(inode);
+
trace_f2fs_new_inode(inode, 0);
mark_inode_dirty(inode);
return inode;
@@ -136,7 +143,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
alloc_nid_done(sbi, ino);
- stat_inc_inline_inode(inode);
d_instantiate(dentry, inode);
unlock_new_inode(inode);
@@ -155,6 +161,10 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
int err;
+ if (f2fs_encrypted_inode(dir) &&
+ !f2fs_is_child_context_consistent_with_parent(dir, inode))
+ return -EPERM;
+
f2fs_balance_fs(sbi);
inode->i_ctime = CURRENT_TIME;
@@ -232,32 +242,34 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
struct inode *inode = NULL;
struct f2fs_dir_entry *de;
struct page *page;
+ nid_t ino;
+ int err = 0;
if (dentry->d_name.len > F2FS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
de = f2fs_find_entry(dir, &dentry->d_name, &page);
- if (de) {
- nid_t ino = le32_to_cpu(de->ino);
- f2fs_dentry_kunmap(dir, page);
- f2fs_put_page(page, 0);
+ if (!de)
+ return d_splice_alias(inode, dentry);
- inode = f2fs_iget(dir->i_sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
+ ino = le32_to_cpu(de->ino);
+ f2fs_dentry_kunmap(dir, page);
+ f2fs_put_page(page, 0);
- if (f2fs_has_inline_dots(inode)) {
- int err;
+ inode = f2fs_iget(dir->i_sb, ino);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
- err = __recover_dot_dentries(inode, dir->i_ino);
- if (err) {
- iget_failed(inode);
- return ERR_PTR(err);
- }
- }
+ if (f2fs_has_inline_dots(inode)) {
+ err = __recover_dot_dentries(inode, dir->i_ino);
+ if (err)
+ goto err_out;
}
-
return d_splice_alias(inode, dentry);
+
+err_out:
+ iget_failed(inode);
+ return ERR_PTR(err);
}
static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
@@ -296,19 +308,15 @@ fail:
return err;
}
-static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *f2fs_follow_link(struct dentry *dentry, void **cookie)
{
- struct page *page = page_follow_link_light(dentry, nd);
-
- if (IS_ERR_OR_NULL(page))
- return page;
-
- /* this is broken symlink case */
- if (*nd_get_link(nd) == 0) {
- page_put_link(dentry, nd, page);
- return ERR_PTR(-ENOENT);
+ const char *link = page_follow_link_light(dentry, cookie);
+ if (!IS_ERR(link) && !*link) {
+ /* this is broken symlink case */
+ page_put_link(NULL, *cookie);
+ link = ERR_PTR(-ENOENT);
}
- return page;
+ return link;
}
static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
@@ -316,16 +324,26 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct inode *inode;
- size_t symlen = strlen(symname) + 1;
+ size_t len = strlen(symname);
+ size_t p_len;
+ char *p_str;
+ struct f2fs_str disk_link = FSTR_INIT(NULL, 0);
+ struct f2fs_encrypted_symlink_data *sd = NULL;
int err;
+ if (len > dir->i_sb->s_blocksize)
+ return -ENAMETOOLONG;
+
f2fs_balance_fs(sbi);
inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
if (IS_ERR(inode))
return PTR_ERR(inode);
- inode->i_op = &f2fs_symlink_inode_operations;
+ if (f2fs_encrypted_inode(inode))
+ inode->i_op = &f2fs_encrypted_symlink_inode_operations;
+ else
+ inode->i_op = &f2fs_symlink_inode_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
f2fs_lock_op(sbi);
@@ -333,10 +351,46 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
if (err)
goto out;
f2fs_unlock_op(sbi);
-
- err = page_symlink(inode, symname, symlen);
alloc_nid_done(sbi, inode->i_ino);
+ if (f2fs_encrypted_inode(dir)) {
+ struct qstr istr = QSTR_INIT(symname, len);
+
+ err = f2fs_get_encryption_info(inode);
+ if (err)
+ goto err_out;
+
+ err = f2fs_fname_crypto_alloc_buffer(inode, len, &disk_link);
+ if (err)
+ goto err_out;
+
+ err = f2fs_fname_usr_to_disk(inode, &istr, &disk_link);
+ if (err < 0)
+ goto err_out;
+
+ p_len = encrypted_symlink_data_len(disk_link.len) + 1;
+
+ if (p_len > dir->i_sb->s_blocksize) {
+ err = -ENAMETOOLONG;
+ goto err_out;
+ }
+
+ sd = kzalloc(p_len, GFP_NOFS);
+ if (!sd) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ memcpy(sd->encrypted_path, disk_link.name, disk_link.len);
+ sd->len = cpu_to_le16(disk_link.len);
+ p_str = (char *)sd;
+ } else {
+ p_len = len + 1;
+ p_str = (char *)symname;
+ }
+
+ err = page_symlink(inode, p_str, p_len);
+
+err_out:
d_instantiate(dentry, inode);
unlock_new_inode(inode);
@@ -349,10 +403,14 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
* If the symlink path is stored into inline_data, there is no
* performance regression.
*/
- filemap_write_and_wait_range(inode->i_mapping, 0, symlen - 1);
+ if (!err)
+ filemap_write_and_wait_range(inode->i_mapping, 0, p_len - 1);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
+
+ kfree(sd);
+ f2fs_fname_crypto_free_buffer(&disk_link);
return err;
out:
handle_failed_inode(inode);
@@ -383,7 +441,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
f2fs_unlock_op(sbi);
- stat_inc_inline_dir(inode);
alloc_nid_done(sbi, inode->i_ino);
d_instantiate(dentry, inode);
@@ -445,19 +502,101 @@ out:
return err;
}
+static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
+ umode_t mode, struct inode **whiteout)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct inode *inode;
+ int err;
+
+ if (!whiteout)
+ f2fs_balance_fs(sbi);
+
+ inode = f2fs_new_inode(dir, mode);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ if (whiteout) {
+ init_special_inode(inode, inode->i_mode, WHITEOUT_DEV);
+ inode->i_op = &f2fs_special_inode_operations;
+ } else {
+ inode->i_op = &f2fs_file_inode_operations;
+ inode->i_fop = &f2fs_file_operations;
+ inode->i_mapping->a_ops = &f2fs_dblock_aops;
+ }
+
+ f2fs_lock_op(sbi);
+ err = acquire_orphan_inode(sbi);
+ if (err)
+ goto out;
+
+ err = f2fs_do_tmpfile(inode, dir);
+ if (err)
+ goto release_out;
+
+ /*
+ * Add this unlinked tmpfile to the orphan list so that all of its
+ * unused data can be removed after an abnormal power-off.
+ */
+ add_orphan_inode(sbi, inode->i_ino);
+ f2fs_unlock_op(sbi);
+
+ alloc_nid_done(sbi, inode->i_ino);
+
+ if (whiteout) {
+ inode_dec_link_count(inode);
+ *whiteout = inode;
+ } else {
+ d_tmpfile(dentry, inode);
+ }
+ unlock_new_inode(inode);
+ return 0;
+
+release_out:
+ release_orphan_inode(sbi);
+out:
+ handle_failed_inode(inode);
+ return err;
+}
+
+static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ if (f2fs_encrypted_inode(dir)) {
+ int err = f2fs_get_encryption_info(dir);
+ if (err)
+ return err;
+ }
+
+ return __f2fs_tmpfile(dir, dentry, mode, NULL);
+}
+
+static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout)
+{
+ return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout);
+}
+
static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir);
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
+ struct inode *whiteout = NULL;
struct page *old_dir_page;
- struct page *old_page, *new_page;
+ struct page *old_page, *new_page = NULL;
struct f2fs_dir_entry *old_dir_entry = NULL;
struct f2fs_dir_entry *old_entry;
struct f2fs_dir_entry *new_entry;
int err = -ENOENT;
+ if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) &&
+ !f2fs_is_child_context_consistent_with_parent(new_dir,
+ old_inode)) {
+ err = -EPERM;
+ goto out;
+ }
+
f2fs_balance_fs(sbi);
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
@@ -471,17 +610,23 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_old;
}
+ if (flags & RENAME_WHITEOUT) {
+ err = f2fs_create_whiteout(old_dir, &whiteout);
+ if (err)
+ goto out_dir;
+ }
+
if (new_inode) {
err = -ENOTEMPTY;
if (old_dir_entry && !f2fs_empty_dir(new_inode))
- goto out_dir;
+ goto out_whiteout;
err = -ENOENT;
new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name,
&new_page);
if (!new_entry)
- goto out_dir;
+ goto out_whiteout;
f2fs_lock_op(sbi);
@@ -489,7 +634,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (err)
goto put_out_dir;
- if (update_dent_inode(old_inode, &new_dentry->d_name)) {
+ if (update_dent_inode(old_inode, new_inode,
+ &new_dentry->d_name)) {
release_orphan_inode(sbi);
goto put_out_dir;
}
@@ -518,7 +664,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
err = f2fs_add_link(new_dentry, old_inode);
if (err) {
f2fs_unlock_op(sbi);
- goto out_dir;
+ goto out_whiteout;
}
if (old_dir_entry) {
@@ -529,6 +675,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
down_write(&F2FS_I(old_inode)->i_sem);
file_lost_pino(old_inode);
+ if (new_inode && file_enc_name(new_inode))
+ file_set_enc_name(old_inode);
up_write(&F2FS_I(old_inode)->i_sem);
old_inode->i_ctime = CURRENT_TIME;
@@ -536,8 +684,18 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_delete_entry(old_entry, old_page, old_dir, NULL);
+ if (whiteout) {
+ whiteout->i_state |= I_LINKABLE;
+ set_inode_flag(F2FS_I(whiteout), FI_INC_LINK);
+ err = f2fs_add_link(old_dentry, whiteout);
+ if (err)
+ goto put_out_dir;
+ whiteout->i_state &= ~I_LINKABLE;
+ iput(whiteout);
+ }
+
if (old_dir_entry) {
- if (old_dir != new_dir) {
+ if (old_dir != new_dir && !whiteout) {
f2fs_set_link(old_inode, old_dir_entry,
old_dir_page, new_dir);
update_inode_page(old_inode);
@@ -558,8 +716,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
put_out_dir:
f2fs_unlock_op(sbi);
- f2fs_dentry_kunmap(new_dir, new_page);
- f2fs_put_page(new_page, 0);
+ if (new_page) {
+ f2fs_dentry_kunmap(new_dir, new_page);
+ f2fs_put_page(new_page, 0);
+ }
+out_whiteout:
+ if (whiteout)
+ iput(whiteout);
out_dir:
if (old_dir_entry) {
f2fs_dentry_kunmap(old_inode, old_dir_page);
@@ -585,6 +748,14 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
int old_nlink = 0, new_nlink = 0;
int err = -ENOENT;
+ if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) &&
+ (old_dir != new_dir) &&
+ (!f2fs_is_child_context_consistent_with_parent(new_dir,
+ old_inode) ||
+ !f2fs_is_child_context_consistent_with_parent(old_dir,
+ new_inode)))
+ return -EPERM;
+
f2fs_balance_fs(sbi);
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
@@ -631,13 +802,17 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_lock_op(sbi);
- err = update_dent_inode(old_inode, &new_dentry->d_name);
+ err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name);
if (err)
goto out_unlock;
+ if (file_enc_name(new_inode))
+ file_set_enc_name(old_inode);
- err = update_dent_inode(new_inode, &old_dentry->d_name);
+ err = update_dent_inode(new_inode, old_inode, &old_dentry->d_name);
if (err)
goto out_undo;
+ if (file_enc_name(old_inode))
+ file_set_enc_name(new_inode);
/* update ".." directory entry info of old dentry */
if (old_dir_entry)
@@ -695,8 +870,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_sync_fs(sbi->sb, 1);
return 0;
out_undo:
- /* Still we may fail to recover name info of f2fs_inode here */
- update_dent_inode(old_inode, &old_dentry->d_name);
+ /*
+ * We may still fail to restore the name info of f2fs_inode here.
+ * Drop it once its name is set as encrypted.
+ */
+ update_dent_inode(old_inode, old_inode, &old_dentry->d_name);
out_unlock:
f2fs_unlock_op(sbi);
out_new_dir:
@@ -723,7 +901,7 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
- if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
if (flags & RENAME_EXCHANGE) {
@@ -734,53 +912,85 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
* VFS has already handled the new dentry existence case,
* here, we just deal with "RENAME_NOREPLACE" as regular rename.
*/
- return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry);
+ return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
}
-static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cookie)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
- struct inode *inode;
- int err;
-
- inode = f2fs_new_inode(dir, mode);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
- inode->i_op = &f2fs_file_inode_operations;
- inode->i_fop = &f2fs_file_operations;
- inode->i_mapping->a_ops = &f2fs_dblock_aops;
-
- f2fs_lock_op(sbi);
- err = acquire_orphan_inode(sbi);
- if (err)
- goto out;
-
- err = f2fs_do_tmpfile(inode, dir);
- if (err)
- goto release_out;
-
- /*
- * add this non-linked tmpfile to orphan list, in this way we could
- * remove all unused data of tmpfile after abnormal power-off.
- */
- add_orphan_inode(sbi, inode->i_ino);
- f2fs_unlock_op(sbi);
-
- alloc_nid_done(sbi, inode->i_ino);
+ struct page *cpage = NULL;
+ char *caddr, *paddr = NULL;
+ struct f2fs_str cstr;
+ struct f2fs_str pstr = FSTR_INIT(NULL, 0);
+ struct inode *inode = d_inode(dentry);
+ struct f2fs_encrypted_symlink_data *sd;
+ loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
+ u32 max_size = inode->i_sb->s_blocksize;
+ int res;
+
+ res = f2fs_get_encryption_info(inode);
+ if (res)
+ return ERR_PTR(res);
+
+ cpage = read_mapping_page(inode->i_mapping, 0, NULL);
+ if (IS_ERR(cpage))
+ return ERR_CAST(cpage);
+ caddr = kmap(cpage);
+ caddr[size] = 0;
+
+ /* Symlink is encrypted */
+ sd = (struct f2fs_encrypted_symlink_data *)caddr;
+ cstr.name = sd->encrypted_path;
+ cstr.len = le16_to_cpu(sd->len);
- stat_inc_inline_inode(inode);
- d_tmpfile(dentry, inode);
- unlock_new_inode(inode);
- return 0;
+ /* this is broken symlink case */
+ if (cstr.name[0] == 0 && cstr.len == 0) {
+ res = -ENOENT;
+ goto errout;
+ }
-release_out:
- release_orphan_inode(sbi);
-out:
- handle_failed_inode(inode);
- return err;
+ if ((cstr.len + sizeof(struct f2fs_encrypted_symlink_data) - 1) >
+ max_size) {
+ /* Symlink data on the disk is corrupted */
+ res = -EIO;
+ goto errout;
+ }
+ res = f2fs_fname_crypto_alloc_buffer(inode, cstr.len, &pstr);
+ if (res)
+ goto errout;
+
+ res = f2fs_fname_disk_to_usr(inode, NULL, &cstr, &pstr);
+ if (res < 0)
+ goto errout;
+
+ paddr = pstr.name;
+
+ /* Null-terminate the name */
+ paddr[res] = '\0';
+
+ kunmap(cpage);
+ page_cache_release(cpage);
+ return *cookie = paddr;
+errout:
+ f2fs_fname_crypto_free_buffer(&pstr);
+ kunmap(cpage);
+ page_cache_release(cpage);
+ return ERR_PTR(res);
}
+const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
+ .readlink = generic_readlink,
+ .follow_link = f2fs_encrypted_follow_link,
+ .put_link = kfree_put_link,
+ .getattr = f2fs_getattr,
+ .setattr = f2fs_setattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .listxattr = f2fs_listxattr,
+ .removexattr = generic_removexattr,
+};
+#endif
+
const struct inode_operations f2fs_dir_inode_operations = {
.create = f2fs_create,
.lookup = f2fs_lookup,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 8ab0cf1930bd..d9c52424bac2 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -195,32 +195,35 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
start, nr);
}
-bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
+int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
- bool is_cp = true;
+ bool need = false;
down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
- if (e && !get_nat_flag(e, IS_CHECKPOINTED))
- is_cp = false;
+ if (e) {
+ if (!get_nat_flag(e, IS_CHECKPOINTED) &&
+ !get_nat_flag(e, HAS_FSYNCED_INODE))
+ need = true;
+ }
up_read(&nm_i->nat_tree_lock);
- return is_cp;
+ return need;
}
-bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
+bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
- bool fsynced = false;
+ bool is_cp = true;
down_read(&nm_i->nat_tree_lock);
- e = __lookup_nat_cache(nm_i, ino);
- if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
- fsynced = true;
+ e = __lookup_nat_cache(nm_i, nid);
+ if (e && !get_nat_flag(e, IS_CHECKPOINTED))
+ is_cp = false;
up_read(&nm_i->nat_tree_lock);
- return fsynced;
+ return is_cp;
}
bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
@@ -312,7 +315,8 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
__set_nat_cache_dirty(nm_i, e);
/* update fsync_mark if its inode nat entry is still alive */
- e = __lookup_nat_cache(nm_i, ni->ino);
+ if (ni->nid != ni->ino)
+ e = __lookup_nat_cache(nm_i, ni->ino);
if (e) {
if (fsync_done && ni->nid == ni->ino)
set_nat_flag(e, HAS_FSYNCED_INODE, true);
@@ -995,8 +999,11 @@ static int read_node_page(struct page *page, int rw)
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
struct node_info ni;
struct f2fs_io_info fio = {
+ .sbi = sbi,
.type = NODE,
.rw = rw,
+ .page = page,
+ .encrypted_page = NULL,
};
get_node_info(sbi, page->index, &ni);
@@ -1011,7 +1018,7 @@ static int read_node_page(struct page *page, int rw)
return LOCKED_PAGE;
fio.blk_addr = ni.blk_addr;
- return f2fs_submit_page_bio(sbi, page, &fio);
+ return f2fs_submit_page_bio(&fio);
}
/*
@@ -1204,13 +1211,9 @@ continue_unlock:
/* called by fsync() */
if (ino && IS_DNODE(page)) {
set_fsync_mark(page, 1);
- if (IS_INODE(page)) {
- if (!is_checkpointed_node(sbi, ino) &&
- !has_fsynced_inode(sbi, ino))
- set_dentry_mark(page, 1);
- else
- set_dentry_mark(page, 0);
- }
+ if (IS_INODE(page))
+ set_dentry_mark(page,
+ need_dentry_mark(sbi, ino));
nwritten++;
} else {
set_fsync_mark(page, 0);
@@ -1293,8 +1296,11 @@ static int f2fs_write_node_page(struct page *page,
nid_t nid;
struct node_info ni;
struct f2fs_io_info fio = {
+ .sbi = sbi,
.type = NODE,
.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+ .page = page,
+ .encrypted_page = NULL,
};
trace_f2fs_writepage(page, NODE);
@@ -1329,7 +1335,7 @@ static int f2fs_write_node_page(struct page *page,
set_page_writeback(page);
fio.blk_addr = ni.blk_addr;
- write_node_page(sbi, page, nid, &fio);
+ write_node_page(nid, &fio);
set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
up_read(&sbi->node_write);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index c56026f1725c..7427e956ad81 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -343,28 +343,6 @@ static inline nid_t get_nid(struct page *p, int off, bool i)
* - Mark cold node blocks in their node footer
* - Mark cold data pages in page cache
*/
-static inline int is_file(struct inode *inode, int type)
-{
- return F2FS_I(inode)->i_advise & type;
-}
-
-static inline void set_file(struct inode *inode, int type)
-{
- F2FS_I(inode)->i_advise |= type;
-}
-
-static inline void clear_file(struct inode *inode, int type)
-{
- F2FS_I(inode)->i_advise &= ~type;
-}
-
-#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
-#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
-#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT)
-#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT)
-#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT)
-#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT)
-
static inline int is_cold_data(struct page *page)
{
return PageChecked(page);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 8d8ea99f2156..24a8c1d4f45f 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -83,6 +83,11 @@ static int recover_dentry(struct inode *inode, struct page *ipage)
goto out;
}
+ if (file_enc_name(inode)) {
+ iput(dir);
+ return 0;
+ }
+
name.len = le32_to_cpu(raw_inode->i_namelen);
name.name = raw_inode->i_name;
@@ -143,6 +148,7 @@ out:
static void recover_inode(struct inode *inode, struct page *page)
{
struct f2fs_inode *raw = F2FS_INODE(page);
+ char *name;
inode->i_mode = le16_to_cpu(raw->i_mode);
i_size_write(inode, le64_to_cpu(raw->i_size));
@@ -153,8 +159,13 @@ static void recover_inode(struct inode *inode, struct page *page)
inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
+ if (file_enc_name(inode))
+ name = "<encrypted>";
+ else
+ name = F2FS_INODE(page)->i_name;
+
f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
- ino_of_node(page), F2FS_INODE(page)->i_name);
+ ino_of_node(page), name);
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
@@ -174,7 +185,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
while (1) {
struct fsync_inode_entry *entry;
- if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
+ if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
return 0;
page = get_meta_page(sbi, blkaddr);
@@ -349,7 +360,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
struct f2fs_inode_info *fi = F2FS_I(inode);
unsigned int start, end;
struct dnode_of_data dn;
- struct f2fs_summary sum;
struct node_info ni;
int err = 0, recovered = 0;
@@ -396,7 +406,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
dest = datablock_addr(page, dn.ofs_in_node);
if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR &&
- dest >= MAIN_BLKADDR(sbi) && dest < MAX_BLKADDR(sbi)) {
+ is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) {
err = reserve_new_block(&dn);
@@ -409,13 +419,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (err)
goto err;
- set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
-
/* write dummy data page */
- recover_data_page(sbi, NULL, &sum, src, dest);
- dn.data_blkaddr = dest;
- set_data_blkaddr(&dn);
- f2fs_update_extent_cache(&dn);
+ f2fs_replace_block(sbi, &dn, src, dest,
+ ni.version, false);
recovered++;
}
dn.ofs_in_node++;
@@ -454,7 +460,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
while (1) {
struct fsync_inode_entry *entry;
- if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
+ if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
break;
ra_meta_pages_cond(sbi, blkaddr);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f939660941bb..1eb343768781 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -75,6 +75,14 @@ static inline unsigned long __reverse_ffs(unsigned long word)
static unsigned long __find_rev_next_bit(const unsigned long *addr,
unsigned long size, unsigned long offset)
{
+ /* Never test a bit at or beyond @size: the bitmap ends there. */
+ while (offset < size && !f2fs_test_bit(offset, (unsigned char *)addr))
+ offset++;
+ if (offset > size)
+ offset = size;
+
+ return offset;
+#if 0
const unsigned long *p = addr + BIT_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG - 1);
unsigned long tmp;
@@ -121,11 +129,20 @@ found_first:
return result + size; /* Nope. */
found_middle:
return result + __reverse_ffs(tmp);
+#endif
}
static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
unsigned long size, unsigned long offset)
{
+ /* Never test a bit at or beyond @size: the bitmap ends there. */
+ while (offset < size && f2fs_test_bit(offset, (unsigned char *)addr))
+ offset++;
+ if (offset > size)
+ offset = size;
+
+ return offset;
+#if 0
const unsigned long *p = addr + BIT_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG - 1);
unsigned long tmp;
@@ -173,6 +190,7 @@ found_first:
return result + size; /* Nope. */
found_middle:
return result + __reverse_ffz(tmp);
+#endif
}
void register_inmem_page(struct inode *inode, struct page *page)
@@ -216,8 +234,10 @@ void commit_inmem_pages(struct inode *inode, bool abort)
struct inmem_pages *cur, *tmp;
bool submit_bio = false;
struct f2fs_io_info fio = {
+ .sbi = sbi,
.type = DATA,
.rw = WRITE_SYNC | REQ_PRIO,
+ .encrypted_page = NULL,
};
/*
@@ -241,7 +261,8 @@ void commit_inmem_pages(struct inode *inode, bool abort)
if (clear_page_dirty_for_io(cur->page))
inode_dec_dirty_pages(inode);
trace_f2fs_commit_inmem_page(cur->page, INMEM);
- do_write_data_page(cur->page, &fio);
+ fio.page = cur->page;
+ do_write_data_page(&fio);
submit_bio = true;
}
f2fs_put_page(cur->page, 1);
@@ -466,22 +487,43 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
{
sector_t start = SECTOR_FROM_BLOCK(blkstart);
sector_t len = SECTOR_FROM_BLOCK(blklen);
+ struct seg_entry *se;
+ unsigned int offset;
+ block_t i;
+
+ for (i = blkstart; i < blkstart + blklen; i++) {
+ se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
+ offset = GET_BLKOFF_FROM_SEG0(sbi, i);
+
+ if (!f2fs_test_and_set_bit(offset, se->discard_map))
+ sbi->discard_blks--;
+ }
trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
}
void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
{
- if (f2fs_issue_discard(sbi, blkaddr, 1)) {
- struct page *page = grab_meta_page(sbi, blkaddr);
- /* zero-filled page */
- set_page_dirty(page);
- f2fs_put_page(page, 1);
+ int err = -ENOTSUPP;
+
+ if (test_opt(sbi, DISCARD)) {
+ struct seg_entry *se = get_seg_entry(sbi,
+ GET_SEGNO(sbi, blkaddr));
+ unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
+
+ if (f2fs_test_bit(offset, se->discard_map))
+ return;
+
+ err = f2fs_issue_discard(sbi, blkaddr, 1);
}
+
+ if (err)
+ update_meta_page(sbi, NULL, blkaddr);
}
static void __add_discard_entry(struct f2fs_sb_info *sbi,
- struct cp_control *cpc, unsigned int start, unsigned int end)
+ struct cp_control *cpc, struct seg_entry *se,
+ unsigned int start, unsigned int end)
{
struct list_head *head = &SM_I(sbi)->discard_list;
struct discard_entry *new, *last;
@@ -502,7 +544,6 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
list_add_tail(&new->list, head);
done:
SM_I(sbi)->nr_discards += end - start;
- cpc->trimmed += end - start;
}
static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
@@ -512,41 +553,24 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
+ unsigned long *discard_map = (unsigned long *)se->discard_map;
unsigned long *dmap = SIT_I(sbi)->tmp_map;
unsigned int start = 0, end = -1;
bool force = (cpc->reason == CP_DISCARD);
int i;
- if (!force && (!test_opt(sbi, DISCARD) ||
- SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards))
+ if (se->valid_blocks == max_blocks)
return;
- if (force && !se->valid_blocks) {
- struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- /*
- * if this segment is registered in the prefree list, then
- * we should skip adding a discard candidate, and let the
- * checkpoint do that later.
- */
- mutex_lock(&dirty_i->seglist_lock);
- if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) {
- mutex_unlock(&dirty_i->seglist_lock);
- cpc->trimmed += sbi->blocks_per_seg;
+ if (!force) {
+ if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
+ SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)
return;
- }
- mutex_unlock(&dirty_i->seglist_lock);
-
- __add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg);
- return;
}
- /* zero block will be discarded through the prefree list */
- if (!se->valid_blocks || se->valid_blocks == max_blocks)
- return;
-
/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
for (i = 0; i < entries; i++)
- dmap[i] = force ? ~ckpt_map[i] :
+ dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
@@ -555,11 +579,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
break;
end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
-
- if (force && end - start < cpc->trim_minlen)
- continue;
-
- __add_discard_entry(sbi, cpc, start, end);
+ __add_discard_entry(sbi, cpc, se, start, end);
}
}
@@ -589,7 +609,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
mutex_unlock(&dirty_i->seglist_lock);
}
-void clear_prefree_segments(struct f2fs_sb_info *sbi)
+void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct list_head *head = &(SM_I(sbi)->discard_list);
struct discard_entry *entry, *this;
@@ -622,7 +642,11 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
/* send small discards */
list_for_each_entry_safe(entry, this, head, list) {
+ if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen)
+ goto skip;
f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
+ cpc->trimmed += entry->len;
+skip:
list_del(&entry->list);
SM_I(sbi)->nr_discards -= entry->len;
kmem_cache_free(discard_entry_slab, entry);
@@ -673,9 +697,13 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
if (del > 0) {
if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
f2fs_bug_on(sbi, 1);
+ if (!f2fs_test_and_set_bit(offset, se->discard_map))
+ sbi->discard_blks--;
} else {
if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
f2fs_bug_on(sbi, 1);
+ if (f2fs_test_and_clear_bit(offset, se->discard_map))
+ sbi->discard_blks++;
}
if (!f2fs_test_bit(offset, se->ckpt_valid_map))
se->ckpt_valid_blocks += del;
@@ -769,16 +797,25 @@ struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
}
-static void write_sum_page(struct f2fs_sb_info *sbi,
- struct f2fs_summary_block *sum_blk, block_t blk_addr)
+void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
{
struct page *page = grab_meta_page(sbi, blk_addr);
- void *kaddr = page_address(page);
- memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
+ void *dst = page_address(page);
+
+ if (src)
+ memcpy(dst, src, PAGE_CACHE_SIZE);
+ else
+ memset(dst, 0, PAGE_CACHE_SIZE);
set_page_dirty(page);
f2fs_put_page(page, 1);
}
+static void write_sum_page(struct f2fs_sb_info *sbi,
+ struct f2fs_summary_block *sum_blk, block_t blk_addr)
+{
+ update_meta_page(sbi, (void *)sum_blk, blk_addr);
+}
+
static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -1060,8 +1097,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
unsigned int start_segno, end_segno;
struct cp_control cpc;
- if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) ||
- range->len < sbi->blocksize)
+ if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
cpc.trimmed = 0;
@@ -1073,12 +1109,19 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
GET_SEGNO(sbi, end);
cpc.reason = CP_DISCARD;
- cpc.trim_minlen = F2FS_BYTES_TO_BLK(range->minlen);
+ cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
/* do checkpoint to issue discard commands safely */
for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
cpc.trim_start = start_segno;
- cpc.trim_end = min_t(unsigned int, rounddown(start_segno +
+
+ if (sbi->discard_blks == 0)
+ break;
+ else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
+ cpc.trim_end = end_segno;
+ else
+ cpc.trim_end = min_t(unsigned int,
+ rounddown(start_segno +
BATCHED_TRIM_SEGMENTS(sbi),
sbi->segs_per_sec) - 1, end_segno);
@@ -1206,84 +1249,95 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
mutex_unlock(&curseg->curseg_mutex);
}
-static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
- struct f2fs_summary *sum,
- struct f2fs_io_info *fio)
+static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
- int type = __get_segment_type(page, fio->type);
+ int type = __get_segment_type(fio->page, fio->type);
- allocate_data_block(sbi, page, fio->blk_addr, &fio->blk_addr, sum, type);
+ allocate_data_block(fio->sbi, fio->page, fio->blk_addr,
+ &fio->blk_addr, sum, type);
/* writeout dirty page into bdev */
- f2fs_submit_page_mbio(sbi, page, fio);
+ f2fs_submit_page_mbio(fio);
}
void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
{
struct f2fs_io_info fio = {
+ .sbi = sbi,
.type = META,
.rw = WRITE_SYNC | REQ_META | REQ_PRIO,
.blk_addr = page->index,
+ .page = page,
+ .encrypted_page = NULL,
};
set_page_writeback(page);
- f2fs_submit_page_mbio(sbi, page, &fio);
+ f2fs_submit_page_mbio(&fio);
}
-void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
- unsigned int nid, struct f2fs_io_info *fio)
+void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
{
struct f2fs_summary sum;
+
set_summary(&sum, nid, 0, 0);
- do_write_page(sbi, page, &sum, fio);
+ do_write_page(&sum, fio);
}
-void write_data_page(struct page *page, struct dnode_of_data *dn,
- struct f2fs_io_info *fio)
+void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+ struct f2fs_sb_info *sbi = fio->sbi;
struct f2fs_summary sum;
struct node_info ni;
f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
- do_write_page(sbi, page, &sum, fio);
+ do_write_page(&sum, fio);
dn->data_blkaddr = fio->blk_addr;
}
-void rewrite_data_page(struct page *page, struct f2fs_io_info *fio)
+void rewrite_data_page(struct f2fs_io_info *fio)
{
- stat_inc_inplace_blocks(F2FS_P_SB(page));
- f2fs_submit_page_mbio(F2FS_P_SB(page), page, fio);
+ stat_inc_inplace_blocks(fio->sbi);
+ f2fs_submit_page_mbio(fio);
}
-void recover_data_page(struct f2fs_sb_info *sbi,
- struct page *page, struct f2fs_summary *sum,
- block_t old_blkaddr, block_t new_blkaddr)
+static void __f2fs_replace_block(struct f2fs_sb_info *sbi,
+ struct f2fs_summary *sum,
+ block_t old_blkaddr, block_t new_blkaddr,
+ bool recover_curseg)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
unsigned int segno, old_cursegno;
struct seg_entry *se;
int type;
+ unsigned short old_blkoff;
segno = GET_SEGNO(sbi, new_blkaddr);
se = get_seg_entry(sbi, segno);
type = se->type;
- if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
- if (old_blkaddr == NULL_ADDR)
- type = CURSEG_COLD_DATA;
- else
+ if (!recover_curseg) {
+ /* for recovery flow */
+ if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
+ if (old_blkaddr == NULL_ADDR)
+ type = CURSEG_COLD_DATA;
+ else
+ type = CURSEG_WARM_DATA;
+ }
+ } else {
+ if (!IS_CURSEG(sbi, segno))
type = CURSEG_WARM_DATA;
}
+
curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
mutex_lock(&sit_i->sentry_lock);
old_cursegno = curseg->segno;
+ old_blkoff = curseg->next_blkoff;
/* change the current segment */
if (segno != curseg->segno) {
@@ -1297,30 +1351,67 @@ void recover_data_page(struct f2fs_sb_info *sbi,
refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
locate_dirty_segment(sbi, old_cursegno);
+ if (recover_curseg) {
+ if (old_cursegno != curseg->segno) {
+ curseg->next_segno = old_cursegno;
+ change_curseg(sbi, type, true);
+ }
+ curseg->next_blkoff = old_blkoff;
+ }
+
mutex_unlock(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
}
+void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
+ block_t old_addr, block_t new_addr,
+ unsigned char version, bool recover_curseg)
+{
+ struct f2fs_summary sum;
+
+ set_summary(&sum, dn->nid, dn->ofs_in_node, version);
+
+ __f2fs_replace_block(sbi, &sum, old_addr, new_addr, recover_curseg);
+
+ dn->data_blkaddr = new_addr;
+ set_data_blkaddr(dn);
+ f2fs_update_extent_cache(dn);
+}
+
static inline bool is_merged_page(struct f2fs_sb_info *sbi,
struct page *page, enum page_type type)
{
enum page_type btype = PAGE_TYPE_OF_BIO(type);
struct f2fs_bio_info *io = &sbi->write_io[btype];
struct bio_vec *bvec;
+ struct page *target;
int i;
down_read(&io->io_rwsem);
- if (!io->bio)
- goto out;
+ if (!io->bio) {
+ up_read(&io->io_rwsem);
+ return false;
+ }
bio_for_each_segment_all(bvec, io->bio, i) {
- if (page == bvec->bv_page) {
+
+ if (bvec->bv_page->mapping) {
+ target = bvec->bv_page;
+ } else {
+ struct f2fs_crypto_ctx *ctx;
+
+ /* encrypted page */
+ ctx = (struct f2fs_crypto_ctx *)page_private(
+ bvec->bv_page);
+ target = ctx->w.control_page;
+ }
+
+ if (page == target) {
up_read(&io->io_rwsem);
return true;
}
}
-out:
up_read(&io->io_rwsem);
return false;
}
@@ -1857,8 +1948,11 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
sit_i->sentries[start].ckpt_valid_map
= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
- if (!sit_i->sentries[start].cur_valid_map
- || !sit_i->sentries[start].ckpt_valid_map)
+ sit_i->sentries[start].discard_map
+ = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+ if (!sit_i->sentries[start].cur_valid_map ||
+ !sit_i->sentries[start].ckpt_valid_map ||
+ !sit_i->sentries[start].discard_map)
return -ENOMEM;
}
@@ -1996,6 +2090,11 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
got_it:
check_block_count(sbi, start, &sit);
seg_info_from_raw_sit(se, &sit);
+
+ /* build discard map only one time */
+ memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
+ sbi->discard_blks += sbi->blocks_per_seg - se->valid_blocks;
+
if (sbi->segs_per_sec > 1) {
struct sec_entry *e = get_sec_entry(sbi, start);
e->valid_blocks += se->valid_blocks;
@@ -2245,6 +2344,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
for (start = 0; start < MAIN_SEGS(sbi); start++) {
kfree(sit_i->sentries[start].cur_valid_map);
kfree(sit_i->sentries[start].ckpt_valid_map);
+ kfree(sit_i->sentries[start].discard_map);
}
}
kfree(sit_i->tmp_map);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 85d7fa7514b2..849635778118 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -163,6 +163,7 @@ struct seg_entry {
*/
unsigned short ckpt_valid_blocks;
unsigned char *ckpt_valid_map;
+ unsigned char *discard_map;
unsigned char type; /* segment type like CURSEG_XXX_TYPE */
unsigned long long mtime; /* modification time of the segment */
};
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index b2dd1b01f076..a06b0b46fe69 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -258,6 +258,7 @@ static void init_once(void *foo)
static int parse_options(struct super_block *sb, char *options)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct request_queue *q;
substring_t args[MAX_OPT_ARGS];
char *p, *name;
int arg = 0;
@@ -302,7 +303,14 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
break;
case Opt_discard:
- set_opt(sbi, DISCARD);
+ q = bdev_get_queue(sb->s_bdev);
+ if (blk_queue_discard(q)) {
+ set_opt(sbi, DISCARD);
+ } else {
+ f2fs_msg(sb, KERN_WARNING,
+ "mounting with \"discard\" option, but "
+ "the device does not support discard");
+ }
break;
case Opt_noheap:
set_opt(sbi, NOHEAP);
@@ -416,6 +424,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
/* Will be used by directory only */
fi->i_dir_level = F2FS_SB(sb)->dir_level;
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+ fi->i_crypt_info = NULL;
+#endif
return &fi->vfs_inode;
}
@@ -428,8 +439,31 @@ static int f2fs_drop_inode(struct inode *inode)
* - f2fs_gc -> iput -> evict
* - inode_wait_for_writeback(inode)
*/
- if (!inode_unhashed(inode) && inode->i_state & I_SYNC)
+ if (!inode_unhashed(inode) && inode->i_state & I_SYNC) {
+ if (!inode->i_nlink && !is_bad_inode(inode)) {
+ spin_unlock(&inode->i_lock);
+
+ /* some remained atomic pages should discarded */
+ if (f2fs_is_atomic_file(inode))
+ commit_inmem_pages(inode, true);
+
+ sb_start_intwrite(inode->i_sb);
+ i_size_write(inode, 0);
+
+ if (F2FS_HAS_BLOCKS(inode))
+ f2fs_truncate(inode);
+
+ sb_end_intwrite(inode->i_sb);
+
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+ if (F2FS_I(inode)->i_crypt_info)
+ f2fs_free_encryption_info(inode,
+ F2FS_I(inode)->i_crypt_info);
+#endif
+ spin_lock(&inode->i_lock);
+ }
return 0;
+ }
return generic_drop_inode(inode);
}
@@ -520,7 +554,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
} else {
f2fs_balance_fs(sbi);
}
- f2fs_trace_ios(NULL, NULL, 1);
+ f2fs_trace_ios(NULL, 1);
return 0;
}
@@ -658,6 +692,22 @@ static const struct file_operations f2fs_seq_segment_info_fops = {
.release = single_release,
};
+static void default_options(struct f2fs_sb_info *sbi)
+{
+ /* init some FS parameters */
+ sbi->active_logs = NR_CURSEG_TYPE;
+
+ set_opt(sbi, BG_GC);
+ set_opt(sbi, INLINE_DATA);
+
+#ifdef CONFIG_F2FS_FS_XATTR
+ set_opt(sbi, XATTR_USER);
+#endif
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+ set_opt(sbi, POSIX_ACL);
+#endif
+}
+
static int f2fs_remount(struct super_block *sb, int *flags, char *data)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -676,7 +726,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
active_logs = sbi->active_logs;
sbi->mount_opt.opt = 0;
- sbi->active_logs = NR_CURSEG_TYPE;
+ default_options(sbi);
/* parse mount options */
err = parse_options(sb, data);
@@ -929,29 +979,36 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
*/
static int read_raw_super_block(struct super_block *sb,
struct f2fs_super_block **raw_super,
- struct buffer_head **raw_super_buf)
+ struct buffer_head **raw_super_buf,
+ int *recovery)
{
int block = 0;
+ struct buffer_head *buffer;
+ struct f2fs_super_block *super;
+ int err = 0;
retry:
- *raw_super_buf = sb_bread(sb, block);
- if (!*raw_super_buf) {
+ buffer = sb_bread(sb, block);
+ if (!buffer) {
+ *recovery = 1;
f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
block + 1);
if (block == 0) {
block++;
goto retry;
} else {
- return -EIO;
+ err = -EIO;
+ goto out;
}
}
- *raw_super = (struct f2fs_super_block *)
- ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET);
+ super = (struct f2fs_super_block *)
+ ((char *)(buffer)->b_data + F2FS_SUPER_OFFSET);
/* sanity checking of raw super */
- if (sanity_check_raw_super(sb, *raw_super)) {
- brelse(*raw_super_buf);
+ if (sanity_check_raw_super(sb, super)) {
+ brelse(buffer);
+ *recovery = 1;
f2fs_msg(sb, KERN_ERR,
"Can't find valid F2FS filesystem in %dth superblock",
block + 1);
@@ -959,25 +1016,76 @@ retry:
block++;
goto retry;
} else {
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
}
+ if (!*raw_super) {
+ *raw_super_buf = buffer;
+ *raw_super = super;
+ } else {
+ /* already have a valid superblock */
+ brelse(buffer);
+ }
+
+ /* check the validity of the second superblock */
+ if (block == 0) {
+ block++;
+ goto retry;
+ }
+
+out:
+ /* No valid superblock */
+ if (!*raw_super)
+ return err;
+
return 0;
}
+int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
+{
+ struct buffer_head *sbh = sbi->raw_super_buf;
+ sector_t block = sbh->b_blocknr;
+ int err;
+
+ /* write back-up superblock first */
+ sbh->b_blocknr = block ? 0 : 1;
+ mark_buffer_dirty(sbh);
+ err = sync_dirty_buffer(sbh);
+
+ sbh->b_blocknr = block;
+
+ /* if we are in recovery path, skip writing valid superblock */
+ if (recover || err)
+ goto out;
+
+ /* write current valid superblock */
+ mark_buffer_dirty(sbh);
+ err = sync_dirty_buffer(sbh);
+out:
+ clear_buffer_write_io_error(sbh);
+ set_buffer_uptodate(sbh);
+ return err;
+}
+
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
{
struct f2fs_sb_info *sbi;
- struct f2fs_super_block *raw_super = NULL;
+ struct f2fs_super_block *raw_super;
struct buffer_head *raw_super_buf;
struct inode *root;
- long err = -EINVAL;
+ long err;
bool retry = true, need_fsck = false;
char *options = NULL;
- int i;
+ int recovery, i;
try_onemore:
+ err = -EINVAL;
+ raw_super = NULL;
+ raw_super_buf = NULL;
+ recovery = 0;
+
/* allocate memory for f2fs-specific super block info */
sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
if (!sbi)
@@ -989,23 +1097,12 @@ try_onemore:
goto free_sbi;
}
- err = read_raw_super_block(sb, &raw_super, &raw_super_buf);
+ err = read_raw_super_block(sb, &raw_super, &raw_super_buf, &recovery);
if (err)
goto free_sbi;
sb->s_fs_info = sbi;
- /* init some FS parameters */
- sbi->active_logs = NR_CURSEG_TYPE;
-
- set_opt(sbi, BG_GC);
- set_opt(sbi, INLINE_DATA);
-
-#ifdef CONFIG_F2FS_FS_XATTR
- set_opt(sbi, XATTR_USER);
-#endif
-#ifdef CONFIG_F2FS_FS_POSIX_ACL
- set_opt(sbi, POSIX_ACL);
-#endif
+ default_options(sbi);
/* parse mount options */
options = kstrdup((const char *)data, GFP_KERNEL);
if (data && !options) {
@@ -1148,14 +1245,6 @@ try_onemore:
proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
&f2fs_seq_segment_info_fops, sb);
- if (test_opt(sbi, DISCARD)) {
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
- if (!blk_queue_discard(q))
- f2fs_msg(sb, KERN_WARNING,
- "mounting with \"discard\" option, but "
- "the device does not support discard");
- }
-
sbi->s_kobj.kset = f2fs_kset;
init_completion(&sbi->s_kobj_unregister);
err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
@@ -1198,6 +1287,13 @@ try_onemore:
goto free_kobj;
}
kfree(options);
+
+ /* recover broken superblock */
+ if (recovery && !f2fs_readonly(sb) && !bdev_read_only(sb->s_bdev)) {
+ f2fs_msg(sb, KERN_INFO, "Recover invalid superblock");
+ f2fs_commit_super(sbi, true);
+ }
+
return 0;
free_kobj:
@@ -1305,13 +1401,18 @@ static int __init init_f2fs_fs(void)
err = -ENOMEM;
goto free_extent_cache;
}
- err = register_filesystem(&f2fs_fs_type);
+ err = f2fs_init_crypto();
if (err)
goto free_kset;
+ err = register_filesystem(&f2fs_fs_type);
+ if (err)
+ goto free_crypto;
f2fs_create_root_stats();
f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
return 0;
+free_crypto:
+ f2fs_exit_crypto();
free_kset:
kset_unregister(f2fs_kset);
free_extent_cache:
@@ -1333,6 +1434,7 @@ static void __exit exit_f2fs_fs(void)
remove_proc_entry("fs/f2fs", NULL);
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
+ f2fs_exit_crypto();
destroy_extent_cache();
destroy_checkpoint_caches();
destroy_segment_manager_caches();
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c
index 875aa8179bc1..145fb659ad44 100644
--- a/fs/f2fs/trace.c
+++ b/fs/f2fs/trace.c
@@ -80,7 +80,7 @@ out:
radix_tree_preload_end();
}
-void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush)
+void f2fs_trace_ios(struct f2fs_io_info *fio, int flush)
{
struct inode *inode;
pid_t pid;
@@ -91,8 +91,8 @@ void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush)
return;
}
- inode = page->mapping->host;
- pid = page_private(page);
+ inode = fio->page->mapping->host;
+ pid = page_private(fio->page);
major = MAJOR(inode->i_sb->s_dev);
minor = MINOR(inode->i_sb->s_dev);
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h
index 1041dbeb52ae..67db24ac1e85 100644
--- a/fs/f2fs/trace.h
+++ b/fs/f2fs/trace.h
@@ -33,12 +33,12 @@ struct last_io_info {
};
extern void f2fs_trace_pid(struct page *);
-extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int);
+extern void f2fs_trace_ios(struct f2fs_io_info *, int);
extern void f2fs_build_trace_ios(void);
extern void f2fs_destroy_trace_ios(void);
#else
#define f2fs_trace_pid(p)
-#define f2fs_trace_ios(p, i, n)
+#define f2fs_trace_ios(i, n)
#define f2fs_build_trace_ios()
#define f2fs_destroy_trace_ios()
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 9757f65a05bc..07449b980acb 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -584,6 +584,9 @@ static int __f2fs_setxattr(struct inode *inode, int index,
inode->i_ctime = CURRENT_TIME;
clear_inode_flag(fi, FI_ACL_MODE);
}
+ if (index == F2FS_XATTR_INDEX_ENCRYPTION &&
+ !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT))
+ f2fs_set_encrypted_inode(inode);
if (ipage)
update_inode(inode, ipage);
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 969d792ca362..71a7100d5492 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -35,6 +35,10 @@
#define F2FS_XATTR_INDEX_LUSTRE 5
#define F2FS_XATTR_INDEX_SECURITY 6
#define F2FS_XATTR_INDEX_ADVISE 7
+/* Should be same as EXT4_XATTR_INDEX_ENCRYPTION */
+#define F2FS_XATTR_INDEX_ENCRYPTION 9
+
+#define F2FS_XATTR_NAME_ENCRYPTION_CONTEXT "c"
struct f2fs_xattr_header {
__le32 h_magic; /* magic number for identification */
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 999ff5c3cab0..d59712dfa3e7 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -195,8 +195,9 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
goto out_err;
}
/* copy the full handle */
- if (copy_from_user(handle, ufh,
- sizeof(struct file_handle) +
+ *handle = f_handle;
+ if (copy_from_user(&handle->f_handle,
+ &ufh->f_handle,
f_handle.handle_bytes)) {
retval = -EFAULT;
goto out_handle;
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 881aa3d217f0..e3dcb4467d92 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -50,9 +50,6 @@ extern daddr_t vxfs_bmap1(struct inode *, long);
/* vxfs_fshead.c */
extern int vxfs_read_fshead(struct super_block *);
-/* vxfs_immed.c */
-extern const struct inode_operations vxfs_immed_symlink_iops;
-
/* vxfs_inode.c */
extern const struct address_space_operations vxfs_immed_aops;
extern struct kmem_cache *vxfs_inode_cachep;
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index 8b9229e2ca5c..cb84f0fcc72a 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -32,29 +32,15 @@
*/
#include <linux/fs.h>
#include <linux/pagemap.h>
-#include <linux/namei.h>
#include "vxfs.h"
#include "vxfs_extern.h"
#include "vxfs_inode.h"
-static void * vxfs_immed_follow_link(struct dentry *, struct nameidata *);
-
static int vxfs_immed_readpage(struct file *, struct page *);
/*
- * Inode operations for immed symlinks.
- *
- * Unliked all other operations we do not go through the pagecache,
- * but do all work directly on the inode.
- */
-const struct inode_operations vxfs_immed_symlink_iops = {
- .readlink = generic_readlink,
- .follow_link = vxfs_immed_follow_link,
-};
-
-/*
* Address space operations for immed files and directories.
*/
const struct address_space_operations vxfs_immed_aops = {
@@ -62,26 +48,6 @@ const struct address_space_operations vxfs_immed_aops = {
};
/**
- * vxfs_immed_follow_link - follow immed symlink
- * @dp: dentry for the link
- * @np: pathname lookup data for the current path walk
- *
- * Description:
- * vxfs_immed_follow_link restarts the pathname lookup with
- * the data obtained from @dp.
- *
- * Returns:
- * Zero on success, else a negative error code.
- */
-static void *
-vxfs_immed_follow_link(struct dentry *dp, struct nameidata *np)
-{
- struct vxfs_inode_info *vip = VXFS_INO(d_inode(dp));
- nd_set_link(np, vip->vii_immed.vi_immed);
- return NULL;
-}
-
-/**
* vxfs_immed_readpage - read part of an immed inode into pagecache
* @file: file context (unused)
* @page: page frame to fill in.
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 363e3ae25f6b..ef73ed674a27 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -35,6 +35,7 @@
#include <linux/pagemap.h>
#include <linux/kernel.h>
#include <linux/slab.h>
+#include <linux/namei.h>
#include "vxfs.h"
#include "vxfs_inode.h"
@@ -327,8 +328,10 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
ip->i_op = &page_symlink_inode_operations;
ip->i_mapping->a_ops = &vxfs_aops;
} else {
- ip->i_op = &vxfs_immed_symlink_iops;
- vip->vii_immed.vi_immed[ip->i_size] = '\0';
+ ip->i_op = &simple_symlink_inode_operations;
+ ip->i_link = vip->vii_immed.vi_immed;
+ nd_terminate_link(ip->i_link, ip->i_size,
+ sizeof(vip->vii_immed.vi_immed) - 1);
}
} else
init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev));
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 0572bca49f15..5e2e08712d3b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1365,7 +1365,7 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx)
return err;
}
-static char *read_link(struct dentry *dentry)
+static const char *fuse_follow_link(struct dentry *dentry, void **cookie)
{
struct inode *inode = d_inode(dentry);
struct fuse_conn *fc = get_fuse_conn(inode);
@@ -1389,28 +1389,12 @@ static char *read_link(struct dentry *dentry)
link = ERR_PTR(ret);
} else {
link[ret] = '\0';
+ *cookie = link;
}
fuse_invalidate_atime(inode);
return link;
}
-static void free_link(char *link)
-{
- if (!IS_ERR(link))
- free_page((unsigned long) link);
-}
-
-static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- nd_set_link(nd, read_link(dentry));
- return NULL;
-}
-
-static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
-{
- free_link(nd_get_link(nd));
-}
-
static int fuse_dir_open(struct inode *inode, struct file *file)
{
return fuse_open_common(inode, file, true);
@@ -1926,7 +1910,7 @@ static const struct inode_operations fuse_common_inode_operations = {
static const struct inode_operations fuse_symlink_inode_operations = {
.setattr = fuse_setattr,
.follow_link = fuse_follow_link,
- .put_link = fuse_put_link,
+ .put_link = free_page_put_link,
.readlink = generic_readlink,
.getattr = fuse_getattr,
.setxattr = fuse_setxattr,
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 1b3ca7a2e3fc..3a1461de1551 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1548,7 +1548,7 @@ out:
* Returns: 0 on success or error code
*/
-static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *gfs2_follow_link(struct dentry *dentry, void **cookie)
{
struct gfs2_inode *ip = GFS2_I(d_inode(dentry));
struct gfs2_holder i_gh;
@@ -1561,8 +1561,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
error = gfs2_glock_nq(&i_gh);
if (error) {
gfs2_holder_uninit(&i_gh);
- nd_set_link(nd, ERR_PTR(error));
- return NULL;
+ return ERR_PTR(error);
}
size = (unsigned int)i_size_read(&ip->i_inode);
@@ -1586,8 +1585,9 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
brelse(dibh);
out:
gfs2_glock_dq_uninit(&i_gh);
- nd_set_link(nd, buf);
- return NULL;
+ if (!IS_ERR(buf))
+ *cookie = buf;
+ return buf;
}
/**
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 07d8d8f52faf..059597b23f67 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -892,7 +892,7 @@ static const struct inode_operations hostfs_dir_iops = {
.setattr = hostfs_setattr,
};
-static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *hostfs_follow_link(struct dentry *dentry, void **cookie)
{
char *link = __getname();
if (link) {
@@ -906,21 +906,18 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
}
if (err < 0) {
__putname(link);
- link = ERR_PTR(err);
+ return ERR_PTR(err);
}
} else {
- link = ERR_PTR(-ENOMEM);
+ return ERR_PTR(-ENOMEM);
}
- nd_set_link(nd, link);
- return NULL;
+ return *cookie = link;
}
-static void hostfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
+static void hostfs_put_link(struct inode *unused, void *cookie)
{
- char *s = nd_get_link(nd);
- if (!IS_ERR(s))
- __putname(s);
+ __putname(cookie);
}
static const struct inode_operations hostfs_link_iops = {
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index fa2bd5366ecf..2867837909a9 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -642,20 +642,19 @@ static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
buflen);
}
-static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *hppfs_follow_link(struct dentry *dentry, void **cookie)
{
struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry;
- return d_inode(proc_dentry)->i_op->follow_link(proc_dentry, nd);
+ return d_inode(proc_dentry)->i_op->follow_link(proc_dentry, cookie);
}
-static void hppfs_put_link(struct dentry *dentry, struct nameidata *nd,
- void *cookie)
+static void hppfs_put_link(struct inode *inode, void *cookie)
{
- struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry;
+ struct inode *proc_inode = d_inode(HPPFS_I(inode)->proc_dentry);
- if (d_inode(proc_dentry)->i_op->put_link)
- d_inode(proc_dentry)->i_op->put_link(proc_dentry, nd, cookie);
+ if (proc_inode->i_op->put_link)
+ proc_inode->i_op->put_link(proc_inode, cookie);
}
static const struct inode_operations hppfs_dir_iops = {
diff --git a/fs/inode.c b/fs/inode.c
index ea37cd17b53f..e8d62688ed91 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -152,6 +152,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_pipe = NULL;
inode->i_bdev = NULL;
inode->i_cdev = NULL;
+ inode->i_link = NULL;
inode->i_rdev = 0;
inode->dirtied_when = 0;
@@ -1584,36 +1585,47 @@ static int update_time(struct inode *inode, struct timespec *time, int flags)
* This function automatically handles read only file systems and media,
* as well as the "noatime" flag and inode specific "noatime" markers.
*/
-void touch_atime(const struct path *path)
+bool atime_needs_update(const struct path *path, struct inode *inode)
{
struct vfsmount *mnt = path->mnt;
- struct inode *inode = d_inode(path->dentry);
struct timespec now;
if (inode->i_flags & S_NOATIME)
- return;
+ return false;
if (IS_NOATIME(inode))
- return;
+ return false;
if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
- return;
+ return false;
if (mnt->mnt_flags & MNT_NOATIME)
- return;
+ return false;
if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
- return;
+ return false;
now = current_fs_time(inode->i_sb);
if (!relatime_need_update(mnt, inode, now))
- return;
+ return false;
if (timespec_equal(&inode->i_atime, &now))
+ return false;
+
+ return true;
+}
+
+void touch_atime(const struct path *path)
+{
+ struct vfsmount *mnt = path->mnt;
+ struct inode *inode = d_inode(path->dentry);
+ struct timespec now;
+
+ if (!atime_needs_update(path, inode))
return;
if (!sb_start_write_trylock(inode->i_sb))
return;
- if (__mnt_want_write(mnt))
+ if (__mnt_want_write(mnt) != 0)
goto skip_update;
/*
* File systems can error out when updating inodes if they need to
@@ -1624,6 +1636,7 @@ void touch_atime(const struct path *path)
* We may also fail on filesystems that have the ability to make parts
* of the fs read only, e.g. subvolumes in Btrfs.
*/
+ now = current_fs_time(inode->i_sb);
update_time(inode, &now, S_ATIME);
__mnt_drop_write(mnt);
skip_update:
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 1ba5c97943b8..81180022923f 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -354,6 +354,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
ret = -ENOMEM;
goto fail;
}
+ inode->i_link = f->target;
jffs2_dbg(1, "%s(): symlink's target '%s' cached\n",
__func__, (char *)f->target);
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index fe5ea080b4ec..2caf1682036d 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -272,12 +272,9 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
mutex_lock(&f->sem);
ret = jffs2_do_read_inode(c, f, inode->i_ino, &latest_node);
+ if (ret)
+ goto error;
- if (ret) {
- mutex_unlock(&f->sem);
- iget_failed(inode);
- return ERR_PTR(ret);
- }
inode->i_mode = jemode_to_cpu(latest_node.mode);
i_uid_write(inode, je16_to_cpu(latest_node.uid));
i_gid_write(inode, je16_to_cpu(latest_node.gid));
@@ -294,6 +291,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
case S_IFLNK:
inode->i_op = &jffs2_symlink_inode_operations;
+ inode->i_link = f->target;
break;
case S_IFDIR:
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index dddbde4f56f4..28e0aab42bc3 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1203,17 +1203,13 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
JFFS2_ERROR("failed to read from flash: error %d, %zd of %zd bytes read\n",
ret, retlen, sizeof(*latest_node));
/* FIXME: If this fails, there seems to be a memory leak. Find it. */
- mutex_unlock(&f->sem);
- jffs2_do_clear_inode(c, f);
- return ret?ret:-EIO;
+ return ret ? ret : -EIO;
}
crc = crc32(0, latest_node, sizeof(*latest_node)-8);
if (crc != je32_to_cpu(latest_node->node_crc)) {
JFFS2_ERROR("CRC failed for read_inode of inode %u at physical location 0x%x\n",
f->inocache->ino, ref_offset(rii.latest_ref));
- mutex_unlock(&f->sem);
- jffs2_do_clear_inode(c, f);
return -EIO;
}
@@ -1250,16 +1246,11 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
* keep in RAM to facilitate quick follow symlink
* operation. */
uint32_t csize = je32_to_cpu(latest_node->csize);
- if (csize > JFFS2_MAX_NAME_LEN) {
- mutex_unlock(&f->sem);
- jffs2_do_clear_inode(c, f);
+ if (csize > JFFS2_MAX_NAME_LEN)
return -ENAMETOOLONG;
- }
f->target = kmalloc(csize + 1, GFP_KERNEL);
if (!f->target) {
JFFS2_ERROR("can't allocate %u bytes of memory for the symlink target path cache\n", csize);
- mutex_unlock(&f->sem);
- jffs2_do_clear_inode(c, f);
return -ENOMEM;
}
@@ -1271,8 +1262,6 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
ret = -EIO;
kfree(f->target);
f->target = NULL;
- mutex_unlock(&f->sem);
- jffs2_do_clear_inode(c, f);
return ret;
}
@@ -1289,15 +1278,11 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
if (f->metadata) {
JFFS2_ERROR("Argh. Special inode #%u with mode 0%o had metadata node\n",
f->inocache->ino, jemode_to_cpu(latest_node->mode));
- mutex_unlock(&f->sem);
- jffs2_do_clear_inode(c, f);
return -EIO;
}
if (!frag_first(&f->fragtree)) {
JFFS2_ERROR("Argh. Special inode #%u with mode 0%o has no fragments\n",
f->inocache->ino, jemode_to_cpu(latest_node->mode));
- mutex_unlock(&f->sem);
- jffs2_do_clear_inode(c, f);
return -EIO;
}
/* ASSERT: f->fraglist != NULL */
@@ -1305,8 +1290,6 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
JFFS2_ERROR("Argh. Special inode #%u with mode 0x%x had more than one node\n",
f->inocache->ino, jemode_to_cpu(latest_node->mode));
/* FIXME: Deal with it - check crc32, check for duplicate node, check times and discard the older one */
- mutex_unlock(&f->sem);
- jffs2_do_clear_inode(c, f);
return -EIO;
}
/* OK. We're happy */
@@ -1400,10 +1383,8 @@ int jffs2_do_crccheck_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *i
f->inocache = ic;
ret = jffs2_do_read_inode_internal(c, f, &n);
- if (!ret) {
- mutex_unlock(&f->sem);
- jffs2_do_clear_inode(c, f);
- }
+ mutex_unlock(&f->sem);
+ jffs2_do_clear_inode(c, f);
jffs2_xattr_do_crccheck_inode(c, ic);
kfree (f);
return ret;
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index 1fefa25d0fa5..8ce2f240125b 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -9,58 +9,15 @@
*
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/namei.h>
#include "nodelist.h"
-static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd);
-
const struct inode_operations jffs2_symlink_inode_operations =
{
.readlink = generic_readlink,
- .follow_link = jffs2_follow_link,
+ .follow_link = simple_follow_link,
.setattr = jffs2_setattr,
.setxattr = jffs2_setxattr,
.getxattr = jffs2_getxattr,
.listxattr = jffs2_listxattr,
.removexattr = jffs2_removexattr
};
-
-static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(dentry));
- char *p = (char *)f->target;
-
- /*
- * We don't acquire the f->sem mutex here since the only data we
- * use is f->target.
- *
- * 1. If we are here the inode has already built and f->target has
- * to point to the target path.
- * 2. Nobody uses f->target (if the inode is symlink's inode). The
- * exception is inode freeing function which frees f->target. But
- * it can't be called while we are here and before VFS has
- * stopped using our f->target string which we provide by means of
- * nd_set_link() call.
- */
-
- if (!p) {
- pr_err("%s(): can't find symlink target\n", __func__);
- p = ERR_PTR(-EIO);
- }
- jffs2_dbg(1, "%s(): target path is '%s'\n",
- __func__, (char *)f->target);
-
- nd_set_link(nd, p);
-
- /*
- * We will unlock the f->sem mutex but VFS will use the f->target string. This is safe
- * since the only way that may cause f->target to be changed is iput() operation.
- * But VFS will not use f->target after iput() has been called.
- */
- return NULL;
-}
-
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 070dc4b33544..6f1cb2b5ee28 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -63,11 +63,12 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
inode->i_mapping->a_ops = &jfs_aops;
} else {
inode->i_op = &jfs_fast_symlink_inode_operations;
+ inode->i_link = JFS_IP(inode)->i_inline;
/*
* The inline data should be null-terminated, but
* don't let on-disk corruption crash the kernel
*/
- JFS_IP(inode)->i_inline[inode->i_size] = '\0';
+ inode->i_link[inode->i_size] = '\0';
}
} else {
inode->i_op = &jfs_file_inode_operations;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 66db7bc0ed10..e33be921aa41 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -880,7 +880,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
int ssize; /* source pathname size */
struct btstack btstack;
struct inode *ip = d_inode(dentry);
- unchar *i_fastsymlink;
s64 xlen = 0;
int bmask = 0, xsize;
s64 xaddr;
@@ -946,8 +945,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
if (ssize <= IDATASIZE) {
ip->i_op = &jfs_fast_symlink_inode_operations;
- i_fastsymlink = JFS_IP(ip)->i_inline;
- memcpy(i_fastsymlink, name, ssize);
+ ip->i_link = JFS_IP(ip)->i_inline;
+ memcpy(ip->i_link, name, ssize);
ip->i_size = ssize - 1;
/*
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index 80f42bcc4ef1..5929e2363cb8 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -17,21 +17,13 @@
*/
#include <linux/fs.h>
-#include <linux/namei.h>
#include "jfs_incore.h"
#include "jfs_inode.h"
#include "jfs_xattr.h"
-static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- char *s = JFS_IP(d_inode(dentry))->i_inline;
- nd_set_link(nd, s);
- return NULL;
-}
-
const struct inode_operations jfs_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = jfs_follow_link,
+ .follow_link = simple_follow_link,
.setattr = jfs_setattr,
.setxattr = jfs_setxattr,
.getxattr = jfs_getxattr,
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 8a198898e39a..db272528ab5b 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -112,25 +112,18 @@ static int kernfs_getlink(struct dentry *dentry, char *path)
return error;
}
-static void *kernfs_iop_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *kernfs_iop_follow_link(struct dentry *dentry, void **cookie)
{
int error = -ENOMEM;
unsigned long page = get_zeroed_page(GFP_KERNEL);
- if (page) {
- error = kernfs_getlink(dentry, (char *) page);
- if (error < 0)
- free_page((unsigned long)page);
- }
- nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
- return NULL;
-}
-
-static void kernfs_iop_put_link(struct dentry *dentry, struct nameidata *nd,
- void *cookie)
-{
- char *page = nd_get_link(nd);
- if (!IS_ERR(page))
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+ error = kernfs_getlink(dentry, (char *)page);
+ if (unlikely(error < 0)) {
free_page((unsigned long)page);
+ return ERR_PTR(error);
+ }
+ return *cookie = (char *)page;
}
const struct inode_operations kernfs_symlink_iops = {
@@ -140,7 +133,7 @@ const struct inode_operations kernfs_symlink_iops = {
.listxattr = kernfs_iop_listxattr,
.readlink = generic_readlink,
.follow_link = kernfs_iop_follow_link,
- .put_link = kernfs_iop_put_link,
+ .put_link = free_page_put_link,
.setattr = kernfs_iop_setattr,
.getattr = kernfs_iop_getattr,
.permission = kernfs_iop_permission,
diff --git a/fs/libfs.c b/fs/libfs.c
index cb1fb4b9b637..65e1feca8b98 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1024,15 +1024,18 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
}
EXPORT_SYMBOL(noop_fsync);
-void kfree_put_link(struct dentry *dentry, struct nameidata *nd,
- void *cookie)
+void kfree_put_link(struct inode *unused, void *cookie)
{
- char *s = nd_get_link(nd);
- if (!IS_ERR(s))
- kfree(s);
+ kfree(cookie);
}
EXPORT_SYMBOL(kfree_put_link);
+/*
+ * ->put_link() helper for symlink bodies held in a single page: the
+ * cookie is treated as the page address and passed to free_page().
+ * The inode argument is not used.
+ */
+void free_page_put_link(struct inode *unused, void *cookie)
+{
+	unsigned long addr = (unsigned long)cookie;
+
+	free_page(addr);
+}
+EXPORT_SYMBOL(free_page_put_link);
+
/*
* nop .set_page_dirty method so that people can use .page_mkwrite on
* anon inodes.
@@ -1093,3 +1096,15 @@ simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
return -EINVAL;
}
EXPORT_SYMBOL(simple_nosetlease);
+
+/*
+ * ->follow_link() for symlinks whose body is reachable through
+ * inode->i_link: return that pointer as-is.  @cookie is not touched.
+ */
+const char *simple_follow_link(struct dentry *dentry, void **cookie)
+{
+	struct inode *inode = d_inode(dentry);
+
+	return inode->i_link;
+}
+EXPORT_SYMBOL(simple_follow_link);
+
+/*
+ * Symlink inode operations assembled purely from generic helpers:
+ * generic_readlink() for ->readlink and simple_follow_link() for
+ * ->follow_link.
+ */
+const struct inode_operations simple_symlink_inode_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= simple_follow_link,
+};
+EXPORT_SYMBOL(simple_symlink_inode_operations);
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 4cf38f118549..f9b45d46d4c4 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -779,6 +779,7 @@ fail:
const struct inode_operations logfs_symlink_iops = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
+ .put_link = page_put_link,
};
const struct inode_operations logfs_dir_iops = {
diff --git a/fs/mount.h b/fs/mount.h
index 6a61c2b3e385..b5b8082bfa42 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -88,6 +88,7 @@ static inline int is_mounted(struct vfsmount *mnt)
extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
+extern int __legitimize_mnt(struct vfsmount *, unsigned);
extern bool legitimize_mnt(struct vfsmount *, unsigned);
extern void __detach_mounts(struct dentry *dentry);
diff --git a/fs/namei.c b/fs/namei.c
index fe30d3be43a8..2dad0eaf91d3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -492,6 +492,7 @@ void path_put(const struct path *path)
}
EXPORT_SYMBOL(path_put);
+#define EMBEDDED_LEVELS 2
struct nameidata {
struct path path;
struct qstr last;
@@ -501,10 +502,139 @@ struct nameidata {
unsigned seq, m_seq;
int last_type;
unsigned depth;
- struct file *base;
- char *saved_names[MAX_NESTED_LINKS + 1];
+ int total_link_count;
+ struct saved {
+ struct path link;
+ void *cookie;
+ const char *name;
+ struct inode *inode;
+ unsigned seq;
+ } *stack, internal[EMBEDDED_LEVELS];
+ struct filename *name;
+ struct nameidata *saved;
+ unsigned root_seq;
+ int dfd;
};
+/*
+ * Record @dfd and @name in @p, point its link stack at the embedded
+ * array and install @p as current->nameidata.  The nameidata that was
+ * current before is remembered in p->saved, and its total_link_count
+ * is carried over (zero when there was none).
+ */
+static void set_nameidata(struct nameidata *p, int dfd, struct filename *name)
+{
+	struct nameidata *prev = current->nameidata;
+
+	p->name = name;
+	p->dfd = dfd;
+	p->stack = p->internal;
+	p->saved = prev;
+	if (prev)
+		p->total_link_count = prev->total_link_count;
+	else
+		p->total_link_count = 0;
+	current->nameidata = p;
+}
+
+/*
+ * Make the nameidata saved by set_nameidata() current again.  The
+ * link count accumulated by the one being retired is copied back into
+ * the restored one (if any), and a heap-allocated link stack is freed
+ * with the stack pointer reset to the embedded array.
+ */
+static void restore_nameidata(void)
+{
+	struct nameidata *cur = current->nameidata;
+	struct nameidata *prev = cur->saved;
+
+	current->nameidata = prev;
+	if (prev)
+		prev->total_link_count = cur->total_link_count;
+
+	/* nothing to free while still on the embedded stack */
+	if (cur->stack == cur->internal)
+		return;
+	kfree(cur->stack);
+	cur->stack = cur->internal;
+}
+
+/*
+ * Replace the embedded link stack with a kmalloc()ed array of
+ * MAXSYMLINKS entries, copying the embedded entries across.
+ *
+ * With LOOKUP_RCU set the allocation uses GFP_ATOMIC and a failure is
+ * reported as -ECHILD; otherwise GFP_KERNEL is used and a failure is
+ * reported as -ENOMEM.  Returns 0 on success.
+ */
+static int __nd_alloc_stack(struct nameidata *nd)
+{
+	const bool rcu = nd->flags & LOOKUP_RCU;
+	struct saved *stack;
+
+	stack = kmalloc(MAXSYMLINKS * sizeof(struct saved),
+			rcu ? GFP_ATOMIC : GFP_KERNEL);
+	if (unlikely(!stack))
+		return rcu ? -ECHILD : -ENOMEM;
+
+	memcpy(stack, nd->internal, sizeof(nd->internal));
+	nd->stack = stack;
+	return 0;
+}
+
+/*
+ * Make sure there is room for one more link on nd->stack.  The slow
+ * path is only taken when the depth has just reached EMBEDDED_LEVELS
+ * and the stack is still the embedded array; in every other case
+ * nothing needs doing and 0 is returned.
+ */
+static inline int nd_alloc_stack(struct nameidata *nd)
+{
+	if (unlikely(nd->depth == EMBEDDED_LEVELS) &&
+	    unlikely(nd->stack == nd->internal))
+		return __nd_alloc_stack(nd);
+	return 0;
+}
+
+/*
+ * Walk the link stack from the top down and call ->put_link() for
+ * every entry that still holds a cookie, clearing the cookie once it
+ * has been handed back.  nd->depth and the saved paths are left alone.
+ */
+static void drop_links(struct nameidata *nd)
+{
+	int i;
+
+	for (i = nd->depth; i != 0; ) {
+		struct saved *entry;
+		struct inode *inode;
+
+		i--;
+		entry = &nd->stack[i];
+		inode = entry->inode;
+		if (!entry->cookie)
+			continue;
+		if (!inode->i_op->put_link)
+			continue;
+		inode->i_op->put_link(inode, entry->cookie);
+		entry->cookie = NULL;
+	}
+}
+
+/*
+ * Finish a path walk: release the cookies of all stacked links, then
+ * either leave RCU mode (clearing LOOKUP_RCU, forgetting nd->root
+ * unless LOOKUP_ROOT is set, and dropping the RCU read lock) or, in
+ * ref-walk mode, put nd->path, every stacked link path and - unless
+ * LOOKUP_ROOT is set - nd->root.  The link stack is empty afterwards.
+ */
+static void terminate_walk(struct nameidata *nd)
+{
+	drop_links(nd);
+	if (nd->flags & LOOKUP_RCU) {
+		nd->flags &= ~LOOKUP_RCU;
+		if (!(nd->flags & LOOKUP_ROOT))
+			nd->root.mnt = NULL;
+		rcu_read_unlock();
+	} else {
+		int i = 0;
+
+		path_put(&nd->path);
+		while (i < nd->depth) {
+			path_put(&nd->stack[i].link);
+			i++;
+		}
+		if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+			path_put(&nd->root);
+			nd->root.mnt = NULL;
+		}
+	}
+	nd->depth = 0;
+}
+
+/*
+ * Try to turn @path, observed under RCU, into a referenced path.
+ *
+ * The mount is checked with __legitimize_mnt() against nd->m_seq, a
+ * reference is then taken on the dentry unless it is already dead, and
+ * finally the dentry's d_seq is compared against @seq.  Returns true
+ * only if all three steps succeed.
+ *
+ * Whatever the outcome, the caller has to path_put() @path afterwards.
+ * On failure the members that must not be put are set to NULL first
+ * (presumably so that path_put() only drops what was really taken).
+ */
+static bool legitimize_path(struct nameidata *nd,
+			    struct path *path, unsigned seq)
+{
+	int err = __legitimize_mnt(path->mnt, nd->m_seq);
+
+	if (unlikely(err)) {
+		if (err > 0)
+			path->mnt = NULL;
+		path->dentry = NULL;
+		return false;
+	}
+	if (unlikely(!lockref_get_not_dead(&path->dentry->d_lockref))) {
+		path->dentry = NULL;
+		return false;
+	}
+	if (read_seqcount_retry(&path->dentry->d_seq, seq))
+		return false;
+	return true;
+}
+
+/*
+ * Legitimize the path of every link on nd->stack, bottom to top.
+ *
+ * On the first failure all outstanding cookies are released and
+ * nd->depth is cut down so that it covers the entries up to and
+ * including the one that failed; false is returned.  Returns true if
+ * every entry was legitimized.
+ */
+static bool legitimize_links(struct nameidata *nd)
+{
+	unsigned int idx = 0;
+
+	while (idx < nd->depth) {
+		struct saved *entry = &nd->stack[idx];
+
+		if (unlikely(!legitimize_path(nd, &entry->link, entry->seq))) {
+			drop_links(nd);
+			nd->depth = idx + 1;
+			return false;
+		}
+		idx++;
+	}
+	return true;
+}
+
/*
* Path walking has 2 modes, rcu-walk and ref-walk (see
* Documentation/filesystems/path-lookup.txt). In situations when we can't
@@ -520,35 +650,28 @@ struct nameidata {
* unlazy_walk - try to switch to ref-walk mode.
* @nd: nameidata pathwalk data
* @dentry: child of nd->path.dentry or NULL
+ * @seq: seq number to check dentry against
* Returns: 0 on success, -ECHILD on failure
*
* unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry
* for ref-walk mode. @dentry must be a path found by a do_lookup call on
* @nd or NULL. Must be called from rcu-walk context.
+ * Nothing should touch nameidata between unlazy_walk() failure and
+ * terminate_walk().
*/
-static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
+static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq)
{
- struct fs_struct *fs = current->fs;
struct dentry *parent = nd->path.dentry;
BUG_ON(!(nd->flags & LOOKUP_RCU));
- /*
- * After legitimizing the bastards, terminate_walk()
- * will do the right thing for non-RCU mode, and all our
- * subsequent exit cases should rcu_read_unlock()
- * before returning. Do vfsmount first; if dentry
- * can't be legitimized, just set nd->path.dentry to NULL
- * and rely on dput(NULL) being a no-op.
- */
- if (!legitimize_mnt(nd->path.mnt, nd->m_seq))
- return -ECHILD;
nd->flags &= ~LOOKUP_RCU;
-
- if (!lockref_get_not_dead(&parent->d_lockref)) {
- nd->path.dentry = NULL;
- goto out;
- }
+ if (unlikely(!legitimize_links(nd)))
+ goto out2;
+ if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq)))
+ goto out2;
+ if (unlikely(!lockref_get_not_dead(&parent->d_lockref)))
+ goto out1;
/*
* For a negative lookup, the lookup sequence point is the parents
@@ -568,7 +691,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
} else {
if (!lockref_get_not_dead(&dentry->d_lockref))
goto out;
- if (read_seqcount_retry(&dentry->d_seq, nd->seq))
+ if (read_seqcount_retry(&dentry->d_seq, seq))
goto drop_dentry;
}
@@ -577,22 +700,24 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
* still valid and get it if required.
*/
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
- spin_lock(&fs->lock);
- if (nd->root.mnt != fs->root.mnt || nd->root.dentry != fs->root.dentry)
- goto unlock_and_drop_dentry;
- path_get(&nd->root);
- spin_unlock(&fs->lock);
+ if (unlikely(!legitimize_path(nd, &nd->root, nd->root_seq))) {
+ rcu_read_unlock();
+ dput(dentry);
+ return -ECHILD;
+ }
}
rcu_read_unlock();
return 0;
-unlock_and_drop_dentry:
- spin_unlock(&fs->lock);
drop_dentry:
rcu_read_unlock();
dput(dentry);
goto drop_root_mnt;
+out2:
+ nd->path.mnt = NULL;
+out1:
+ nd->path.dentry = NULL;
out:
rcu_read_unlock();
drop_root_mnt:
@@ -601,6 +726,24 @@ drop_root_mnt:
return -ECHILD;
}
+/*
+ * Switch from rcu-walk to ref-walk while holding on to @link, a path
+ * that has been looked up but is not part of nd->stack.
+ *
+ * @link is legitimized against @seq first.  If that fails, nd is reset
+ * by hand: link cookies are dropped, the stack is emptied, LOOKUP_RCU
+ * is cleared, nd->path (and nd->root unless LOOKUP_ROOT is set) is
+ * forgotten and the RCU read lock is released.  Otherwise the rest of
+ * nd is handed to unlazy_walk().
+ *
+ * Returns 0 on success.  On any failure @link is put and -ECHILD is
+ * returned.
+ */
+static int unlazy_link(struct nameidata *nd, struct path *link, unsigned seq)
+{
+	if (unlikely(!legitimize_path(nd, link, seq))) {
+		drop_links(nd);
+		nd->depth = 0;
+		nd->flags &= ~LOOKUP_RCU;
+		nd->path.mnt = NULL;
+		nd->path.dentry = NULL;
+		if (!(nd->flags & LOOKUP_ROOT))
+			nd->root.mnt = NULL;
+		rcu_read_unlock();
+	} else if (likely(unlazy_walk(nd, NULL, 0) == 0)) {
+		/* the hint covers the whole comparison: success is expected */
+		return 0;
+	}
+	path_put(link);
+	return -ECHILD;
+}
+
static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
{
return dentry->d_op->d_revalidate(dentry, flags);
@@ -622,26 +765,10 @@ static int complete_walk(struct nameidata *nd)
int status;
if (nd->flags & LOOKUP_RCU) {
- nd->flags &= ~LOOKUP_RCU;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
-
- if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
- rcu_read_unlock();
- return -ECHILD;
- }
- if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
- rcu_read_unlock();
- mntput(nd->path.mnt);
+ if (unlikely(unlazy_walk(nd, NULL, 0)))
return -ECHILD;
- }
- if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
- rcu_read_unlock();
- dput(dentry);
- mntput(nd->path.mnt);
- return -ECHILD;
- }
- rcu_read_unlock();
}
if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -657,28 +784,25 @@ static int complete_walk(struct nameidata *nd)
if (!status)
status = -ESTALE;
- path_put(&nd->path);
return status;
}
-static __always_inline void set_root(struct nameidata *nd)
+static void set_root(struct nameidata *nd)
{
get_fs_root(current->fs, &nd->root);
}
-static int link_path_walk(const char *, struct nameidata *);
-
-static __always_inline unsigned set_root_rcu(struct nameidata *nd)
+static unsigned set_root_rcu(struct nameidata *nd)
{
struct fs_struct *fs = current->fs;
- unsigned seq, res;
+ unsigned seq;
do {
seq = read_seqcount_begin(&fs->seq);
nd->root = fs->root;
- res = __read_seqcount_begin(&nd->root.dentry->d_seq);
+ nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
} while (read_seqcount_retry(&fs->seq, seq));
- return res;
+ return nd->root_seq;
}
static void path_put_conditional(struct path *path, struct nameidata *nd)
@@ -704,8 +828,9 @@ static inline void path_to_nameidata(const struct path *path,
* Helper to directly jump to a known parsed path from ->follow_link,
* caller must have taken a reference to path beforehand.
*/
-void nd_jump_link(struct nameidata *nd, struct path *path)
+void nd_jump_link(struct path *path)
{
+ struct nameidata *nd = current->nameidata;
path_put(&nd->path);
nd->path = *path;
@@ -713,24 +838,14 @@ void nd_jump_link(struct nameidata *nd, struct path *path)
nd->flags |= LOOKUP_JUMPED;
}
-void nd_set_link(struct nameidata *nd, char *path)
-{
- nd->saved_names[nd->depth] = path;
-}
-EXPORT_SYMBOL(nd_set_link);
-
-char *nd_get_link(struct nameidata *nd)
-{
- return nd->saved_names[nd->depth];
-}
-EXPORT_SYMBOL(nd_get_link);
-
-static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
+static inline void put_link(struct nameidata *nd)
{
- struct inode *inode = link->dentry->d_inode;
- if (inode->i_op->put_link)
- inode->i_op->put_link(link->dentry, nd, cookie);
- path_put(link);
+ struct saved *last = nd->stack + --nd->depth;
+ struct inode *inode = last->inode;
+ if (last->cookie && inode->i_op->put_link)
+ inode->i_op->put_link(inode, last->cookie);
+ if (!(nd->flags & LOOKUP_RCU))
+ path_put(&last->link);
}
int sysctl_protected_symlinks __read_mostly = 0;
@@ -738,7 +853,6 @@ int sysctl_protected_hardlinks __read_mostly = 0;
/**
* may_follow_link - Check symlink following for unsafe situations
- * @link: The path of the symlink
* @nd: nameidata pathwalk data
*
* In the case of the sysctl_protected_symlinks sysctl being enabled,
@@ -752,7 +866,7 @@ int sysctl_protected_hardlinks __read_mostly = 0;
*
* Returns 0 if following the symlink is allowed, -ve on error.
*/
-static inline int may_follow_link(struct path *link, struct nameidata *nd)
+static inline int may_follow_link(struct nameidata *nd)
{
const struct inode *inode;
const struct inode *parent;
@@ -761,7 +875,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd)
return 0;
/* Allowed if owner and follower match. */
- inode = link->dentry->d_inode;
+ inode = nd->stack[0].inode;
if (uid_eq(current_cred()->fsuid, inode->i_uid))
return 0;
@@ -774,9 +888,10 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd)
if (uid_eq(parent->i_uid, inode->i_uid))
return 0;
- audit_log_link_denied("follow_link", link);
- path_put_conditional(link, nd);
- path_put(&nd->path);
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ audit_log_link_denied("follow_link", &nd->stack[0].link);
return -EACCES;
}
@@ -849,82 +964,68 @@ static int may_linkat(struct path *link)
return -EPERM;
}
-static __always_inline int
-follow_link(struct path *link, struct nameidata *nd, void **p)
+static __always_inline
+const char *get_link(struct nameidata *nd)
{
- struct dentry *dentry = link->dentry;
+ struct saved *last = nd->stack + nd->depth - 1;
+ struct dentry *dentry = last->link.dentry;
+ struct inode *inode = last->inode;
int error;
- char *s;
+ const char *res;
- BUG_ON(nd->flags & LOOKUP_RCU);
-
- if (link->mnt == nd->path.mnt)
- mntget(link->mnt);
-
- error = -ELOOP;
- if (unlikely(current->total_link_count >= 40))
- goto out_put_nd_path;
-
- cond_resched();
- current->total_link_count++;
-
- touch_atime(link);
- nd_set_link(nd, NULL);
+ if (!(nd->flags & LOOKUP_RCU)) {
+ touch_atime(&last->link);
+ cond_resched();
+ } else if (atime_needs_update(&last->link, inode)) {
+ if (unlikely(unlazy_walk(nd, NULL, 0)))
+ return ERR_PTR(-ECHILD);
+ touch_atime(&last->link);
+ }
- error = security_inode_follow_link(link->dentry, nd);
- if (error)
- goto out_put_nd_path;
+ error = security_inode_follow_link(dentry, inode,
+ nd->flags & LOOKUP_RCU);
+ if (unlikely(error))
+ return ERR_PTR(error);
nd->last_type = LAST_BIND;
- *p = dentry->d_inode->i_op->follow_link(dentry, nd);
- error = PTR_ERR(*p);
- if (IS_ERR(*p))
- goto out_put_nd_path;
-
- error = 0;
- s = nd_get_link(nd);
- if (s) {
- if (unlikely(IS_ERR(s))) {
- path_put(&nd->path);
- put_link(nd, link, *p);
- return PTR_ERR(s);
+ res = inode->i_link;
+ if (!res) {
+ if (nd->flags & LOOKUP_RCU) {
+ if (unlikely(unlazy_walk(nd, NULL, 0)))
+ return ERR_PTR(-ECHILD);
}
- if (*s == '/') {
+ res = inode->i_op->follow_link(dentry, &last->cookie);
+ if (IS_ERR_OR_NULL(res)) {
+ last->cookie = NULL;
+ return res;
+ }
+ }
+ if (*res == '/') {
+ if (nd->flags & LOOKUP_RCU) {
+ struct dentry *d;
+ if (!nd->root.mnt)
+ set_root_rcu(nd);
+ nd->path = nd->root;
+ d = nd->path.dentry;
+ nd->inode = d->d_inode;
+ nd->seq = nd->root_seq;
+ if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq)))
+ return ERR_PTR(-ECHILD);
+ } else {
if (!nd->root.mnt)
set_root(nd);
path_put(&nd->path);
nd->path = nd->root;
path_get(&nd->root);
- nd->flags |= LOOKUP_JUMPED;
+ nd->inode = nd->path.dentry->d_inode;
}
- nd->inode = nd->path.dentry->d_inode;
- error = link_path_walk(s, nd);
- if (unlikely(error))
- put_link(nd, link, *p);
+ nd->flags |= LOOKUP_JUMPED;
+ while (unlikely(*++res == '/'))
+ ;
}
-
- return error;
-
-out_put_nd_path:
- *p = NULL;
- path_put(&nd->path);
- path_put(link);
- return error;
-}
-
-static int follow_up_rcu(struct path *path)
-{
- struct mount *mnt = real_mount(path->mnt);
- struct mount *parent;
- struct dentry *mountpoint;
-
- parent = mnt->mnt_parent;
- if (&parent->mnt == path->mnt)
- return 0;
- mountpoint = mnt->mnt_mountpoint;
- path->dentry = mountpoint;
- path->mnt = &parent->mnt;
- return 1;
+ if (!*res)
+ res = NULL;
+ return res;
}
/*
@@ -965,7 +1066,7 @@ EXPORT_SYMBOL(follow_up);
* - return -EISDIR to tell follow_managed() to stop and return the path we
* were called with.
*/
-static int follow_automount(struct path *path, unsigned flags,
+static int follow_automount(struct path *path, struct nameidata *nd,
bool *need_mntput)
{
struct vfsmount *mnt;
@@ -985,13 +1086,13 @@ static int follow_automount(struct path *path, unsigned flags,
* as being automount points. These will need the attentions
* of the daemon to instantiate them before they can be used.
*/
- if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
- LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
+ if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
+ LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
path->dentry->d_inode)
return -EISDIR;
- current->total_link_count++;
- if (current->total_link_count >= 40)
+ nd->total_link_count++;
+ if (nd->total_link_count >= 40)
return -ELOOP;
mnt = path->dentry->d_op->d_automount(path);
@@ -1005,7 +1106,7 @@ static int follow_automount(struct path *path, unsigned flags,
* the path being looked up; if it wasn't then the remainder of
* the path is inaccessible and we should say so.
*/
- if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_PARENT))
+ if (PTR_ERR(mnt) == -EISDIR && (nd->flags & LOOKUP_PARENT))
return -EREMOTE;
return PTR_ERR(mnt);
}
@@ -1045,7 +1146,7 @@ static int follow_automount(struct path *path, unsigned flags,
*
* Serialization is taken care of in namespace.c
*/
-static int follow_managed(struct path *path, unsigned flags)
+static int follow_managed(struct path *path, struct nameidata *nd)
{
struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
unsigned managed;
@@ -1089,7 +1190,7 @@ static int follow_managed(struct path *path, unsigned flags)
/* Handle an automount point */
if (managed & DCACHE_NEED_AUTOMOUNT) {
- ret = follow_automount(path, flags, &need_mntput);
+ ret = follow_automount(path, nd, &need_mntput);
if (ret < 0)
break;
continue;
@@ -1103,7 +1204,11 @@ static int follow_managed(struct path *path, unsigned flags)
mntput(path->mnt);
if (ret == -EISDIR)
ret = 0;
- return ret < 0 ? ret : need_mntput;
+ if (need_mntput)
+ nd->flags |= LOOKUP_JUMPED;
+ if (unlikely(ret < 0))
+ path_put_conditional(path, nd);
+ return ret;
}
int follow_down_one(struct path *path)
@@ -1133,7 +1238,7 @@ static inline int managed_dentry_rcu(struct dentry *dentry)
* we meet a managed dentry that would need blocking.
*/
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
- struct inode **inode)
+ struct inode **inode, unsigned *seqp)
{
for (;;) {
struct mount *mounted;
@@ -1160,7 +1265,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
path->mnt = &mounted->mnt;
path->dentry = mounted->mnt.mnt_root;
nd->flags |= LOOKUP_JUMPED;
- nd->seq = read_seqcount_begin(&path->dentry->d_seq);
+ *seqp = read_seqcount_begin(&path->dentry->d_seq);
/*
* Update the inode too. We don't need to re-check the
* dentry sequence number here after this d_inode read,
@@ -1179,10 +1284,8 @@ static int follow_dotdot_rcu(struct nameidata *nd)
set_root_rcu(nd);
while (1) {
- if (nd->path.dentry == nd->root.dentry &&
- nd->path.mnt == nd->root.mnt) {
+ if (path_equal(&nd->path, &nd->root))
break;
- }
if (nd->path.dentry != nd->path.mnt->mnt_root) {
struct dentry *old = nd->path.dentry;
struct dentry *parent = old->d_parent;
@@ -1190,38 +1293,42 @@ static int follow_dotdot_rcu(struct nameidata *nd)
inode = parent->d_inode;
seq = read_seqcount_begin(&parent->d_seq);
- if (read_seqcount_retry(&old->d_seq, nd->seq))
- goto failed;
+ if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
+ return -ECHILD;
nd->path.dentry = parent;
nd->seq = seq;
break;
+ } else {
+ struct mount *mnt = real_mount(nd->path.mnt);
+ struct mount *mparent = mnt->mnt_parent;
+ struct dentry *mountpoint = mnt->mnt_mountpoint;
+ struct inode *inode2 = mountpoint->d_inode;
+ unsigned seq = read_seqcount_begin(&mountpoint->d_seq);
+ if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
+ return -ECHILD;
+ if (&mparent->mnt == nd->path.mnt)
+ break;
+ /* we know that mountpoint was pinned */
+ nd->path.dentry = mountpoint;
+ nd->path.mnt = &mparent->mnt;
+ inode = inode2;
+ nd->seq = seq;
}
- if (!follow_up_rcu(&nd->path))
- break;
- inode = nd->path.dentry->d_inode;
- nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
}
- while (d_mountpoint(nd->path.dentry)) {
+ while (unlikely(d_mountpoint(nd->path.dentry))) {
struct mount *mounted;
mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
+ if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
+ return -ECHILD;
if (!mounted)
break;
nd->path.mnt = &mounted->mnt;
nd->path.dentry = mounted->mnt.mnt_root;
inode = nd->path.dentry->d_inode;
nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
- if (read_seqretry(&mount_lock, nd->m_seq))
- goto failed;
}
nd->inode = inode;
return 0;
-
-failed:
- nd->flags &= ~LOOKUP_RCU;
- if (!(nd->flags & LOOKUP_ROOT))
- nd->root.mnt = NULL;
- rcu_read_unlock();
- return -ECHILD;
}
/*
@@ -1400,7 +1507,8 @@ static struct dentry *__lookup_hash(struct qstr *name,
* It _is_ time-critical.
*/
static int lookup_fast(struct nameidata *nd,
- struct path *path, struct inode **inode)
+ struct path *path, struct inode **inode,
+ unsigned *seqp)
{
struct vfsmount *mnt = nd->path.mnt;
struct dentry *dentry, *parent = nd->path.dentry;
@@ -1424,7 +1532,7 @@ static int lookup_fast(struct nameidata *nd,
* This sequence count validates that the inode matches
* the dentry name information from lookup.
*/
- *inode = dentry->d_inode;
+ *inode = d_backing_inode(dentry);
negative = d_is_negative(dentry);
if (read_seqcount_retry(&dentry->d_seq, seq))
return -ECHILD;
@@ -1440,8 +1548,8 @@ static int lookup_fast(struct nameidata *nd,
*/
if (__read_seqcount_retry(&parent->d_seq, nd->seq))
return -ECHILD;
- nd->seq = seq;
+ *seqp = seq;
if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
status = d_revalidate(dentry, nd->flags);
if (unlikely(status <= 0)) {
@@ -1452,10 +1560,10 @@ static int lookup_fast(struct nameidata *nd,
}
path->mnt = mnt;
path->dentry = dentry;
- if (likely(__follow_mount_rcu(nd, path, inode)))
+ if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
return 0;
unlazy:
- if (unlazy_walk(nd, dentry))
+ if (unlazy_walk(nd, dentry, seq))
return -ECHILD;
} else {
dentry = __d_lookup(parent, &nd->last);
@@ -1482,15 +1590,10 @@ unlazy:
}
path->mnt = mnt;
path->dentry = dentry;
- err = follow_managed(path, nd->flags);
- if (unlikely(err < 0)) {
- path_put_conditional(path, nd);
- return err;
- }
- if (err)
- nd->flags |= LOOKUP_JUMPED;
- *inode = path->dentry->d_inode;
- return 0;
+ err = follow_managed(path, nd);
+ if (likely(!err))
+ *inode = d_backing_inode(path->dentry);
+ return err;
need_lookup:
return 1;
@@ -1500,7 +1603,6 @@ need_lookup:
static int lookup_slow(struct nameidata *nd, struct path *path)
{
struct dentry *dentry, *parent;
- int err;
parent = nd->path.dentry;
BUG_ON(nd->inode != parent->d_inode);
@@ -1512,14 +1614,7 @@ static int lookup_slow(struct nameidata *nd, struct path *path)
return PTR_ERR(dentry);
path->mnt = nd->path.mnt;
path->dentry = dentry;
- err = follow_managed(path, nd->flags);
- if (unlikely(err < 0)) {
- path_put_conditional(path, nd);
- return err;
- }
- if (err)
- nd->flags |= LOOKUP_JUMPED;
- return 0;
+ return follow_managed(path, nd);
}
static inline int may_lookup(struct nameidata *nd)
@@ -1528,7 +1623,7 @@ static inline int may_lookup(struct nameidata *nd)
int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
if (err != -ECHILD)
return err;
- if (unlazy_walk(nd, NULL))
+ if (unlazy_walk(nd, NULL, 0))
return -ECHILD;
}
return inode_permission(nd->inode, MAY_EXEC);
@@ -1538,24 +1633,45 @@ static inline int handle_dots(struct nameidata *nd, int type)
{
if (type == LAST_DOTDOT) {
if (nd->flags & LOOKUP_RCU) {
- if (follow_dotdot_rcu(nd))
- return -ECHILD;
+ return follow_dotdot_rcu(nd);
} else
follow_dotdot(nd);
}
return 0;
}
-static void terminate_walk(struct nameidata *nd)
+static int pick_link(struct nameidata *nd, struct path *link,
+ struct inode *inode, unsigned seq)
{
+ int error;
+ struct saved *last;
+ if (unlikely(nd->total_link_count++ >= MAXSYMLINKS)) {
+ path_to_nameidata(link, nd);
+ return -ELOOP;
+ }
if (!(nd->flags & LOOKUP_RCU)) {
- path_put(&nd->path);
- } else {
- nd->flags &= ~LOOKUP_RCU;
- if (!(nd->flags & LOOKUP_ROOT))
- nd->root.mnt = NULL;
- rcu_read_unlock();
+ if (link->mnt == nd->path.mnt)
+ mntget(link->mnt);
+ }
+ error = nd_alloc_stack(nd);
+ if (unlikely(error)) {
+ if (error == -ECHILD) {
+ if (unlikely(unlazy_link(nd, link, seq)))
+ return -ECHILD;
+ error = nd_alloc_stack(nd);
+ }
+ if (error) {
+ path_put(link);
+ return error;
+ }
}
+
+ last = nd->stack + nd->depth++;
+ last->link = *link;
+ last->cookie = NULL;
+ last->inode = inode;
+ last->seq = seq;
+ return 1;
}
/*
@@ -1564,98 +1680,68 @@ static void terminate_walk(struct nameidata *nd)
* so we keep a cache of "no, this doesn't need follow_link"
* for the common case.
*/
-static inline int should_follow_link(struct dentry *dentry, int follow)
+static inline int should_follow_link(struct nameidata *nd, struct path *link,
+ int follow,
+ struct inode *inode, unsigned seq)
{
- return unlikely(d_is_symlink(dentry)) ? follow : 0;
+ if (likely(!d_is_symlink(link->dentry)))
+ return 0;
+ if (!follow)
+ return 0;
+ return pick_link(nd, link, inode, seq);
}
-static inline int walk_component(struct nameidata *nd, struct path *path,
- int follow)
+enum {WALK_GET = 1, WALK_PUT = 2};
+
+static int walk_component(struct nameidata *nd, int flags)
{
+ struct path path;
struct inode *inode;
+ unsigned seq;
int err;
/*
* "." and ".." are special - ".." especially so because it has
* to be able to know about the current root directory and
* parent relationships.
*/
- if (unlikely(nd->last_type != LAST_NORM))
- return handle_dots(nd, nd->last_type);
- err = lookup_fast(nd, path, &inode);
+ if (unlikely(nd->last_type != LAST_NORM)) {
+ err = handle_dots(nd, nd->last_type);
+ if (flags & WALK_PUT)
+ put_link(nd);
+ return err;
+ }
+ err = lookup_fast(nd, &path, &inode, &seq);
if (unlikely(err)) {
if (err < 0)
- goto out_err;
+ return err;
- err = lookup_slow(nd, path);
+ err = lookup_slow(nd, &path);
if (err < 0)
- goto out_err;
+ return err;
- inode = path->dentry->d_inode;
+ inode = d_backing_inode(path.dentry);
+ seq = 0; /* we are already out of RCU mode */
err = -ENOENT;
- if (d_is_negative(path->dentry))
+ if (d_is_negative(path.dentry))
goto out_path_put;
}
- if (should_follow_link(path->dentry, follow)) {
- if (nd->flags & LOOKUP_RCU) {
- if (unlikely(nd->path.mnt != path->mnt ||
- unlazy_walk(nd, path->dentry))) {
- err = -ECHILD;
- goto out_err;
- }
- }
- BUG_ON(inode != path->dentry->d_inode);
- return 1;
- }
- path_to_nameidata(path, nd);
+ if (flags & WALK_PUT)
+ put_link(nd);
+ err = should_follow_link(nd, &path, flags & WALK_GET, inode, seq);
+ if (unlikely(err))
+ return err;
+ path_to_nameidata(&path, nd);
nd->inode = inode;
+ nd->seq = seq;
return 0;
out_path_put:
- path_to_nameidata(path, nd);
-out_err:
- terminate_walk(nd);
+ path_to_nameidata(&path, nd);
return err;
}
/*
- * This limits recursive symlink follows to 8, while
- * limiting consecutive symlinks to 40.
- *
- * Without that kind of total limit, nasty chains of consecutive
- * symlinks can cause almost arbitrarily long lookups.
- */
-static inline int nested_symlink(struct path *path, struct nameidata *nd)
-{
- int res;
-
- if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
- path_put_conditional(path, nd);
- path_put(&nd->path);
- return -ELOOP;
- }
- BUG_ON(nd->depth >= MAX_NESTED_LINKS);
-
- nd->depth++;
- current->link_count++;
-
- do {
- struct path link = *path;
- void *cookie;
-
- res = follow_link(&link, nd, &cookie);
- if (res)
- break;
- res = walk_component(nd, path, LOOKUP_FOLLOW);
- put_link(nd, &link, cookie);
- } while (res > 0);
-
- current->link_count--;
- nd->depth--;
- return res;
-}
-
-/*
* We can do the critical dentry name comparison and hashing
* operations one word at a time, but we are limited to:
*
@@ -1781,9 +1867,8 @@ static inline u64 hash_name(const char *name)
*/
static int link_path_walk(const char *name, struct nameidata *nd)
{
- struct path next;
int err;
-
+
while (*name=='/')
name++;
if (!*name)
@@ -1796,7 +1881,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
err = may_lookup(nd);
if (err)
- break;
+ return err;
hash_len = hash_name(name);
@@ -1818,7 +1903,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
struct qstr this = { { .hash_len = hash_len }, .name = name };
err = parent->d_op->d_hash(parent, &this);
if (err < 0)
- break;
+ return err;
hash_len = this.hash_len;
name = this.name;
}
@@ -1830,7 +1915,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
name += hashlen_len(hash_len);
if (!*name)
- return 0;
+ goto OK;
/*
* If it wasn't NUL, we know it was '/'. Skip that
* slash, and continue until no more slashes.
@@ -1838,57 +1923,73 @@ static int link_path_walk(const char *name, struct nameidata *nd)
do {
name++;
} while (unlikely(*name == '/'));
- if (!*name)
- return 0;
-
- err = walk_component(nd, &next, LOOKUP_FOLLOW);
+ if (unlikely(!*name)) {
+OK:
+ /* pathname body, done */
+ if (!nd->depth)
+ return 0;
+ name = nd->stack[nd->depth - 1].name;
+ /* trailing symlink, done */
+ if (!name)
+ return 0;
+ /* last component of nested symlink */
+ err = walk_component(nd, WALK_GET | WALK_PUT);
+ } else {
+ err = walk_component(nd, WALK_GET);
+ }
if (err < 0)
return err;
if (err) {
- err = nested_symlink(&next, nd);
- if (err)
- return err;
- }
- if (!d_can_lookup(nd->path.dentry)) {
- err = -ENOTDIR;
- break;
+ const char *s = get_link(nd);
+
+ if (unlikely(IS_ERR(s)))
+ return PTR_ERR(s);
+ err = 0;
+ if (unlikely(!s)) {
+ /* jumped */
+ put_link(nd);
+ } else {
+ nd->stack[nd->depth - 1].name = name;
+ name = s;
+ continue;
+ }
}
+ if (unlikely(!d_can_lookup(nd->path.dentry)))
+ return -ENOTDIR;
}
- terminate_walk(nd);
- return err;
}
-static int path_init(int dfd, const struct filename *name, unsigned int flags,
- struct nameidata *nd)
+static const char *path_init(struct nameidata *nd, unsigned flags)
{
int retval = 0;
- const char *s = name->name;
+ const char *s = nd->name->name;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
nd->depth = 0;
- nd->base = NULL;
+ nd->total_link_count = 0;
if (flags & LOOKUP_ROOT) {
struct dentry *root = nd->root.dentry;
struct inode *inode = root->d_inode;
if (*s) {
if (!d_can_lookup(root))
- return -ENOTDIR;
+ return ERR_PTR(-ENOTDIR);
retval = inode_permission(inode, MAY_EXEC);
if (retval)
- return retval;
+ return ERR_PTR(retval);
}
nd->path = nd->root;
nd->inode = inode;
if (flags & LOOKUP_RCU) {
rcu_read_lock();
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ nd->root_seq = nd->seq;
nd->m_seq = read_seqbegin(&mount_lock);
} else {
path_get(&nd->path);
}
- goto done;
+ return s;
}
nd->root.mnt = NULL;
@@ -1903,7 +2004,7 @@ static int path_init(int dfd, const struct filename *name, unsigned int flags,
path_get(&nd->root);
}
nd->path = nd->root;
- } else if (dfd == AT_FDCWD) {
+ } else if (nd->dfd == AT_FDCWD) {
if (flags & LOOKUP_RCU) {
struct fs_struct *fs = current->fs;
unsigned seq;
@@ -1920,180 +2021,205 @@ static int path_init(int dfd, const struct filename *name, unsigned int flags,
}
} else {
/* Caller must check execute permissions on the starting path component */
- struct fd f = fdget_raw(dfd);
+ struct fd f = fdget_raw(nd->dfd);
struct dentry *dentry;
if (!f.file)
- return -EBADF;
+ return ERR_PTR(-EBADF);
dentry = f.file->f_path.dentry;
if (*s) {
if (!d_can_lookup(dentry)) {
fdput(f);
- return -ENOTDIR;
+ return ERR_PTR(-ENOTDIR);
}
}
nd->path = f.file->f_path;
if (flags & LOOKUP_RCU) {
- if (f.flags & FDPUT_FPUT)
- nd->base = f.file;
- nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
rcu_read_lock();
+ nd->inode = nd->path.dentry->d_inode;
+ nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
} else {
path_get(&nd->path);
- fdput(f);
+ nd->inode = nd->path.dentry->d_inode;
}
+ fdput(f);
+ return s;
}
nd->inode = nd->path.dentry->d_inode;
if (!(flags & LOOKUP_RCU))
- goto done;
+ return s;
if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq)))
- goto done;
+ return s;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
rcu_read_unlock();
- return -ECHILD;
-done:
- current->total_link_count = 0;
- return link_path_walk(s, nd);
+ return ERR_PTR(-ECHILD);
}
-static void path_cleanup(struct nameidata *nd)
+static const char *trailing_symlink(struct nameidata *nd)
{
- if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
- path_put(&nd->root);
- nd->root.mnt = NULL;
- }
- if (unlikely(nd->base))
- fput(nd->base);
+ const char *s;
+ int error = may_follow_link(nd);
+ if (unlikely(error))
+ return ERR_PTR(error);
+ nd->flags |= LOOKUP_PARENT;
+ nd->stack[0].name = NULL;
+ s = get_link(nd);
+ return s ? s : "";
}
-static inline int lookup_last(struct nameidata *nd, struct path *path)
+static inline int lookup_last(struct nameidata *nd)
{
if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
nd->flags &= ~LOOKUP_PARENT;
- return walk_component(nd, path, nd->flags & LOOKUP_FOLLOW);
+ return walk_component(nd,
+ nd->flags & LOOKUP_FOLLOW
+ ? nd->depth
+ ? WALK_PUT | WALK_GET
+ : WALK_GET
+ : 0);
}
/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
-static int path_lookupat(int dfd, const struct filename *name,
- unsigned int flags, struct nameidata *nd)
+static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path)
{
- struct path path;
+ const char *s = path_init(nd, flags);
int err;
- /*
- * Path walking is largely split up into 2 different synchronisation
- * schemes, rcu-walk and ref-walk (explained in
- * Documentation/filesystems/path-lookup.txt). These share much of the
- * path walk code, but some things particularly setup, cleanup, and
- * following mounts are sufficiently divergent that functions are
- * duplicated. Typically there is a function foo(), and its RCU
- * analogue, foo_rcu().
- *
- * -ECHILD is the error number of choice (just to avoid clashes) that
- * is returned if some aspect of an rcu-walk fails. Such an error must
- * be handled by restarting a traditional ref-walk (which will always
- * be able to complete).
- */
- err = path_init(dfd, name, flags, nd);
- if (!err && !(flags & LOOKUP_PARENT)) {
- err = lookup_last(nd, &path);
- while (err > 0) {
- void *cookie;
- struct path link = path;
- err = may_follow_link(&link, nd);
- if (unlikely(err))
- break;
- nd->flags |= LOOKUP_PARENT;
- err = follow_link(&link, nd, &cookie);
- if (err)
- break;
- err = lookup_last(nd, &path);
- put_link(nd, &link, cookie);
+ if (IS_ERR(s))
+ return PTR_ERR(s);
+ while (!(err = link_path_walk(s, nd))
+ && ((err = lookup_last(nd)) > 0)) {
+ s = trailing_symlink(nd);
+ if (IS_ERR(s)) {
+ err = PTR_ERR(s);
+ break;
}
}
-
if (!err)
err = complete_walk(nd);
- if (!err && nd->flags & LOOKUP_DIRECTORY) {
- if (!d_can_lookup(nd->path.dentry)) {
- path_put(&nd->path);
+ if (!err && nd->flags & LOOKUP_DIRECTORY)
+ if (!d_can_lookup(nd->path.dentry))
err = -ENOTDIR;
- }
+ if (!err) {
+ *path = nd->path;
+ nd->path.mnt = NULL;
+ nd->path.dentry = NULL;
}
-
- path_cleanup(nd);
+ terminate_walk(nd);
return err;
}
-static int filename_lookup(int dfd, struct filename *name,
- unsigned int flags, struct nameidata *nd)
+static int filename_lookup(int dfd, struct filename *name, unsigned flags,
+ struct path *path, struct path *root)
{
- int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
+ int retval;
+ struct nameidata nd;
+ if (IS_ERR(name))
+ return PTR_ERR(name);
+ if (unlikely(root)) {
+ nd.root = *root;
+ flags |= LOOKUP_ROOT;
+ }
+ set_nameidata(&nd, dfd, name);
+ retval = path_lookupat(&nd, flags | LOOKUP_RCU, path);
if (unlikely(retval == -ECHILD))
- retval = path_lookupat(dfd, name, flags, nd);
+ retval = path_lookupat(&nd, flags, path);
if (unlikely(retval == -ESTALE))
- retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
+ retval = path_lookupat(&nd, flags | LOOKUP_REVAL, path);
if (likely(!retval))
- audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT);
+ audit_inode(name, path->dentry, flags & LOOKUP_PARENT);
+ restore_nameidata();
+ putname(name);
return retval;
}
+/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
+static int path_parentat(struct nameidata *nd, unsigned flags,
+ struct path *parent)
+{
+ const char *s = path_init(nd, flags);
+ int err;
+ if (IS_ERR(s))
+ return PTR_ERR(s);
+ err = link_path_walk(s, nd);
+ if (!err)
+ err = complete_walk(nd);
+ if (!err) {
+ *parent = nd->path;
+ nd->path.mnt = NULL;
+ nd->path.dentry = NULL;
+ }
+ terminate_walk(nd);
+ return err;
+}
+
+static struct filename *filename_parentat(int dfd, struct filename *name,
+ unsigned int flags, struct path *parent,
+ struct qstr *last, int *type)
+{
+ int retval;
+ struct nameidata nd;
+
+ if (IS_ERR(name))
+ return name;
+ set_nameidata(&nd, dfd, name);
+ retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
+ if (unlikely(retval == -ECHILD))
+ retval = path_parentat(&nd, flags, parent);
+ if (unlikely(retval == -ESTALE))
+ retval = path_parentat(&nd, flags | LOOKUP_REVAL, parent);
+ if (likely(!retval)) {
+ *last = nd.last;
+ *type = nd.last_type;
+ audit_inode(name, parent->dentry, LOOKUP_PARENT);
+ } else {
+ putname(name);
+ name = ERR_PTR(retval);
+ }
+ restore_nameidata();
+ return name;
+}
+
/* does lookup, returns the object with parent locked */
struct dentry *kern_path_locked(const char *name, struct path *path)
{
- struct filename *filename = getname_kernel(name);
- struct nameidata nd;
+ struct filename *filename;
struct dentry *d;
- int err;
+ struct qstr last;
+ int type;
+ filename = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
+ &last, &type);
if (IS_ERR(filename))
return ERR_CAST(filename);
-
- err = filename_lookup(AT_FDCWD, filename, LOOKUP_PARENT, &nd);
- if (err) {
- d = ERR_PTR(err);
- goto out;
- }
- if (nd.last_type != LAST_NORM) {
- path_put(&nd.path);
- d = ERR_PTR(-EINVAL);
- goto out;
+ if (unlikely(type != LAST_NORM)) {
+ path_put(path);
+ putname(filename);
+ return ERR_PTR(-EINVAL);
}
- mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
- d = __lookup_hash(&nd.last, nd.path.dentry, 0);
+ mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ d = __lookup_hash(&last, path->dentry, 0);
if (IS_ERR(d)) {
- mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
- path_put(&nd.path);
- goto out;
+ mutex_unlock(&path->dentry->d_inode->i_mutex);
+ path_put(path);
}
- *path = nd.path;
-out:
putname(filename);
return d;
}
int kern_path(const char *name, unsigned int flags, struct path *path)
{
- struct nameidata nd;
- struct filename *filename = getname_kernel(name);
- int res = PTR_ERR(filename);
-
- if (!IS_ERR(filename)) {
- res = filename_lookup(AT_FDCWD, filename, flags, &nd);
- putname(filename);
- if (!res)
- *path = nd.path;
- }
- return res;
+ return filename_lookup(AT_FDCWD, getname_kernel(name),
+ flags, path, NULL);
}
EXPORT_SYMBOL(kern_path);
@@ -2109,36 +2235,13 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
const char *name, unsigned int flags,
struct path *path)
{
- struct filename *filename = getname_kernel(name);
- int err = PTR_ERR(filename);
-
- BUG_ON(flags & LOOKUP_PARENT);
-
- /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */
- if (!IS_ERR(filename)) {
- struct nameidata nd;
- nd.root.dentry = dentry;
- nd.root.mnt = mnt;
- err = filename_lookup(AT_FDCWD, filename,
- flags | LOOKUP_ROOT, &nd);
- if (!err)
- *path = nd.path;
- putname(filename);
- }
- return err;
+ struct path root = {.mnt = mnt, .dentry = dentry};
+ /* the first argument of filename_lookup() is ignored with root */
+ return filename_lookup(AT_FDCWD, getname_kernel(name),
+ flags , path, &root);
}
EXPORT_SYMBOL(vfs_path_lookup);
-/*
- * Restricted form of lookup. Doesn't follow links, single-component only,
- * needs parent already locked. Doesn't follow mounts.
- * SMP-safe.
- */
-static struct dentry *lookup_hash(struct nameidata *nd)
-{
- return __lookup_hash(&nd->last, nd->path.dentry, nd->flags);
-}
-
/**
* lookup_one_len - filesystem helper to lookup single pathname component
* @name: pathname component to lookup
@@ -2193,27 +2296,10 @@ EXPORT_SYMBOL(lookup_one_len);
int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
struct path *path, int *empty)
{
- struct nameidata nd;
- struct filename *tmp = getname_flags(name, flags, empty);
- int err = PTR_ERR(tmp);
- if (!IS_ERR(tmp)) {
-
- BUG_ON(flags & LOOKUP_PARENT);
-
- err = filename_lookup(dfd, tmp, flags, &nd);
- putname(tmp);
- if (!err)
- *path = nd.path;
- }
- return err;
-}
-
-int user_path_at(int dfd, const char __user *name, unsigned flags,
- struct path *path)
-{
- return user_path_at_empty(dfd, name, flags, path, NULL);
+ return filename_lookup(dfd, getname_flags(name, flags, empty),
+ flags, path, NULL);
}
-EXPORT_SYMBOL(user_path_at);
+EXPORT_SYMBOL(user_path_at_empty);
/*
* NB: most callers don't do anything directly with the reference to the
@@ -2221,26 +2307,16 @@ EXPORT_SYMBOL(user_path_at);
* allocated by getname. So we must hold the reference to it until all
* path-walking is complete.
*/
-static struct filename *
-user_path_parent(int dfd, const char __user *path, struct nameidata *nd,
+static inline struct filename *
+user_path_parent(int dfd, const char __user *path,
+ struct path *parent,
+ struct qstr *last,
+ int *type,
unsigned int flags)
{
- struct filename *s = getname(path);
- int error;
-
/* only LOOKUP_REVAL is allowed in extra flags */
- flags &= LOOKUP_REVAL;
-
- if (IS_ERR(s))
- return s;
-
- error = filename_lookup(dfd, s, flags | LOOKUP_PARENT, nd);
- if (error) {
- putname(s);
- return ERR_PTR(error);
- }
-
- return s;
+ return filename_parentat(dfd, getname(path), flags & LOOKUP_REVAL,
+ parent, last, type);
}
/**
@@ -2279,10 +2355,8 @@ mountpoint_last(struct nameidata *nd, struct path *path)
/* If we're in rcuwalk, drop out of it to handle last component */
if (nd->flags & LOOKUP_RCU) {
- if (unlazy_walk(nd, NULL)) {
- error = -ECHILD;
- goto out;
- }
+ if (unlazy_walk(nd, NULL, 0))
+ return -ECHILD;
}
nd->flags &= ~LOOKUP_PARENT;
@@ -2290,7 +2364,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
if (unlikely(nd->last_type != LAST_NORM)) {
error = handle_dots(nd, nd->last_type);
if (error)
- goto out;
+ return error;
dentry = dget(nd->path.dentry);
goto done;
}
@@ -2305,74 +2379,60 @@ mountpoint_last(struct nameidata *nd, struct path *path)
*/
dentry = d_alloc(dir, &nd->last);
if (!dentry) {
- error = -ENOMEM;
mutex_unlock(&dir->d_inode->i_mutex);
- goto out;
+ return -ENOMEM;
}
dentry = lookup_real(dir->d_inode, dentry, nd->flags);
- error = PTR_ERR(dentry);
if (IS_ERR(dentry)) {
mutex_unlock(&dir->d_inode->i_mutex);
- goto out;
+ return PTR_ERR(dentry);
}
}
mutex_unlock(&dir->d_inode->i_mutex);
done:
if (d_is_negative(dentry)) {
- error = -ENOENT;
dput(dentry);
- goto out;
+ return -ENOENT;
}
+ if (nd->depth)
+ put_link(nd);
path->dentry = dentry;
path->mnt = nd->path.mnt;
- if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
- return 1;
+ error = should_follow_link(nd, path, nd->flags & LOOKUP_FOLLOW,
+ d_backing_inode(dentry), 0);
+ if (unlikely(error))
+ return error;
mntget(path->mnt);
follow_mount(path);
- error = 0;
-out:
- terminate_walk(nd);
- return error;
+ return 0;
}
/**
* path_mountpoint - look up a path to be umounted
- * @dfd: directory file descriptor to start walk from
- * @name: full pathname to walk
- * @path: pointer to container for result
+ * @nd: lookup context
* @flags: lookup flags
+ * @path: pointer to container for result
*
* Look up the given name, but don't attempt to revalidate the last component.
* Returns 0 and "path" will be valid on success; Returns error otherwise.
*/
static int
-path_mountpoint(int dfd, const struct filename *name, struct path *path,
- unsigned int flags)
+path_mountpoint(struct nameidata *nd, unsigned flags, struct path *path)
{
- struct nameidata nd;
+ const char *s = path_init(nd, flags);
int err;
-
- err = path_init(dfd, name, flags, &nd);
- if (unlikely(err))
- goto out;
-
- err = mountpoint_last(&nd, path);
- while (err > 0) {
- void *cookie;
- struct path link = *path;
- err = may_follow_link(&link, &nd);
- if (unlikely(err))
- break;
- nd.flags |= LOOKUP_PARENT;
- err = follow_link(&link, &nd, &cookie);
- if (err)
+ if (IS_ERR(s))
+ return PTR_ERR(s);
+ while (!(err = link_path_walk(s, nd)) &&
+ (err = mountpoint_last(nd, path)) > 0) {
+ s = trailing_symlink(nd);
+ if (IS_ERR(s)) {
+ err = PTR_ERR(s);
break;
- err = mountpoint_last(&nd, path);
- put_link(&nd, &link, cookie);
+ }
}
-out:
- path_cleanup(&nd);
+ terminate_walk(nd);
return err;
}
@@ -2380,16 +2440,19 @@ static int
filename_mountpoint(int dfd, struct filename *name, struct path *path,
unsigned int flags)
{
+ struct nameidata nd;
int error;
if (IS_ERR(name))
return PTR_ERR(name);
- error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU);
+ set_nameidata(&nd, dfd, name);
+ error = path_mountpoint(&nd, flags | LOOKUP_RCU, path);
if (unlikely(error == -ECHILD))
- error = path_mountpoint(dfd, name, path, flags);
+ error = path_mountpoint(&nd, flags, path);
if (unlikely(error == -ESTALE))
- error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL);
+ error = path_mountpoint(&nd, flags | LOOKUP_REVAL, path);
if (likely(!error))
audit_inode(name, path->dentry, 0);
+ restore_nameidata();
putname(name);
return error;
}
@@ -2456,7 +2519,7 @@ EXPORT_SYMBOL(__check_sticky);
*/
static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
{
- struct inode *inode = victim->d_inode;
+ struct inode *inode = d_backing_inode(victim);
int error;
if (d_is_negative(victim))
@@ -2922,18 +2985,19 @@ out_dput:
/*
* Handle the last step of open()
*/
-static int do_last(struct nameidata *nd, struct path *path,
+static int do_last(struct nameidata *nd,
struct file *file, const struct open_flags *op,
- int *opened, struct filename *name)
+ int *opened)
{
struct dentry *dir = nd->path.dentry;
int open_flag = op->open_flag;
bool will_truncate = (open_flag & O_TRUNC) != 0;
bool got_write = false;
int acc_mode = op->acc_mode;
+ unsigned seq;
struct inode *inode;
- bool symlink_ok = false;
struct path save_parent = { .dentry = NULL, .mnt = NULL };
+ struct path path;
bool retried = false;
int error;
@@ -2942,7 +3006,7 @@ static int do_last(struct nameidata *nd, struct path *path,
if (nd->last_type != LAST_NORM) {
error = handle_dots(nd, nd->last_type);
- if (error)
+ if (unlikely(error))
return error;
goto finish_open;
}
@@ -2950,15 +3014,13 @@ static int do_last(struct nameidata *nd, struct path *path,
if (!(open_flag & O_CREAT)) {
if (nd->last.name[nd->last.len])
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
- if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
- symlink_ok = true;
/* we _can_ be in RCU mode here */
- error = lookup_fast(nd, path, &inode);
+ error = lookup_fast(nd, &path, &inode, &seq);
if (likely(!error))
goto finish_lookup;
if (error < 0)
- goto out;
+ return error;
BUG_ON(nd->inode != dir->d_inode);
} else {
@@ -2972,11 +3034,10 @@ static int do_last(struct nameidata *nd, struct path *path,
if (error)
return error;
- audit_inode(name, dir, LOOKUP_PARENT);
- error = -EISDIR;
+ audit_inode(nd->name, dir, LOOKUP_PARENT);
/* trailing slashes? */
- if (nd->last.name[nd->last.len])
- goto out;
+ if (unlikely(nd->last.name[nd->last.len]))
+ return -EISDIR;
}
retry_lookup:
@@ -2991,7 +3052,7 @@ retry_lookup:
*/
}
mutex_lock(&dir->d_inode->i_mutex);
- error = lookup_open(nd, path, file, op, got_write, opened);
+ error = lookup_open(nd, &path, file, op, got_write, opened);
mutex_unlock(&dir->d_inode->i_mutex);
if (error <= 0) {
@@ -3002,7 +3063,7 @@ retry_lookup:
!S_ISREG(file_inode(file)->i_mode))
will_truncate = false;
- audit_inode(name, file->f_path.dentry, 0);
+ audit_inode(nd->name, file->f_path.dentry, 0);
goto opened;
}
@@ -3011,15 +3072,15 @@ retry_lookup:
open_flag &= ~O_TRUNC;
will_truncate = false;
acc_mode = MAY_OPEN;
- path_to_nameidata(path, nd);
+ path_to_nameidata(&path, nd);
goto finish_open_created;
}
/*
* create/update audit record if it already exists.
*/
- if (d_is_positive(path->dentry))
- audit_inode(name, path->dentry, 0);
+ if (d_is_positive(path.dentry))
+ audit_inode(nd->name, path.dentry, 0);
/*
* If atomic_open() acquired write access it is dropped now due to
@@ -3031,47 +3092,45 @@ retry_lookup:
got_write = false;
}
- error = -EEXIST;
- if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))
- goto exit_dput;
-
- error = follow_managed(path, nd->flags);
- if (error < 0)
- goto exit_dput;
+ if (unlikely((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))) {
+ path_to_nameidata(&path, nd);
+ return -EEXIST;
+ }
- if (error)
- nd->flags |= LOOKUP_JUMPED;
+ error = follow_managed(&path, nd);
+ if (unlikely(error < 0))
+ return error;
BUG_ON(nd->flags & LOOKUP_RCU);
- inode = path->dentry->d_inode;
- error = -ENOENT;
- if (d_is_negative(path->dentry)) {
- path_to_nameidata(path, nd);
- goto out;
+ inode = d_backing_inode(path.dentry);
+ seq = 0; /* out of RCU mode, so the value doesn't matter */
+ if (unlikely(d_is_negative(path.dentry))) {
+ path_to_nameidata(&path, nd);
+ return -ENOENT;
}
finish_lookup:
- /* we _can_ be in RCU mode here */
- if (should_follow_link(path->dentry, !symlink_ok)) {
- if (nd->flags & LOOKUP_RCU) {
- if (unlikely(nd->path.mnt != path->mnt ||
- unlazy_walk(nd, path->dentry))) {
- error = -ECHILD;
- goto out;
- }
- }
- BUG_ON(inode != path->dentry->d_inode);
- return 1;
+ if (nd->depth)
+ put_link(nd);
+ error = should_follow_link(nd, &path, nd->flags & LOOKUP_FOLLOW,
+ inode, seq);
+ if (unlikely(error))
+ return error;
+
+ if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) {
+ path_to_nameidata(&path, nd);
+ return -ELOOP;
}
- if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {
- path_to_nameidata(path, nd);
+ if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) {
+ path_to_nameidata(&path, nd);
} else {
save_parent.dentry = nd->path.dentry;
- save_parent.mnt = mntget(path->mnt);
- nd->path.dentry = path->dentry;
+ save_parent.mnt = mntget(path.mnt);
+ nd->path.dentry = path.dentry;
}
nd->inode = inode;
+ nd->seq = seq;
/* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
finish_open:
error = complete_walk(nd);
@@ -3079,7 +3138,7 @@ finish_open:
path_put(&save_parent);
return error;
}
- audit_inode(name, nd->path.dentry, 0);
+ audit_inode(nd->name, nd->path.dentry, 0);
error = -EISDIR;
if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
goto out;
@@ -3126,12 +3185,8 @@ out:
if (got_write)
mnt_drop_write(nd->path.mnt);
path_put(&save_parent);
- terminate_walk(nd);
return error;
-exit_dput:
- path_put_conditional(path, nd);
- goto out;
exit_fput:
fput(file);
goto out;
@@ -3155,50 +3210,46 @@ stale_open:
goto retry_lookup;
}
-static int do_tmpfile(int dfd, struct filename *pathname,
- struct nameidata *nd, int flags,
+static int do_tmpfile(struct nameidata *nd, unsigned flags,
const struct open_flags *op,
struct file *file, int *opened)
{
static const struct qstr name = QSTR_INIT("/", 1);
- struct dentry *dentry, *child;
+ struct dentry *child;
struct inode *dir;
- int error = path_lookupat(dfd, pathname,
- flags | LOOKUP_DIRECTORY, nd);
+ struct path path;
+ int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path);
if (unlikely(error))
return error;
- error = mnt_want_write(nd->path.mnt);
+ error = mnt_want_write(path.mnt);
if (unlikely(error))
goto out;
+ dir = path.dentry->d_inode;
/* we want directory to be writable */
- error = inode_permission(nd->inode, MAY_WRITE | MAY_EXEC);
+ error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
if (error)
goto out2;
- dentry = nd->path.dentry;
- dir = dentry->d_inode;
if (!dir->i_op->tmpfile) {
error = -EOPNOTSUPP;
goto out2;
}
- child = d_alloc(dentry, &name);
+ child = d_alloc(path.dentry, &name);
if (unlikely(!child)) {
error = -ENOMEM;
goto out2;
}
- nd->flags &= ~LOOKUP_DIRECTORY;
- nd->flags |= op->intent;
- dput(nd->path.dentry);
- nd->path.dentry = child;
- error = dir->i_op->tmpfile(dir, nd->path.dentry, op->mode);
+ dput(path.dentry);
+ path.dentry = child;
+ error = dir->i_op->tmpfile(dir, child, op->mode);
if (error)
goto out2;
- audit_inode(pathname, nd->path.dentry, 0);
+ audit_inode(nd->name, child, 0);
/* Don't check for other permissions, the inode was just created */
- error = may_open(&nd->path, MAY_OPEN, op->open_flag);
+ error = may_open(&path, MAY_OPEN, op->open_flag);
if (error)
goto out2;
- file->f_path.mnt = nd->path.mnt;
- error = finish_open(file, nd->path.dentry, NULL, opened);
+ file->f_path.mnt = path.mnt;
+ error = finish_open(file, child, NULL, opened);
if (error)
goto out2;
error = open_check_o_direct(file);
@@ -3211,17 +3262,17 @@ static int do_tmpfile(int dfd, struct filename *pathname,
spin_unlock(&inode->i_lock);
}
out2:
- mnt_drop_write(nd->path.mnt);
+ mnt_drop_write(path.mnt);
out:
- path_put(&nd->path);
+ path_put(&path);
return error;
}
-static struct file *path_openat(int dfd, struct filename *pathname,
- struct nameidata *nd, const struct open_flags *op, int flags)
+static struct file *path_openat(struct nameidata *nd,
+ const struct open_flags *op, unsigned flags)
{
+ const char *s;
struct file *file;
- struct path path;
int opened = 0;
int error;
@@ -3232,37 +3283,25 @@ static struct file *path_openat(int dfd, struct filename *pathname,
file->f_flags = op->open_flag;
if (unlikely(file->f_flags & __O_TMPFILE)) {
- error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened);
+ error = do_tmpfile(nd, flags, op, file, &opened);
goto out2;
}
- error = path_init(dfd, pathname, flags, nd);
- if (unlikely(error))
- goto out;
-
- error = do_last(nd, &path, file, op, &opened, pathname);
- while (unlikely(error > 0)) { /* trailing symlink */
- struct path link = path;
- void *cookie;
- if (!(nd->flags & LOOKUP_FOLLOW)) {
- path_put_conditional(&path, nd);
- path_put(&nd->path);
- error = -ELOOP;
- break;
- }
- error = may_follow_link(&link, nd);
- if (unlikely(error))
- break;
- nd->flags |= LOOKUP_PARENT;
+ s = path_init(nd, flags);
+ if (IS_ERR(s)) {
+ put_filp(file);
+ return ERR_CAST(s);
+ }
+ while (!(error = link_path_walk(s, nd)) &&
+ (error = do_last(nd, file, op, &opened)) > 0) {
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
- error = follow_link(&link, nd, &cookie);
- if (unlikely(error))
+ s = trailing_symlink(nd);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
break;
- error = do_last(nd, &path, file, op, &opened, pathname);
- put_link(nd, &link, cookie);
+ }
}
-out:
- path_cleanup(nd);
+ terminate_walk(nd);
out2:
if (!(opened & FILE_OPENED)) {
BUG_ON(!error);
@@ -3287,11 +3326,13 @@ struct file *do_filp_open(int dfd, struct filename *pathname,
int flags = op->lookup_flags;
struct file *filp;
- filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
+ set_nameidata(&nd, dfd, pathname);
+ filp = path_openat(&nd, op, flags | LOOKUP_RCU);
if (unlikely(filp == ERR_PTR(-ECHILD)))
- filp = path_openat(dfd, pathname, &nd, op, flags);
+ filp = path_openat(&nd, op, flags);
if (unlikely(filp == ERR_PTR(-ESTALE)))
- filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
+ filp = path_openat(&nd, op, flags | LOOKUP_REVAL);
+ restore_nameidata();
return filp;
}
@@ -3313,11 +3354,13 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
if (unlikely(IS_ERR(filename)))
return ERR_CAST(filename);
- file = path_openat(-1, filename, &nd, op, flags | LOOKUP_RCU);
+ set_nameidata(&nd, -1, filename);
+ file = path_openat(&nd, op, flags | LOOKUP_RCU);
if (unlikely(file == ERR_PTR(-ECHILD)))
- file = path_openat(-1, filename, &nd, op, flags);
+ file = path_openat(&nd, op, flags);
if (unlikely(file == ERR_PTR(-ESTALE)))
- file = path_openat(-1, filename, &nd, op, flags | LOOKUP_REVAL);
+ file = path_openat(&nd, op, flags | LOOKUP_REVAL);
+ restore_nameidata();
putname(filename);
return file;
}
@@ -3326,7 +3369,8 @@ static struct dentry *filename_create(int dfd, struct filename *name,
struct path *path, unsigned int lookup_flags)
{
struct dentry *dentry = ERR_PTR(-EEXIST);
- struct nameidata nd;
+ struct qstr last;
+ int type;
int err2;
int error;
bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
@@ -3337,26 +3381,25 @@ static struct dentry *filename_create(int dfd, struct filename *name,
*/
lookup_flags &= LOOKUP_REVAL;
- error = filename_lookup(dfd, name, LOOKUP_PARENT|lookup_flags, &nd);
- if (error)
- return ERR_PTR(error);
+ name = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
+ if (IS_ERR(name))
+ return ERR_CAST(name);
/*
* Yucky last component or no last component at all?
* (foo/., foo/.., /////)
*/
- if (nd.last_type != LAST_NORM)
+ if (unlikely(type != LAST_NORM))
goto out;
- nd.flags &= ~LOOKUP_PARENT;
- nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
/* don't fail immediately if it's r/o, at least try to report other errors */
- err2 = mnt_want_write(nd.path.mnt);
+ err2 = mnt_want_write(path->mnt);
/*
* Do the final lookup.
*/
- mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
- dentry = lookup_hash(&nd);
+ lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL;
+ mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ dentry = __lookup_hash(&last, path->dentry, lookup_flags);
if (IS_ERR(dentry))
goto unlock;
@@ -3370,7 +3413,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
* all is fine. Let's be bastards - you had / on the end, you've
* been asking for (non-existent) directory. -ENOENT for you.
*/
- if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
+ if (unlikely(!is_dir && last.name[last.len])) {
error = -ENOENT;
goto fail;
}
@@ -3378,31 +3421,26 @@ static struct dentry *filename_create(int dfd, struct filename *name,
error = err2;
goto fail;
}
- *path = nd.path;
+ putname(name);
return dentry;
fail:
dput(dentry);
dentry = ERR_PTR(error);
unlock:
- mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ mutex_unlock(&path->dentry->d_inode->i_mutex);
if (!err2)
- mnt_drop_write(nd.path.mnt);
+ mnt_drop_write(path->mnt);
out:
- path_put(&nd.path);
+ path_put(path);
+ putname(name);
return dentry;
}
struct dentry *kern_path_create(int dfd, const char *pathname,
struct path *path, unsigned int lookup_flags)
{
- struct filename *filename = getname_kernel(pathname);
- struct dentry *res;
-
- if (IS_ERR(filename))
- return ERR_CAST(filename);
- res = filename_create(dfd, filename, path, lookup_flags);
- putname(filename);
- return res;
+ return filename_create(dfd, getname_kernel(pathname),
+ path, lookup_flags);
}
EXPORT_SYMBOL(kern_path_create);
@@ -3415,16 +3453,10 @@ void done_path_create(struct path *path, struct dentry *dentry)
}
EXPORT_SYMBOL(done_path_create);
-struct dentry *user_path_create(int dfd, const char __user *pathname,
+inline struct dentry *user_path_create(int dfd, const char __user *pathname,
struct path *path, unsigned int lookup_flags)
{
- struct filename *tmp = getname(pathname);
- struct dentry *res;
- if (IS_ERR(tmp))
- return ERR_CAST(tmp);
- res = filename_create(dfd, tmp, path, lookup_flags);
- putname(tmp);
- return res;
+ return filename_create(dfd, getname(pathname), path, lookup_flags);
}
EXPORT_SYMBOL(user_path_create);
@@ -3645,14 +3677,17 @@ static long do_rmdir(int dfd, const char __user *pathname)
int error = 0;
struct filename *name;
struct dentry *dentry;
- struct nameidata nd;
+ struct path path;
+ struct qstr last;
+ int type;
unsigned int lookup_flags = 0;
retry:
- name = user_path_parent(dfd, pathname, &nd, lookup_flags);
+ name = user_path_parent(dfd, pathname,
+ &path, &last, &type, lookup_flags);
if (IS_ERR(name))
return PTR_ERR(name);
- switch(nd.last_type) {
+ switch (type) {
case LAST_DOTDOT:
error = -ENOTEMPTY;
goto exit1;
@@ -3664,13 +3699,12 @@ retry:
goto exit1;
}
- nd.flags &= ~LOOKUP_PARENT;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(path.mnt);
if (error)
goto exit1;
- mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
- dentry = lookup_hash(&nd);
+ mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ dentry = __lookup_hash(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto exit2;
@@ -3678,17 +3712,17 @@ retry:
error = -ENOENT;
goto exit3;
}
- error = security_path_rmdir(&nd.path, dentry);
+ error = security_path_rmdir(&path, dentry);
if (error)
goto exit3;
- error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
+ error = vfs_rmdir(path.dentry->d_inode, dentry);
exit3:
dput(dentry);
exit2:
- mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
- mnt_drop_write(nd.path.mnt);
+ mutex_unlock(&path.dentry->d_inode->i_mutex);
+ mnt_drop_write(path.mnt);
exit1:
- path_put(&nd.path);
+ path_put(&path);
putname(name);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
@@ -3771,43 +3805,45 @@ static long do_unlinkat(int dfd, const char __user *pathname)
int error;
struct filename *name;
struct dentry *dentry;
- struct nameidata nd;
+ struct path path;
+ struct qstr last;
+ int type;
struct inode *inode = NULL;
struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0;
retry:
- name = user_path_parent(dfd, pathname, &nd, lookup_flags);
+ name = user_path_parent(dfd, pathname,
+ &path, &last, &type, lookup_flags);
if (IS_ERR(name))
return PTR_ERR(name);
error = -EISDIR;
- if (nd.last_type != LAST_NORM)
+ if (type != LAST_NORM)
goto exit1;
- nd.flags &= ~LOOKUP_PARENT;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(path.mnt);
if (error)
goto exit1;
retry_deleg:
- mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
- dentry = lookup_hash(&nd);
+ mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ dentry = __lookup_hash(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct error value */
- if (nd.last.name[nd.last.len])
+ if (last.name[last.len])
goto slashes;
inode = dentry->d_inode;
if (d_is_negative(dentry))
goto slashes;
ihold(inode);
- error = security_path_unlink(&nd.path, dentry);
+ error = security_path_unlink(&path, dentry);
if (error)
goto exit2;
- error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode);
+ error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode);
exit2:
dput(dentry);
}
- mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ mutex_unlock(&path.dentry->d_inode->i_mutex);
if (inode)
iput(inode); /* truncate the inode here */
inode = NULL;
@@ -3816,9 +3852,9 @@ exit2:
if (!error)
goto retry_deleg;
}
- mnt_drop_write(nd.path.mnt);
+ mnt_drop_write(path.mnt);
exit1:
- path_put(&nd.path);
+ path_put(&path);
putname(name);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
@@ -4248,14 +4284,15 @@ EXPORT_SYMBOL(vfs_rename);
SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
int, newdfd, const char __user *, newname, unsigned int, flags)
{
- struct dentry *old_dir, *new_dir;
struct dentry *old_dentry, *new_dentry;
struct dentry *trap;
- struct nameidata oldnd, newnd;
+ struct path old_path, new_path;
+ struct qstr old_last, new_last;
+ int old_type, new_type;
struct inode *delegated_inode = NULL;
struct filename *from;
struct filename *to;
- unsigned int lookup_flags = 0;
+ unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
bool should_retry = false;
int error;
@@ -4269,47 +4306,45 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD))
return -EPERM;
+ if (flags & RENAME_EXCHANGE)
+ target_flags = 0;
+
retry:
- from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags);
+ from = user_path_parent(olddfd, oldname,
+ &old_path, &old_last, &old_type, lookup_flags);
if (IS_ERR(from)) {
error = PTR_ERR(from);
goto exit;
}
- to = user_path_parent(newdfd, newname, &newnd, lookup_flags);
+ to = user_path_parent(newdfd, newname,
+ &new_path, &new_last, &new_type, lookup_flags);
if (IS_ERR(to)) {
error = PTR_ERR(to);
goto exit1;
}
error = -EXDEV;
- if (oldnd.path.mnt != newnd.path.mnt)
+ if (old_path.mnt != new_path.mnt)
goto exit2;
- old_dir = oldnd.path.dentry;
error = -EBUSY;
- if (oldnd.last_type != LAST_NORM)
+ if (old_type != LAST_NORM)
goto exit2;
- new_dir = newnd.path.dentry;
if (flags & RENAME_NOREPLACE)
error = -EEXIST;
- if (newnd.last_type != LAST_NORM)
+ if (new_type != LAST_NORM)
goto exit2;
- error = mnt_want_write(oldnd.path.mnt);
+ error = mnt_want_write(old_path.mnt);
if (error)
goto exit2;
- oldnd.flags &= ~LOOKUP_PARENT;
- newnd.flags &= ~LOOKUP_PARENT;
- if (!(flags & RENAME_EXCHANGE))
- newnd.flags |= LOOKUP_RENAME_TARGET;
-
retry_deleg:
- trap = lock_rename(new_dir, old_dir);
+ trap = lock_rename(new_path.dentry, old_path.dentry);
- old_dentry = lookup_hash(&oldnd);
+ old_dentry = __lookup_hash(&old_last, old_path.dentry, lookup_flags);
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
@@ -4317,7 +4352,7 @@ retry_deleg:
error = -ENOENT;
if (d_is_negative(old_dentry))
goto exit4;
- new_dentry = lookup_hash(&newnd);
+ new_dentry = __lookup_hash(&new_last, new_path.dentry, lookup_flags | target_flags);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto exit4;
@@ -4331,16 +4366,16 @@ retry_deleg:
if (!d_is_dir(new_dentry)) {
error = -ENOTDIR;
- if (newnd.last.name[newnd.last.len])
+ if (new_last.name[new_last.len])
goto exit5;
}
}
/* unless the source is a directory trailing slashes give -ENOTDIR */
if (!d_is_dir(old_dentry)) {
error = -ENOTDIR;
- if (oldnd.last.name[oldnd.last.len])
+ if (old_last.name[old_last.len])
goto exit5;
- if (!(flags & RENAME_EXCHANGE) && newnd.last.name[newnd.last.len])
+ if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len])
goto exit5;
}
/* source should not be ancestor of target */
@@ -4353,32 +4388,32 @@ retry_deleg:
if (new_dentry == trap)
goto exit5;
- error = security_path_rename(&oldnd.path, old_dentry,
- &newnd.path, new_dentry, flags);
+ error = security_path_rename(&old_path, old_dentry,
+ &new_path, new_dentry, flags);
if (error)
goto exit5;
- error = vfs_rename(old_dir->d_inode, old_dentry,
- new_dir->d_inode, new_dentry,
+ error = vfs_rename(old_path.dentry->d_inode, old_dentry,
+ new_path.dentry->d_inode, new_dentry,
&delegated_inode, flags);
exit5:
dput(new_dentry);
exit4:
dput(old_dentry);
exit3:
- unlock_rename(new_dir, old_dir);
+ unlock_rename(new_path.dentry, old_path.dentry);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
goto retry_deleg;
}
- mnt_drop_write(oldnd.path.mnt);
+ mnt_drop_write(old_path.mnt);
exit2:
if (retry_estale(error, lookup_flags))
should_retry = true;
- path_put(&newnd.path);
+ path_put(&new_path);
putname(to);
exit1:
- path_put(&oldnd.path);
+ path_put(&old_path);
putname(from);
if (should_retry) {
should_retry = false;
@@ -4437,18 +4472,19 @@ EXPORT_SYMBOL(readlink_copy);
*/
int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
- struct nameidata nd;
void *cookie;
+ struct inode *inode = d_inode(dentry);
+ const char *link = inode->i_link;
int res;
- nd.depth = 0;
- cookie = dentry->d_inode->i_op->follow_link(dentry, &nd);
- if (IS_ERR(cookie))
- return PTR_ERR(cookie);
-
- res = readlink_copy(buffer, buflen, nd_get_link(&nd));
- if (dentry->d_inode->i_op->put_link)
- dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
+ if (!link) {
+ link = inode->i_op->follow_link(dentry, &cookie);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+ }
+ res = readlink_copy(buffer, buflen, link);
+ if (inode->i_op->put_link)
+ inode->i_op->put_link(inode, cookie);
return res;
}
EXPORT_SYMBOL(generic_readlink);
@@ -4480,22 +4516,21 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
}
EXPORT_SYMBOL(page_readlink);
-void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
+const char *page_follow_link_light(struct dentry *dentry, void **cookie)
{
struct page *page = NULL;
- nd_set_link(nd, page_getlink(dentry, &page));
- return page;
+ char *res = page_getlink(dentry, &page);
+ if (!IS_ERR(res))
+ *cookie = page;
+ return res;
}
EXPORT_SYMBOL(page_follow_link_light);
-void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
+void page_put_link(struct inode *unused, void *cookie)
{
struct page *page = cookie;
-
- if (page) {
- kunmap(page);
- page_cache_release(page);
- }
+ kunmap(page);
+ page_cache_release(page);
}
EXPORT_SYMBOL(page_put_link);
diff --git a/fs/namespace.c b/fs/namespace.c
index 1b9e11167bae..9c1c43d0d4f1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -590,24 +590,35 @@ static void delayed_free_vfsmnt(struct rcu_head *head)
}
/* call under rcu_read_lock */
-bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
+int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
struct mount *mnt;
if (read_seqretry(&mount_lock, seq))
- return false;
+ return 1;
if (bastard == NULL)
- return true;
+ return 0;
mnt = real_mount(bastard);
mnt_add_count(mnt, 1);
if (likely(!read_seqretry(&mount_lock, seq)))
- return true;
+ return 0;
if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
mnt_add_count(mnt, -1);
- return false;
+ return 1;
+ }
+ return -1;
+}
+
+/* call under rcu_read_lock */
+bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
+{
+ int res = __legitimize_mnt(bastard, seq);
+ if (likely(!res))
+ return true;
+ if (unlikely(res < 0)) {
+ rcu_read_unlock();
+ mntput(bastard);
+ rcu_read_lock();
}
- rcu_read_unlock();
- mntput(bastard);
- rcu_read_lock();
return false;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 45b35b9b1e36..55e1e3af23a3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -38,6 +38,7 @@
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/errno.h>
+#include <linux/file.h>
#include <linux/string.h>
#include <linux/ratelimit.h>
#include <linux/printk.h>
@@ -5604,6 +5605,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
p->server = server;
atomic_inc(&lsp->ls_count);
p->ctx = get_nfs_open_context(ctx);
+ get_file(fl->fl_file);
memcpy(&p->fl, fl, sizeof(p->fl));
return p;
out_free_seqid:
@@ -5716,6 +5718,7 @@ static void nfs4_lock_release(void *calldata)
nfs_free_seqid(data->arg.lock_seqid);
nfs4_put_lock_state(data->lsp);
put_nfs_open_context(data->ctx);
+ fput(data->fl.fl_file);
kfree(data);
dprintk("%s: done!\n", __func__);
}
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 2d56200655fe..b6de433da5db 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -20,7 +20,6 @@
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/string.h>
-#include <linux/namei.h>
/* Symlink caching in the page cache is even more simplistic
* and straight-forward than readdir caching.
@@ -43,7 +42,7 @@ error:
return -EIO;
}
-static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *nfs_follow_link(struct dentry *dentry, void **cookie)
{
struct inode *inode = d_inode(dentry);
struct page *page;
@@ -51,19 +50,13 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
if (err)
- goto read_failed;
+ return err;
page = read_cache_page(&inode->i_data, 0,
(filler_t *)nfs_symlink_filler, inode);
- if (IS_ERR(page)) {
- err = page;
- goto read_failed;
- }
- nd_set_link(nd, kmap(page));
- return page;
-
-read_failed:
- nd_set_link(nd, err);
- return NULL;
+ if (IS_ERR(page))
+ return ERR_CAST(page);
+ *cookie = page;
+ return kmap(page);
}
/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d12a4be613a5..dfc19f1575a1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1845,12 +1845,15 @@ int nfs_wb_all(struct inode *inode)
trace_nfs_writeback_inode_enter(inode);
ret = filemap_write_and_wait(inode->i_mapping);
- if (!ret) {
- ret = nfs_commit_inode(inode, FLUSH_SYNC);
- if (!ret)
- pnfs_sync_inode(inode, true);
- }
+ if (ret)
+ goto out;
+ ret = nfs_commit_inode(inode, FLUSH_SYNC);
+ if (ret < 0)
+ goto out;
+ pnfs_sync_inode(inode, true);
+ ret = 0;
+out:
trace_nfs_writeback_inode_exit(inode, ret);
return ret;
}
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 0f35b80d17fe..443abecf01b7 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -35,7 +35,7 @@
* ntfs_lookup - find the inode represented by a dentry in a directory inode
* @dir_ino: directory inode in which to look for the inode
* @dent: dentry representing the inode to look for
- * @nd: lookup nameidata
+ * @flags: lookup flags
*
* In short, ntfs_lookup() looks for the inode represented by the dentry @dent
* in the directory inode @dir_ino and if found attaches the inode to the
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
index 082234581d05..83f4e76511c2 100644
--- a/fs/omfs/bitmap.c
+++ b/fs/omfs/bitmap.c
@@ -159,7 +159,7 @@ int omfs_allocate_range(struct super_block *sb,
goto out;
found:
- *return_block = i * bits_per_entry + bit;
+ *return_block = (u64) i * bits_per_entry + bit;
*return_size = run;
ret = set_run(sb, i, bits_per_entry, bit, run, 1);
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 138321b0c6c2..3d935c81789a 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -306,7 +306,8 @@ static const struct super_operations omfs_sops = {
*/
static int omfs_get_imap(struct super_block *sb)
{
- unsigned int bitmap_size, count, array_size;
+ unsigned int bitmap_size, array_size;
+ int count;
struct omfs_sb_info *sbi = OMFS_SB(sb);
struct buffer_head *bh;
unsigned long **ptr;
@@ -359,7 +360,7 @@ nomem:
}
enum {
- Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask
+ Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask, Opt_err
};
static const match_table_t tokens = {
@@ -368,6 +369,7 @@ static const match_table_t tokens = {
{Opt_umask, "umask=%o"},
{Opt_dmask, "dmask=%o"},
{Opt_fmask, "fmask=%o"},
+ {Opt_err, NULL},
};
static int parse_options(char *options, struct omfs_sb_info *sbi)
@@ -548,8 +550,10 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
}
sb->s_root = d_make_root(root);
- if (!sb->s_root)
+ if (!sb->s_root) {
+ ret = -ENOMEM;
goto out_brelse_bh2;
+ }
printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name);
ret = 0;
diff --git a/fs/open.c b/fs/open.c
index 98e5a52dc68c..e0250bdcc440 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -367,7 +367,7 @@ retry:
if (res)
goto out;
- inode = path.dentry->d_inode;
+ inode = d_backing_inode(path.dentry);
if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
/*
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 24f640441bd9..84d693d37428 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -299,6 +299,9 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
struct cred *override_cred;
char *link = NULL;
+ if (WARN_ON(!workdir))
+ return -EROFS;
+
ovl_path_upper(parent, &parentpath);
upperdir = parentpath.dentry;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index d139405d2bfa..692ceda3bc21 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -222,6 +222,9 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
struct kstat stat;
int err;
+ if (WARN_ON(!workdir))
+ return ERR_PTR(-EROFS);
+
err = ovl_lock_rename_workdir(workdir, upperdir);
if (err)
goto out;
@@ -322,6 +325,9 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
struct dentry *newdentry;
int err;
+ if (WARN_ON(!workdir))
+ return -EROFS;
+
err = ovl_lock_rename_workdir(workdir, upperdir);
if (err)
goto out;
@@ -506,11 +512,28 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
struct dentry *opaquedir = NULL;
int err;
- if (is_dir && OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) {
- opaquedir = ovl_check_empty_and_clear(dentry);
- err = PTR_ERR(opaquedir);
- if (IS_ERR(opaquedir))
- goto out;
+ if (WARN_ON(!workdir))
+ return -EROFS;
+
+ if (is_dir) {
+ if (OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) {
+ opaquedir = ovl_check_empty_and_clear(dentry);
+ err = PTR_ERR(opaquedir);
+ if (IS_ERR(opaquedir))
+ goto out;
+ } else {
+ LIST_HEAD(list);
+
+ /*
+ * When removing an empty opaque directory, it makes no
+ * sense to replace it with an exact replica of itself.
+ * But emptiness still needs to be checked.
+ */
+ err = ovl_check_empty_dir(dentry, &list);
+ ovl_cache_free(&list);
+ if (err)
+ goto out;
+ }
}
err = ovl_lock_rename_workdir(workdir, upperdir);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 04f124884687..308379b2d0b2 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -140,11 +140,12 @@ struct ovl_link_data {
void *cookie;
};
-static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *ovl_follow_link(struct dentry *dentry, void **cookie)
{
- void *ret;
struct dentry *realdentry;
struct inode *realinode;
+ struct ovl_link_data *data = NULL;
+ const char *ret;
realdentry = ovl_dentry_real(dentry);
realinode = realdentry->d_inode;
@@ -152,28 +153,28 @@ static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
if (WARN_ON(!realinode->i_op->follow_link))
return ERR_PTR(-EPERM);
- ret = realinode->i_op->follow_link(realdentry, nd);
- if (IS_ERR(ret))
- return ret;
-
if (realinode->i_op->put_link) {
- struct ovl_link_data *data;
-
data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
- if (!data) {
- realinode->i_op->put_link(realdentry, nd, ret);
+ if (!data)
return ERR_PTR(-ENOMEM);
- }
data->realdentry = realdentry;
- data->cookie = ret;
+ }
- return data;
- } else {
- return NULL;
+ ret = realinode->i_op->follow_link(realdentry, cookie);
+ if (IS_ERR_OR_NULL(ret)) {
+ kfree(data);
+ return ret;
}
+
+ if (data)
+ data->cookie = *cookie;
+
+ *cookie = data;
+
+ return ret;
}
-static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
+static void ovl_put_link(struct inode *unused, void *c)
{
struct inode *realinode;
struct ovl_link_data *data = c;
@@ -182,7 +183,7 @@ static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
return;
realinode = data->realdentry->d_inode;
- realinode->i_op->put_link(data->realdentry, nd, data->cookie);
+ realinode->i_op->put_link(realinode, data->cookie);
kfree(data);
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 5f0d1993e6e3..bf8537c7f455 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -529,7 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
struct ovl_fs *ufs = sb->s_fs_info;
- if (!(*flags & MS_RDONLY) && !ufs->upper_mnt)
+ if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir))
return -EROFS;
return 0;
@@ -925,9 +925,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
err = PTR_ERR(ufs->workdir);
if (IS_ERR(ufs->workdir)) {
- pr_err("overlayfs: failed to create directory %s/%s\n",
- ufs->config.workdir, OVL_WORKDIR_NAME);
- goto out_put_upper_mnt;
+ pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
+ ufs->config.workdir, OVL_WORKDIR_NAME, -err);
+ sb->s_flags |= MS_RDONLY;
+ ufs->workdir = NULL;
}
}
@@ -997,7 +998,6 @@ out_put_lower_mnt:
kfree(ufs->lower_mnt);
out_put_workdir:
dput(ufs->workdir);
-out_put_upper_mnt:
mntput(ufs->upper_mnt);
out_put_lowerpath:
for (i = 0; i < numlower; i++)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 093ca14f5701..286a422f440e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1380,7 +1380,7 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
return -ENOENT;
}
-static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *proc_pid_follow_link(struct dentry *dentry, void **cookie)
{
struct inode *inode = d_inode(dentry);
struct path path;
@@ -1394,7 +1394,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
if (error)
goto out;
- nd_jump_link(nd, &path);
+ nd_jump_link(&path);
return NULL;
out:
return ERR_PTR(error);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 8272aaba1bb0..afe232b9df6e 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -23,7 +23,6 @@
#include <linux/slab.h>
#include <linux/mount.h>
#include <linux/magic.h>
-#include <linux/namei.h>
#include <asm/uaccess.h>
@@ -394,16 +393,16 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
};
#endif
-static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *proc_follow_link(struct dentry *dentry, void **cookie)
{
struct proc_dir_entry *pde = PDE(d_inode(dentry));
if (unlikely(!use_pde(pde)))
return ERR_PTR(-EINVAL);
- nd_set_link(nd, pde->data);
- return pde;
+ *cookie = pde;
+ return pde->data;
}
-static void proc_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+static void proc_put_link(struct inode *unused, void *p)
{
unuse_pde(p);
}
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index e512642dbbdc..f6e8354b8cea 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -30,7 +30,7 @@ static const struct proc_ns_operations *ns_entries[] = {
&mntns_operations,
};
-static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *proc_ns_follow_link(struct dentry *dentry, void **cookie)
{
struct inode *inode = d_inode(dentry);
const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
@@ -45,7 +45,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
if (ptrace_may_access(task, PTRACE_MODE_READ)) {
error = ns_get_path(&ns_path, task, ns_ops);
if (!error)
- nd_jump_link(nd, &ns_path);
+ nd_jump_link(&ns_path);
}
put_task_struct(task);
return error;
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 6195b4a7c3b1..113b8d061fc0 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -1,5 +1,4 @@
#include <linux/sched.h>
-#include <linux/namei.h>
#include <linux/slab.h>
#include <linux/pid_namespace.h>
#include "internal.h"
@@ -19,21 +18,20 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
return readlink_copy(buffer, buflen, tmp);
}
-static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *proc_self_follow_link(struct dentry *dentry, void **cookie)
{
struct pid_namespace *ns = dentry->d_sb->s_fs_info;
pid_t tgid = task_tgid_nr_ns(current, ns);
- char *name = ERR_PTR(-ENOENT);
- if (tgid) {
- /* 11 for max length of signed int in decimal + NULL term */
- name = kmalloc(12, GFP_KERNEL);
- if (!name)
- name = ERR_PTR(-ENOMEM);
- else
- sprintf(name, "%d", tgid);
- }
- nd_set_link(nd, name);
- return NULL;
+ char *name;
+
+ if (!tgid)
+ return ERR_PTR(-ENOENT);
+ /* 11 for max length of signed int in decimal + NUL terminator */
+ name = kmalloc(12, GFP_KERNEL);
+ if (!name)
+ return ERR_PTR(-ENOMEM);
+ sprintf(name, "%d", tgid);
+ return *cookie = name;
}
static const struct inode_operations proc_self_inode_operations = {
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index a8371993b4fb..947b0f4fd0a1 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -1,5 +1,4 @@
#include <linux/sched.h>
-#include <linux/namei.h>
#include <linux/slab.h>
#include <linux/pid_namespace.h>
#include "internal.h"
@@ -20,21 +19,20 @@ static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer,
return readlink_copy(buffer, buflen, tmp);
}
-static void *proc_thread_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *proc_thread_self_follow_link(struct dentry *dentry, void **cookie)
{
struct pid_namespace *ns = dentry->d_sb->s_fs_info;
pid_t tgid = task_tgid_nr_ns(current, ns);
pid_t pid = task_pid_nr_ns(current, ns);
- char *name = ERR_PTR(-ENOENT);
- if (pid) {
- name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);
- if (!name)
- name = ERR_PTR(-ENOMEM);
- else
- sprintf(name, "%d/task/%d", tgid, pid);
- }
- nd_set_link(nd, name);
- return NULL;
+ char *name;
+
+ if (!pid)
+ return ERR_PTR(-ENOENT);
+ name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);
+ if (!name)
+ return ERR_PTR(-ENOMEM);
+ sprintf(name, "%d/task/%d", tgid, pid);
+ return *cookie = name;
}
static const struct inode_operations proc_thread_self_inode_operations = {
diff --git a/fs/select.c b/fs/select.c
index f684c750e08a..015547330e88 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -189,7 +189,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
* doesn't imply write barrier and the users expect write
* barrier semantics on wakeup functions. The following
* smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
- * and is paired with set_mb() in poll_schedule_timeout.
+ * and is paired with smp_store_mb() in poll_schedule_timeout.
*/
smp_wmb();
pwq->triggered = 1;
@@ -244,7 +244,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
/*
* Prepare for the next iteration.
*
- * The following set_mb() serves two purposes. First, it's
+ * The following smp_store_mb() serves two purposes. First, it's
* the counterpart rmb of the wmb in pollwake() such that data
* written before wake up is always visible after wake up.
* Second, the full barrier guarantees that triggered clearing
@@ -252,7 +252,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
* this problem doesn't exist for the first iteration as
* add_wait_queue() has full barrier semantics.
*/
- set_mb(pwq->triggered, 0);
+ smp_store_mb(pwq->triggered, 0);
return rc;
}
diff --git a/fs/splice.c b/fs/splice.c
index bfe62ae40f40..4f355a1c1a9e 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -261,6 +261,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
return ret;
}
+EXPORT_SYMBOL_GPL(splice_to_pipe);
void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
{
diff --git a/fs/sysv/Makefile b/fs/sysv/Makefile
index 3591f9d7a48a..7a75e70a4b61 100644
--- a/fs/sysv/Makefile
+++ b/fs/sysv/Makefile
@@ -5,4 +5,4 @@
obj-$(CONFIG_SYSV_FS) += sysv.o
sysv-objs := ialloc.o balloc.o inode.o itree.o file.o dir.o \
- namei.o super.o symlink.o
+ namei.o super.o
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 88956309cc86..590ad9206e3f 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -166,8 +166,9 @@ void sysv_set_inode(struct inode *inode, dev_t rdev)
inode->i_op = &sysv_symlink_inode_operations;
inode->i_mapping->a_ops = &sysv_aops;
} else {
- inode->i_op = &sysv_fast_symlink_inode_operations;
- nd_terminate_link(SYSV_I(inode)->i_data, inode->i_size,
+ inode->i_op = &simple_symlink_inode_operations;
+ inode->i_link = (char *)SYSV_I(inode)->i_data;
+ nd_terminate_link(inode->i_link, inode->i_size,
sizeof(SYSV_I(inode)->i_data) - 1);
}
} else
diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c
deleted file mode 100644
index d3fa0d703314..000000000000
--- a/fs/sysv/symlink.c
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * linux/fs/sysv/symlink.c
- *
- * Handling of System V filesystem fast symlinks extensions.
- * Aug 2001, Christoph Hellwig (hch@infradead.org)
- */
-
-#include "sysv.h"
-#include <linux/namei.h>
-
-static void *sysv_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- nd_set_link(nd, (char *)SYSV_I(d_inode(dentry))->i_data);
- return NULL;
-}
-
-const struct inode_operations sysv_fast_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = sysv_follow_link,
-};
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 69d488986cce..2c13525131cd 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -161,7 +161,6 @@ extern ino_t sysv_inode_by_name(struct dentry *);
extern const struct inode_operations sysv_file_inode_operations;
extern const struct inode_operations sysv_dir_inode_operations;
-extern const struct inode_operations sysv_fast_symlink_inode_operations;
extern const struct file_operations sysv_file_operations;
extern const struct file_operations sysv_dir_operations;
extern const struct address_space_operations sysv_aops;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 27060fc855d4..5c27c66c224a 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -889,6 +889,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
memcpy(ui->data, symname, len);
((char *)ui->data)[len] = '\0';
+ inode->i_link = ui->data;
/*
* The terminating zero byte is not written to the flash media and it
* is put just to make later in-memory string processing simpler. Thus,
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 35efc103c39c..a3dfe2ae79f2 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -51,7 +51,6 @@
#include "ubifs.h"
#include <linux/mount.h>
-#include <linux/namei.h>
#include <linux/slab.h>
static int read_block(struct inode *inode, void *addr, unsigned int block,
@@ -1300,14 +1299,6 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset,
ClearPageChecked(page);
}
-static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct ubifs_inode *ui = ubifs_inode(d_inode(dentry));
-
- nd_set_link(nd, ui->data);
- return NULL;
-}
-
int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
@@ -1570,7 +1561,7 @@ const struct inode_operations ubifs_file_inode_operations = {
const struct inode_operations ubifs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = ubifs_follow_link,
+ .follow_link = simple_follow_link,
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
.setxattr = ubifs_setxattr,
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 75e6f04bb795..20f5dbd7c6a8 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -195,6 +195,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
}
memcpy(ui->data, ino->data, ui->data_len);
((char *)ui->data)[ui->data_len] = '\0';
+ inode->i_link = ui->data;
break;
case S_IFBLK:
case S_IFCHR:
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 541a12b5792d..541d9c65014d 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -168,7 +168,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
}
flen = udf_get_filename(sb, nameptr, lfi, fname, UDF_NAME_LEN);
- if (!flen)
+ if (flen < 0)
continue;
tloc = lelb_to_cpu(cfi.icb.extLocation);
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7a95b8fed302..bddf3d071dae 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -152,8 +152,6 @@ out:
mutex_unlock(&inode->i_mutex);
if (retval > 0) {
- ssize_t err;
-
mark_inode_dirty(inode);
err = generic_write_sync(file, iocb->ki_pos - retval, retval);
if (err < 0)
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 5c03f0dfb98b..c97b5a8d1e24 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -138,6 +138,25 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
return 0;
}
+/**
+ * udf_find_entry - find entry in given directory.
+ *
+ * @dir: directory inode to search in
+ * @child: qstr of the name
+ * @fibh: buffer head / inode with file identifier descriptor we found
+ * @cfi: found file identifier descriptor with given name
+ *
+ * This function searches in the directory @dir for a file name @child. When
+ * found, @fibh points to the buffer head(s) (bh is NULL for in ICB
+ * directories) containing the file identifier descriptor (FID). In that case
+ * the function returns pointer to the FID in the buffer or inode - but note
+ * that FID may be split among two buffers (blocks) so accessing it via that
+ * pointer isn't easily possible. This pointer can be used only as an iterator
+ * for other directory manipulation functions. For inspection of the FID @cfi
+ * can be used - the found FID is copied there.
+ *
+ * Returns pointer to FID, NULL when nothing found, or error code.
+ */
static struct fileIdentDesc *udf_find_entry(struct inode *dir,
const struct qstr *child,
struct udf_fileident_bh *fibh,
@@ -167,8 +186,11 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
fibh->soffset = fibh->eoffset = f_pos & (sb->s_blocksize - 1);
if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
if (inode_bmap(dir, f_pos >> sb->s_blocksize_bits, &epos,
- &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30))
+ &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) {
+ fi = ERR_PTR(-EIO);
goto out_err;
+ }
+
block = udf_get_lb_pblock(sb, &eloc, offset);
if ((++offset << sb->s_blocksize_bits) < elen) {
if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
@@ -179,19 +201,25 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
offset = 0;
fibh->sbh = fibh->ebh = udf_tread(sb, block);
- if (!fibh->sbh)
+ if (!fibh->sbh) {
+ fi = ERR_PTR(-EIO);
goto out_err;
+ }
}
fname = kmalloc(UDF_NAME_LEN, GFP_NOFS);
- if (!fname)
+ if (!fname) {
+ fi = ERR_PTR(-ENOMEM);
goto out_err;
+ }
while (f_pos < size) {
fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc,
&elen, &offset);
- if (!fi)
+ if (!fi) {
+ fi = ERR_PTR(-EIO);
goto out_err;
+ }
liu = le16_to_cpu(cfi->lengthOfImpUse);
lfi = cfi->lengthFileIdent;
@@ -234,12 +262,17 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
continue;
flen = udf_get_filename(sb, nameptr, lfi, fname, UDF_NAME_LEN);
- if (flen && udf_match(flen, fname, child->len, child->name))
+ if (flen < 0) {
+ fi = ERR_PTR(flen);
+ goto out_err;
+ }
+
+ if (udf_match(flen, fname, child->len, child->name))
goto out_ok;
}
-out_err:
fi = NULL;
+out_err:
if (fibh->sbh != fibh->ebh)
brelse(fibh->ebh);
brelse(fibh->sbh);
@@ -256,6 +289,7 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
struct inode *inode = NULL;
struct fileIdentDesc cfi;
struct udf_fileident_bh fibh;
+ struct fileIdentDesc *fi;
if (dentry->d_name.len > UDF_NAME_LEN - 2)
return ERR_PTR(-ENAMETOOLONG);
@@ -275,7 +309,11 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
} else
#endif /* UDF_RECOVERY */
- if (udf_find_entry(dir, &dentry->d_name, &fibh, &cfi)) {
+ fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
+ if (IS_ERR(fi))
+ return ERR_CAST(fi);
+
+ if (fi) {
struct kernel_lb_addr loc;
if (fibh.sbh != fibh.ebh)
@@ -774,8 +812,11 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
retval = -ENOENT;
fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
- if (!fi)
+ if (IS_ERR_OR_NULL(fi)) {
+ if (fi)
+ retval = PTR_ERR(fi);
goto out;
+ }
retval = -EIO;
tloc = lelb_to_cpu(cfi.icb.extLocation);
@@ -817,8 +858,12 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
retval = -ENOENT;
fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
- if (!fi)
+
+ if (IS_ERR_OR_NULL(fi)) {
+ if (fi)
+ retval = PTR_ERR(fi);
goto out;
+ }
retval = -EIO;
tloc = lelb_to_cpu(cfi.icb.extLocation);
@@ -1049,24 +1094,30 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
struct udf_inode_info *old_iinfo = UDF_I(old_inode);
ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
- if (ofi) {
- if (ofibh.sbh != ofibh.ebh)
- brelse(ofibh.ebh);
- brelse(ofibh.sbh);
+ if (IS_ERR(ofi)) {
+ retval = PTR_ERR(ofi);
+ goto end_rename;
}
+
+ if (ofibh.sbh != ofibh.ebh)
+ brelse(ofibh.ebh);
+
+ brelse(ofibh.sbh);
tloc = lelb_to_cpu(ocfi.icb.extLocation);
if (!ofi || udf_get_lb_pblock(old_dir->i_sb, &tloc, 0)
!= old_inode->i_ino)
goto end_rename;
nfi = udf_find_entry(new_dir, &new_dentry->d_name, &nfibh, &ncfi);
- if (nfi) {
- if (!new_inode) {
- if (nfibh.sbh != nfibh.ebh)
- brelse(nfibh.ebh);
- brelse(nfibh.sbh);
- nfi = NULL;
- }
+ if (IS_ERR(nfi)) {
+ retval = PTR_ERR(nfi);
+ goto end_rename;
+ }
+ if (nfi && !new_inode) {
+ if (nfibh.sbh != nfibh.ebh)
+ brelse(nfibh.ebh);
+ brelse(nfibh.sbh);
+ nfi = NULL;
}
if (S_ISDIR(old_inode->i_mode)) {
int offset = udf_ext0_offset(old_inode);
@@ -1221,7 +1272,7 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
static struct dentry *udf_fh_to_dentry(struct super_block *sb,
struct fid *fid, int fh_len, int fh_type)
{
- if ((fh_len != 3 && fh_len != 5) ||
+ if (fh_len < 3 ||
(fh_type != FILEID_UDF_WITH_PARENT &&
fh_type != FILEID_UDF_WITHOUT_PARENT))
return NULL;
@@ -1233,7 +1284,7 @@ static struct dentry *udf_fh_to_dentry(struct super_block *sb,
static struct dentry *udf_fh_to_parent(struct super_block *sb,
struct fid *fid, int fh_len, int fh_type)
{
- if (fh_len != 5 || fh_type != FILEID_UDF_WITH_PARENT)
+ if (fh_len < 5 || fh_type != FILEID_UDF_WITH_PARENT)
return NULL;
return udf_nfs_get_inode(sb, fid->udf.parent_block,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 6299f341967b..b96f190bc567 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -927,17 +927,23 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
#endif
}
- if (!udf_build_ustr(instr, pvoldesc->volIdent, 32))
- if (udf_CS0toUTF8(outstr, instr)) {
- strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name,
- outstr->u_len > 31 ? 31 : outstr->u_len);
- udf_debug("volIdent[] = '%s'\n",
- UDF_SB(sb)->s_volume_ident);
- }
+ if (!udf_build_ustr(instr, pvoldesc->volIdent, 32)) {
+ ret = udf_CS0toUTF8(outstr, instr);
+ if (ret < 0)
+ goto out_bh;
+
+ strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name,
+ outstr->u_len > 31 ? 31 : outstr->u_len);
+ udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident);
+ }
- if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128))
- if (udf_CS0toUTF8(outstr, instr))
- udf_debug("volSetIdent[] = '%s'\n", outstr->u_name);
+ if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128)) {
+ ret = udf_CS0toUTF8(outstr, instr);
+ if (ret < 0)
+ goto out_bh;
+
+ udf_debug("volSetIdent[] = '%s'\n", outstr->u_name);
+ }
ret = 0;
out_bh:
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 8dfbc4025e2f..862535b3ba58 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -82,6 +82,9 @@ static int udf_pc_to_char(struct super_block *sb, unsigned char *from,
comp_len = udf_get_filename(sb, pc->componentIdent,
pc->lengthComponentIdent,
p, tolen);
+ if (comp_len < 0)
+ return comp_len;
+
p += comp_len;
tolen -= comp_len;
if (tolen == 0)
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index b84fee372734..ab478e62baae 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -68,21 +68,16 @@ int udf_build_ustr(struct ustr *dest, dstring *ptr, int size)
/*
* udf_build_ustr_exact
*/
-static int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
+static void udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
{
- if ((!dest) || (!ptr) || (!exactsize))
- return -1;
-
memset(dest, 0, sizeof(struct ustr));
dest->u_cmpID = ptr[0];
dest->u_len = exactsize - 1;
memcpy(dest->u_name, ptr + 1, exactsize - 1);
-
- return 0;
}
/*
- * udf_ocu_to_utf8
+ * udf_CS0toUTF8
*
* PURPOSE
* Convert OSTA Compressed Unicode to the UTF-8 equivalent.
@@ -94,7 +89,7 @@ static int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
* both of type "struct ustr *"
*
* POST-CONDITIONS
- * <return> Zero on success.
+ * <return> >= 0 on success.
*
* HISTORY
* November 12, 1997 - Andrew E. Mileski
@@ -117,7 +112,7 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
memset(utf_o, 0, sizeof(struct ustr));
pr_err("unknown compression code (%d) stri=%s\n",
cmp_id, ocu_i->u_name);
- return 0;
+ return -EINVAL;
}
ocu = ocu_i->u_name;
@@ -154,7 +149,7 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
/*
*
- * udf_utf8_to_ocu
+ * udf_UTF8toCS0
*
* PURPOSE
* Convert UTF-8 to the OSTA Compressed Unicode equivalent.
@@ -270,7 +265,7 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
memset(utf_o, 0, sizeof(struct ustr));
pr_err("unknown compression code (%d) stri=%s\n",
cmp_id, ocu_i->u_name);
- return 0;
+ return -EINVAL;
}
ocu = ocu_i->u_name;
@@ -338,43 +333,51 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
uint8_t *dname, int dlen)
{
struct ustr *filename, *unifilename;
- int len = 0;
+ int ret;
+
+ if (!slen)
+ return -EIO;
filename = kmalloc(sizeof(struct ustr), GFP_NOFS);
if (!filename)
- return 0;
+ return -ENOMEM;
unifilename = kmalloc(sizeof(struct ustr), GFP_NOFS);
- if (!unifilename)
+ if (!unifilename) {
+ ret = -ENOMEM;
goto out1;
+ }
- if (udf_build_ustr_exact(unifilename, sname, slen))
- goto out2;
-
+ udf_build_ustr_exact(unifilename, sname, slen);
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
- if (!udf_CS0toUTF8(filename, unifilename)) {
+ ret = udf_CS0toUTF8(filename, unifilename);
+ if (ret < 0) {
udf_debug("Failed in udf_get_filename: sname = %s\n",
sname);
goto out2;
}
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
- if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename,
- unifilename)) {
+ ret = udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename,
+ unifilename);
+ if (ret < 0) {
udf_debug("Failed in udf_get_filename: sname = %s\n",
sname);
goto out2;
}
} else
- goto out2;
+ BUG();
- len = udf_translate_to_linux(dname, dlen,
+ ret = udf_translate_to_linux(dname, dlen,
filename->u_name, filename->u_len,
unifilename->u_name, unifilename->u_len);
+ /* Zero length filename isn't valid... */
+ if (ret == 0)
+ ret = -EINVAL;
out2:
kfree(unifilename);
out1:
kfree(filename);
- return len;
+ return ret;
}
int udf_put_filename(struct super_block *sb, const uint8_t *sname,
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index be7d42c7d938..99aaf5c9bf4d 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -572,9 +572,10 @@ static void ufs_set_inode_ops(struct inode *inode)
inode->i_fop = &ufs_dir_operations;
inode->i_mapping->a_ops = &ufs_aops;
} else if (S_ISLNK(inode->i_mode)) {
- if (!inode->i_blocks)
+ if (!inode->i_blocks) {
inode->i_op = &ufs_fast_symlink_inode_operations;
- else {
+ inode->i_link = (char *)UFS_I(inode)->i_u1.i_symlink;
+ } else {
inode->i_op = &ufs_symlink_inode_operations;
inode->i_mapping->a_ops = &ufs_aops;
}
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index e491a93a7e9a..f773deb1d2e3 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -144,7 +144,8 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
} else {
/* fast symlink */
inode->i_op = &ufs_fast_symlink_inode_operations;
- memcpy(UFS_I(inode)->i_u1.i_symlink, symname, l);
+ inode->i_link = (char *)UFS_I(inode)->i_u1.i_symlink;
+ memcpy(inode->i_link, symname, l);
inode->i_size = l-1;
}
mark_inode_dirty(inode);
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c
index 5b537e2fdda3..874480bb43e9 100644
--- a/fs/ufs/symlink.c
+++ b/fs/ufs/symlink.c
@@ -25,23 +25,12 @@
* ext2 symlink handling code
*/
-#include <linux/fs.h>
-#include <linux/namei.h>
-
#include "ufs_fs.h"
#include "ufs.h"
-
-static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct ufs_inode_info *p = UFS_I(d_inode(dentry));
- nd_set_link(nd, (char*)p->i_u1.i_symlink);
- return NULL;
-}
-
const struct inode_operations ufs_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = ufs_follow_link,
+ .follow_link = simple_follow_link,
.setattr = ufs_setattr,
};
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 04e79d57bca6..e9d401ce93bb 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -574,8 +574,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
* After the last attribute is removed revert to original inode format,
* making all literal area available to the data fork once more.
*/
-STATIC void
-xfs_attr_fork_reset(
+void
+xfs_attr_fork_remove(
struct xfs_inode *ip,
struct xfs_trans *tp)
{
@@ -641,7 +641,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
(mp->m_flags & XFS_MOUNT_ATTR2) &&
(dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
!(args->op_flags & XFS_DA_OP_ADDNAME)) {
- xfs_attr_fork_reset(dp, args->trans);
+ xfs_attr_fork_remove(dp, args->trans);
} else {
xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
@@ -905,7 +905,7 @@ xfs_attr3_leaf_to_shortform(
if (forkoff == -1) {
ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
- xfs_attr_fork_reset(dp, args->trans);
+ xfs_attr_fork_remove(dp, args->trans);
goto out;
}
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index 025c4b820c03..882c8d338891 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -53,7 +53,7 @@ int xfs_attr_shortform_remove(struct xfs_da_args *args);
int xfs_attr_shortform_list(struct xfs_attr_list_context *context);
int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
-
+void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp);
/*
* Internal routines when attribute fork size == XFS_LBSIZE(mp).
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index aeffeaaac0ec..f1026e86dabc 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3224,12 +3224,24 @@ xfs_bmap_extsize_align(
align_alen += temp;
align_off -= temp;
}
+
+ /* Same adjustment for the end of the requested area. */
+ temp = (align_alen % extsz);
+ if (temp)
+ align_alen += extsz - temp;
+
/*
- * Same adjustment for the end of the requested area.
+ * For large extent hint sizes, the aligned extent might be larger than
+ * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
+ * the length back under MAXEXTLEN. The outer allocation loops handle
+ * short allocation just fine, so it is safe to do this. We only want to
+ * do it when we are forced to, though, because it means more allocation
+ * operations are required.
*/
- if ((temp = (align_alen % extsz))) {
- align_alen += extsz - temp;
- }
+ while (align_alen > MAXEXTLEN)
+ align_alen -= extsz;
+ ASSERT(align_alen <= MAXEXTLEN);
+
/*
* If the previous block overlaps with this proposed allocation
* then move the start forward without adjusting the length.
@@ -3318,7 +3330,9 @@ xfs_bmap_extsize_align(
return -EINVAL;
} else {
ASSERT(orig_off >= align_off);
- ASSERT(orig_end <= align_off + align_alen);
+ /* see MAXEXTLEN handling above */
+ ASSERT(orig_end <= align_off + align_alen ||
+ align_alen + extsz > MAXEXTLEN);
}
#ifdef DEBUG
@@ -4099,13 +4113,6 @@ xfs_bmapi_reserve_delalloc(
/* Figure out the extent size, adjust alen */
extsz = xfs_get_extsz_hint(ip);
if (extsz) {
- /*
- * Make sure we don't exceed a single extent length when we
- * align the extent by reducing length we are going to
- * allocate by the maximum amount extent size aligment may
- * require.
- */
- alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1));
error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
1, 0, &aoff, &alen);
ASSERT(!error);
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 07349a183a11..1c9e75521250 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -376,7 +376,7 @@ xfs_ialloc_ag_alloc(
*/
newlen = args.mp->m_ialloc_inos;
if (args.mp->m_maxicount &&
- percpu_counter_read(&args.mp->m_icount) + newlen >
+ percpu_counter_read_positive(&args.mp->m_icount) + newlen >
args.mp->m_maxicount)
return -ENOSPC;
args.minlen = args.maxlen = args.mp->m_ialloc_blks;
@@ -1339,10 +1339,13 @@ xfs_dialloc(
* If we have already hit the ceiling of inode blocks then clear
* okalloc so we scan all available agi structures for a free
* inode.
+ *
+ * Read a rough value of mp->m_icount with percpu_counter_read_positive(),
+ * which sacrifices precision in exchange for better performance.
*/
if (mp->m_maxicount &&
- percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
- mp->m_maxicount) {
+ percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos
+ > mp->m_maxicount) {
noroom = 1;
okalloc = 0;
}
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index f9c1c64782d3..3fbf167cfb4c 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -380,23 +380,31 @@ xfs_attr3_root_inactive(
return error;
}
+/*
+ * xfs_attr_inactive kills all traces of an attribute fork on an inode. It
+ * removes both the on-disk and in-memory inode fork. Note that this also has to
+ * handle the condition of inodes without attributes but with an attribute fork
+ * configured, so we can't use xfs_inode_hasattr() here.
+ *
+ * The in-memory attribute fork is removed even on error.
+ */
int
-xfs_attr_inactive(xfs_inode_t *dp)
+xfs_attr_inactive(
+ struct xfs_inode *dp)
{
- xfs_trans_t *trans;
- xfs_mount_t *mp;
- int error;
+ struct xfs_trans *trans;
+ struct xfs_mount *mp;
+ int cancel_flags = 0;
+ int lock_mode = XFS_ILOCK_SHARED;
+ int error = 0;
mp = dp->i_mount;
ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
- xfs_ilock(dp, XFS_ILOCK_SHARED);
- if (!xfs_inode_hasattr(dp) ||
- dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
- xfs_iunlock(dp, XFS_ILOCK_SHARED);
- return 0;
- }
- xfs_iunlock(dp, XFS_ILOCK_SHARED);
+ xfs_ilock(dp, lock_mode);
+ if (!XFS_IFORK_Q(dp))
+ goto out_destroy_fork;
+ xfs_iunlock(dp, lock_mode);
/*
* Start our first transaction of the day.
@@ -408,13 +416,18 @@ xfs_attr_inactive(xfs_inode_t *dp)
* the inode in every transaction to let it float upward through
* the log.
*/
+ lock_mode = 0;
trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0);
- if (error) {
- xfs_trans_cancel(trans, 0);
- return error;
- }
- xfs_ilock(dp, XFS_ILOCK_EXCL);
+ if (error)
+ goto out_cancel;
+
+ lock_mode = XFS_ILOCK_EXCL;
+ cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT;
+ xfs_ilock(dp, lock_mode);
+
+ if (!XFS_IFORK_Q(dp))
+ goto out_cancel;
/*
* No need to make quota reservations here. We expect to release some
@@ -422,29 +435,31 @@ xfs_attr_inactive(xfs_inode_t *dp)
*/
xfs_trans_ijoin(trans, dp, 0);
- /*
- * Decide on what work routines to call based on the inode size.
- */
- if (!xfs_inode_hasattr(dp) ||
- dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
- error = 0;
- goto out;
+ /* invalidate and truncate the attribute fork extents */
+ if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
+ error = xfs_attr3_root_inactive(&trans, dp);
+ if (error)
+ goto out_cancel;
+
+ error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
+ if (error)
+ goto out_cancel;
}
- error = xfs_attr3_root_inactive(&trans, dp);
- if (error)
- goto out;
- error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
- if (error)
- goto out;
+ /* Reset the attribute fork - this also destroys the in-core fork */
+ xfs_attr_fork_remove(dp, trans);
error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
- xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
+ xfs_iunlock(dp, lock_mode);
return error;
-out:
- xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
- xfs_iunlock(dp, XFS_ILOCK_EXCL);
+out_cancel:
+ xfs_trans_cancel(trans, cancel_flags);
+out_destroy_fork:
+ /* kill the in-core attr fork before we drop the inode lock */
+ if (dp->i_afp)
+ xfs_idestroy_fork(dp, XFS_ATTR_FORK);
+ if (lock_mode)
+ xfs_iunlock(dp, lock_mode);
return error;
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 8121e75352ee..3b7591224f4a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -124,7 +124,7 @@ xfs_iozero(
status = 0;
} while (count);
- return (-status);
+ return status;
}
int
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d6ebc85192b7..539a85fddbc2 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1946,21 +1946,17 @@ xfs_inactive(
/*
* If there are attributes associated with the file then blow them away
* now. The code calls a routine that recursively deconstructs the
- * attribute fork. We need to just commit the current transaction
- * because we can't use it for xfs_attr_inactive().
+ * attribute fork. It also blows away the in-core attribute fork.
*/
- if (ip->i_d.di_anextents > 0) {
- ASSERT(ip->i_d.di_forkoff != 0);
-
+ if (XFS_IFORK_Q(ip)) {
error = xfs_attr_inactive(ip);
if (error)
return;
}
- if (ip->i_afp)
- xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-
+ ASSERT(!ip->i_afp);
ASSERT(ip->i_d.di_anextents == 0);
+ ASSERT(ip->i_d.di_forkoff == 0);
/*
* Free the inode.
@@ -2883,7 +2879,13 @@ xfs_rename_alloc_whiteout(
if (error)
return error;
- /* Satisfy xfs_bumplink that this is a real tmpfile */
+ /*
+ * Prepare the tmpfile inode as if it were created through the VFS.
+ * Otherwise, the link increment paths will complain about nlink 0->1.
+ * Drop the link count as done by d_tmpfile(), complete the inode setup
+ * and flag it as linkable.
+ */
+ drop_nlink(VFS_I(tmpfile));
xfs_finish_inode_setup(tmpfile);
VFS_I(tmpfile)->i_state |= I_LINKABLE;
@@ -3151,7 +3153,7 @@ xfs_rename(
* intermediate state on disk.
*/
if (wip) {
- ASSERT(wip->i_d.di_nlink == 0);
+ ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
error = xfs_bumplink(tp, wip);
if (error)
goto out_trans_abort;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index f4cd7204e236..7f51f39f8acc 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -41,7 +41,6 @@
#include <linux/capability.h>
#include <linux/xattr.h>
-#include <linux/namei.h>
#include <linux/posix_acl.h>
#include <linux/security.h>
#include <linux/fiemap.h>
@@ -414,10 +413,10 @@ xfs_vn_rename(
* we need to be very careful about how much stack we use.
* uio is kmalloced for this reason...
*/
-STATIC void *
+STATIC const char *
xfs_vn_follow_link(
struct dentry *dentry,
- struct nameidata *nd)
+ void **cookie)
{
char *link;
int error = -ENOMEM;
@@ -430,14 +429,12 @@ xfs_vn_follow_link(
if (unlikely(error))
goto out_kfree;
- nd_set_link(nd, link);
- return NULL;
+ return *cookie = link;
out_kfree:
kfree(link);
out_err:
- nd_set_link(nd, ERR_PTR(error));
- return NULL;
+ return ERR_PTR(error);
}
STATIC int
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2ce7ee3b4ec1..6f23fbdfb365 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1084,14 +1084,18 @@ xfs_log_sbcount(xfs_mount_t *mp)
return xfs_sync_sb(mp, true);
}
+/*
+ * Deltas for the inode count are +/-64, hence we use a large batch size
+ * of 128 so we don't need to take the counter lock on every update.
+ */
+#define XFS_ICOUNT_BATCH 128
int
xfs_mod_icount(
struct xfs_mount *mp,
int64_t delta)
{
- /* deltas are +/-64, hence the large batch size of 128. */
- __percpu_counter_add(&mp->m_icount, delta, 128);
- if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
+ __percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
+ if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
ASSERT(0);
percpu_counter_add(&mp->m_icount, -delta);
return -EINVAL;
@@ -1113,6 +1117,14 @@ xfs_mod_ifree(
return 0;
}
+/*
+ * Deltas for the block count can vary from 1 to very large, but lock contention
+ * only occurs on frequent small block count updates such as in the delayed
+ * allocation path for buffered writes (page-at-a-time updates). Hence we set
+ * a large batch count (1024) to minimise global counter updates except when
+ * we get near to ENOSPC and we have to be very accurate with our updates.
+ */
+#define XFS_FDBLOCKS_BATCH 1024
int
xfs_mod_fdblocks(
struct xfs_mount *mp,
@@ -1151,25 +1163,19 @@ xfs_mod_fdblocks(
* Taking blocks away, need to be more accurate the closer we
* are to zero.
*
- * batch size is set to a maximum of 1024 blocks - if we are
- * allocating of freeing extents larger than this then we aren't
- * going to be hammering the counter lock so a lock per update
- * is not a problem.
- *
* If the counter has a value of less than 2 * max batch size,
* then make everything serialise as we are real close to
* ENOSPC.
*/
-#define __BATCH 1024
- if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
+ if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
+ XFS_FDBLOCKS_BATCH) < 0)
batch = 1;
else
- batch = __BATCH;
-#undef __BATCH
+ batch = XFS_FDBLOCKS_BATCH;
__percpu_counter_add(&mp->m_fdblocks, delta, batch);
- if (percpu_counter_compare(&mp->m_fdblocks,
- XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
+ if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp),
+ XFS_FDBLOCKS_BATCH) >= 0) {
/* we had space! */
return 0;
}