summaryrefslogtreecommitdiff
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Makefile2
-rw-r--r--fs/ext4/acl.c64
-rw-r--r--fs/ext4/acl.h4
-rw-r--r--fs/ext4/ext4.h57
-rw-r--r--fs/ext4/ext4_extents.h4
-rw-r--r--fs/ext4/ext4_jbd2.c4
-rw-r--r--fs/ext4/ext4_jbd2.h6
-rw-r--r--fs/ext4/extents.c5
-rw-r--r--fs/ext4/file.c36
-rw-r--r--fs/ext4/fsync.c8
-rw-r--r--fs/ext4/ialloc.c48
-rw-r--r--fs/ext4/inode.c655
-rw-r--r--fs/ext4/ioctl.c57
-rw-r--r--fs/ext4/mballoc.c125
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/migrate.c8
-rw-r--r--fs/ext4/move_extent.c1320
-rw-r--r--fs/ext4/namei.c10
-rw-r--r--fs/ext4/resize.c2
-rw-r--r--fs/ext4/super.c46
20 files changed, 1818 insertions, 644 deletions
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8a34710ecf40..8867b2a1e5fe 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
- ext4_jbd2.o migrate.o mballoc.o block_validity.o
+ ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 647e0d65a284..f6d8967149ca 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -126,30 +126,6 @@ fail:
return ERR_PTR(-EINVAL);
}
-static inline struct posix_acl *
-ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
- struct posix_acl *acl = EXT4_ACL_NOT_CACHED;
-
- spin_lock(&inode->i_lock);
- if (*i_acl != EXT4_ACL_NOT_CACHED)
- acl = posix_acl_dup(*i_acl);
- spin_unlock(&inode->i_lock);
-
- return acl;
-}
-
-static inline void
-ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
- struct posix_acl *acl)
-{
- spin_lock(&inode->i_lock);
- if (*i_acl != EXT4_ACL_NOT_CACHED)
- posix_acl_release(*i_acl);
- *i_acl = posix_acl_dup(acl);
- spin_unlock(&inode->i_lock);
-}
-
/*
* Inode operation get_posix_acl().
*
@@ -158,7 +134,6 @@ ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
static struct posix_acl *
ext4_get_acl(struct inode *inode, int type)
{
- struct ext4_inode_info *ei = EXT4_I(inode);
int name_index;
char *value = NULL;
struct posix_acl *acl;
@@ -167,23 +142,19 @@ ext4_get_acl(struct inode *inode, int type)
if (!test_opt(inode->i_sb, POSIX_ACL))
return NULL;
+ acl = get_cached_acl(inode, type);
+ if (acl != ACL_NOT_CACHED)
+ return acl;
+
switch (type) {
case ACL_TYPE_ACCESS:
- acl = ext4_iget_acl(inode, &ei->i_acl);
- if (acl != EXT4_ACL_NOT_CACHED)
- return acl;
name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
break;
-
case ACL_TYPE_DEFAULT:
- acl = ext4_iget_acl(inode, &ei->i_default_acl);
- if (acl != EXT4_ACL_NOT_CACHED)
- return acl;
name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
break;
-
default:
- return ERR_PTR(-EINVAL);
+ BUG();
}
retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
if (retval > 0) {
@@ -200,17 +171,9 @@ ext4_get_acl(struct inode *inode, int type)
acl = ERR_PTR(retval);
kfree(value);
- if (!IS_ERR(acl)) {
- switch (type) {
- case ACL_TYPE_ACCESS:
- ext4_iset_acl(inode, &ei->i_acl, acl);
- break;
+ if (!IS_ERR(acl))
+ set_cached_acl(inode, type, acl);
- case ACL_TYPE_DEFAULT:
- ext4_iset_acl(inode, &ei->i_default_acl, acl);
- break;
- }
- }
return acl;
}
@@ -223,7 +186,6 @@ static int
ext4_set_acl(handle_t *handle, struct inode *inode, int type,
struct posix_acl *acl)
{
- struct ext4_inode_info *ei = EXT4_I(inode);
int name_index;
void *value = NULL;
size_t size = 0;
@@ -268,17 +230,9 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
value, size, 0);
kfree(value);
- if (!error) {
- switch (type) {
- case ACL_TYPE_ACCESS:
- ext4_iset_acl(inode, &ei->i_acl, acl);
- break;
+ if (!error)
+ set_cached_acl(inode, type, acl);
- case ACL_TYPE_DEFAULT:
- ext4_iset_acl(inode, &ei->i_default_acl, acl);
- break;
- }
- }
return error;
}
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index cb45257a246e..949789d2bba6 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -53,10 +53,6 @@ static inline int ext4_acl_count(size_t size)
#ifdef CONFIG_EXT4_FS_POSIX_ACL
-/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
- if the ACL has not been cached */
-#define EXT4_ACL_NOT_CACHED ((void *)-1)
-
/* acl.c */
extern int ext4_permission(struct inode *, int);
extern int ext4_acl_chmod(struct inode *);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cc7d5edc38c9..9714db393efe 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -93,20 +93,20 @@ typedef unsigned int ext4_group_t;
struct ext4_allocation_request {
/* target inode for block we're allocating */
struct inode *inode;
+ /* how many blocks we want to allocate */
+ unsigned int len;
/* logical block in target inode */
ext4_lblk_t logical;
- /* phys. target (a hint) */
- ext4_fsblk_t goal;
/* the closest logical allocated block to the left */
ext4_lblk_t lleft;
- /* phys. block for ^^^ */
- ext4_fsblk_t pleft;
/* the closest logical allocated block to the right */
ext4_lblk_t lright;
- /* phys. block for ^^^ */
+ /* phys. target (a hint) */
+ ext4_fsblk_t goal;
+ /* phys. block for the closest logical allocated block to the left */
+ ext4_fsblk_t pleft;
+ /* phys. block for the closest logical allocated block to the right */
ext4_fsblk_t pright;
- /* how many blocks we want to allocate */
- unsigned int len;
/* flags. see above EXT4_MB_HINT_* */
unsigned int flags;
};
@@ -352,6 +352,7 @@ struct ext4_new_group_data {
/* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
/* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
+#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
/*
* ioctl commands in 32 bit emulation
@@ -447,6 +448,15 @@ struct ext4_inode {
__le32 i_version_hi; /* high 32 bits for 64-bit version */
};
+struct move_extent {
+ __u32 reserved; /* should be zero */
+ __u32 donor_fd; /* donor file descriptor */
+ __u64 orig_start; /* logical start offset in block for orig */
+ __u64 donor_start; /* logical start offset in block for donor */
+ __u64 len; /* block length to be moved */
+ __u64 moved_len; /* moved block length */
+};
+#define MAX_DEFRAG_SIZE ((1UL<<31) - 1)
#define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
@@ -585,10 +595,6 @@ struct ext4_inode_info {
*/
struct rw_semaphore xattr_sem;
#endif
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
- struct posix_acl *i_acl;
- struct posix_acl *i_default_acl;
-#endif
struct list_head i_orphan; /* unlinked but open inodes */
@@ -674,7 +680,6 @@ struct ext4_inode_info {
#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
-#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */
#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
@@ -696,17 +701,10 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
-/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
-#ifndef _LINUX_EXT2_FS_H
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \
EXT4_MOUNT_##opt)
-#else
-#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD
-#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT
-#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS
-#endif
#define ext4_set_bit ext2_set_bit
#define ext4_set_bit_atomic ext2_set_bit_atomic
@@ -824,6 +822,13 @@ struct ext4_super_block {
};
#ifdef __KERNEL__
+
+/*
+ * run-time mount flags
+ */
+#define EXT4_MF_MNTDIR_SAMPLED 0x0001
+#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
+
/*
* fourth extended-fs super-block data in memory
*/
@@ -842,7 +847,8 @@ struct ext4_sb_info {
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
struct buffer_head **s_group_desc;
- unsigned long s_mount_opt;
+ unsigned int s_mount_opt;
+ unsigned int s_mount_flags;
ext4_fsblk_t s_sb_block;
uid_t s_resuid;
gid_t s_resgid;
@@ -853,6 +859,7 @@ struct ext4_sb_info {
int s_inode_size;
int s_first_ino;
unsigned int s_inode_readahead_blks;
+ unsigned int s_inode_goal;
spinlock_t s_next_gen_lock;
u32 s_next_generation;
u32 s_hash_seed[4];
@@ -1305,7 +1312,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
dx_hash_info *hinfo);
/* ialloc.c */
-extern struct inode * ext4_new_inode(handle_t *, struct inode *, int);
+extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
+ const struct qstr *qstr, __u32 goal);
extern void ext4_free_inode(handle_t *, struct inode *);
extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -1329,7 +1337,7 @@ extern void ext4_discard_preallocations(struct inode *);
extern int __init init_ext4_mballoc(void);
extern void exit_ext4_mballoc(void);
extern void ext4_mb_free_blocks(handle_t *, struct inode *,
- unsigned long, unsigned long, int, unsigned long *);
+ ext4_fsblk_t, unsigned long, int, unsigned long *);
extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
@@ -1647,6 +1655,11 @@ extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int flags);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
+/* move_extent.c */
+extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
+ __u64 start_orig, __u64 start_donor,
+ __u64 len, __u64 *moved_len);
+
/*
* Add new method to test wether block and inode bitmaps are properly
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index f0c3ec85bd48..20a84105a10b 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -221,12 +221,16 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
}
extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
+extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
int num,
struct ext4_ext_path *path);
+extern int ext4_can_extents_be_merged(struct inode *inode,
+ struct ext4_extent *ex1,
+ struct ext4_extent *ex2);
extern int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index ad13a84644e1..eb27fd0f2ee8 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -43,6 +43,8 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
ext4_journal_abort_handle(where, __func__, bh,
handle, err);
}
+ else
+ brelse(bh);
return err;
}
@@ -57,6 +59,8 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
ext4_journal_abort_handle(where, __func__, bh,
handle, err);
}
+ else
+ brelse(bh);
return err;
}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index be2f426f6805..139fb8cb87e4 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -131,9 +131,11 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
int __ext4_journal_get_write_access(const char *where, handle_t *handle,
struct buffer_head *bh);
+/* When called with an invalid handle, this will still do a put on the BH */
int __ext4_journal_forget(const char *where, handle_t *handle,
struct buffer_head *bh);
+/* When called with an invalid handle, this will still do a put on the BH */
int __ext4_journal_revoke(const char *where, handle_t *handle,
ext4_fsblk_t blocknr, struct buffer_head *bh);
@@ -281,10 +283,10 @@ static inline int ext4_should_order_data(struct inode *inode)
static inline int ext4_should_writeback_data(struct inode *inode)
{
- if (EXT4_JOURNAL(inode) == NULL)
- return 0;
if (!S_ISREG(inode->i_mode))
return 0;
+ if (EXT4_JOURNAL(inode) == NULL)
+ return 1;
if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
return 0;
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2593f748c3a4..73ebfb44ad75 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -49,7 +49,7 @@
* ext_pblock:
* combine low and high parts of physical block number into ext4_fsblk_t
*/
-static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
+ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
{
ext4_fsblk_t block;
@@ -1417,7 +1417,7 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
return err;
}
-static int
+int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
struct ext4_extent *ex2)
{
@@ -1977,6 +1977,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
*/
/* 1 bitmap, 1 block group descriptor */
ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
+ return ret;
}
}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 588af8c77246..3f1873fef1c6 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -21,6 +21,8 @@
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
+#include <linux/mount.h>
+#include <linux/path.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -145,6 +147,38 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
+static int ext4_file_open(struct inode * inode, struct file * filp)
+{
+ struct super_block *sb = inode->i_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct vfsmount *mnt = filp->f_path.mnt;
+ struct path path;
+ char buf[64], *cp;
+
+ if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
+ !(sb->s_flags & MS_RDONLY))) {
+ sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
+ /*
+ * Sample where the filesystem has been mounted and
+ * store it in the superblock for sysadmin convenience
+ * when trying to sort through large numbers of block
+ * devices or filesystem images.
+ */
+ memset(buf, 0, sizeof(buf));
+ path.mnt = mnt->mnt_parent;
+ path.dentry = mnt->mnt_mountpoint;
+ path_get(&path);
+ cp = d_path(&path, buf, sizeof(buf));
+ path_put(&path);
+ if (!IS_ERR(cp)) {
+ memcpy(sbi->s_es->s_last_mounted, cp,
+ sizeof(sbi->s_es->s_last_mounted));
+ sb->s_dirt = 1;
+ }
+ }
+ return generic_file_open(inode, filp);
+}
+
const struct file_operations ext4_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -156,7 +190,7 @@ const struct file_operations ext4_file_operations = {
.compat_ioctl = ext4_compat_ioctl,
#endif
.mmap = ext4_file_mmap,
- .open = generic_file_open,
+ .open = ext4_file_open,
.release = ext4_release_file,
.fsync = ext4_sync_file,
.splice_read = generic_file_splice_read,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 5afe4370840b..83cf6415f599 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -28,10 +28,12 @@
#include <linux/writeback.h>
#include <linux/jbd2.h>
#include <linux/blkdev.h>
-#include <linux/marker.h>
+
#include "ext4.h"
#include "ext4_jbd2.h"
+#include <trace/events/ext4.h>
+
/*
* akpm: A new design for ext4_sync_file().
*
@@ -52,9 +54,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
J_ASSERT(ext4_journal_current_handle() == NULL);
- trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
- inode->i_sb->s_id, datasync, inode->i_ino,
- dentry->d_parent->d_inode->i_ino);
+ trace_ext4_sync_file(file, dentry, datasync);
/*
* data=writeback:
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3743bd849bce..29e6dc7299b8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -23,11 +23,14 @@
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <asm/byteorder.h>
+
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
+#include <trace/events/ext4.h>
+
/*
* ialloc.c contains the inodes allocation and deallocation routines
*/
@@ -208,11 +211,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
ino = inode->i_ino;
ext4_debug("freeing inode %lu\n", ino);
- trace_mark(ext4_free_inode,
- "dev %s ino %lu mode %d uid %lu gid %lu bocks %llu",
- sb->s_id, inode->i_ino, inode->i_mode,
- (unsigned long) inode->i_uid, (unsigned long) inode->i_gid,
- (unsigned long long) inode->i_blocks);
+ trace_ext4_free_inode(inode);
/*
* Note: we must free any quota before locking the superblock,
@@ -471,7 +470,8 @@ void get_orlov_stats(struct super_block *sb, ext4_group_t g,
*/
static int find_group_orlov(struct super_block *sb, struct inode *parent,
- ext4_group_t *group, int mode)
+ ext4_group_t *group, int mode,
+ const struct qstr *qstr)
{
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -486,6 +486,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
struct ext4_group_desc *desc;
struct orlov_stats stats;
int flex_size = ext4_flex_bg_size(sbi);
+ struct dx_hash_info hinfo;
ngroups = real_ngroups;
if (flex_size > 1) {
@@ -507,7 +508,13 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
int best_ndir = inodes_per_group;
int ret = -1;
- get_random_bytes(&grp, sizeof(grp));
+ if (qstr) {
+ hinfo.hash_version = DX_HASH_HALF_MD4;
+ hinfo.seed = sbi->s_hash_seed;
+ ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
+ grp = hinfo.hash;
+ } else
+ get_random_bytes(&grp, sizeof(grp));
parent_group = (unsigned)grp % ngroups;
for (i = 0; i < ngroups; i++) {
g = (parent_group + i) % ngroups;
@@ -650,7 +657,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
*group = parent_group + flex_size;
if (*group > ngroups)
*group = 0;
- return find_group_orlov(sb, parent, group, mode);
+ return find_group_orlov(sb, parent, group, mode, 0);
}
/*
@@ -791,7 +798,8 @@ err_ret:
* For other inodes, search forward from the parent directory's block
* group to find a free inode.
*/
-struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
+struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
+ const struct qstr *qstr, __u32 goal)
{
struct super_block *sb;
struct buffer_head *inode_bitmap_bh = NULL;
@@ -815,14 +823,23 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
sb = dir->i_sb;
ngroups = ext4_get_groups_count(sb);
- trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
- dir->i_ino, mode);
+ trace_ext4_request_inode(dir, mode);
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
ei = EXT4_I(inode);
sbi = EXT4_SB(sb);
+ if (!goal)
+ goal = sbi->s_inode_goal;
+
+ if (goal && goal <= le32_to_cpu(sbi->s_es->s_inodes_count)) {
+ group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
+ ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
+ ret2 = 0;
+ goto got_group;
+ }
+
if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
ret2 = find_group_flex(sb, dir, &group);
if (ret2 == -1) {
@@ -841,7 +858,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
if (test_opt(sb, OLDALLOC))
ret2 = find_group_dir(sb, dir, &group);
else
- ret2 = find_group_orlov(sb, dir, &group, mode);
+ ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
} else
ret2 = find_group_other(sb, dir, &group, mode);
@@ -851,7 +868,7 @@ got_group:
if (ret2 == -1)
goto out;
- for (i = 0; i < ngroups; i++) {
+ for (i = 0; i < ngroups; i++, ino = 0) {
err = -EIO;
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -863,8 +880,6 @@ got_group:
if (!inode_bitmap_bh)
goto fail;
- ino = 0;
-
repeat_in_this_group:
ino = ext4_find_next_zero_bit((unsigned long *)
inode_bitmap_bh->b_data,
@@ -1047,8 +1062,7 @@ got:
}
ext4_debug("allocating inode %lu\n", inode->i_ino);
- trace_mark(ext4_allocate_inode, "dev %s ino %lu dir %lu mode %d",
- sb->s_id, inode->i_ino, dir->i_ino, mode);
+ trace_ext4_allocate_inode(inode, dir, mode);
goto really_out;
fail:
ext4_std_error(sb, err);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 875db944b22f..f9c642b22efa 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,11 +37,14 @@
#include <linux/namei.h>
#include <linux/uio.h>
#include <linux/bio.h>
+
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "ext4_extents.h"
+#include <trace/events/ext4.h>
+
#define MPAGE_DA_EXTENT_TAIL 0x01
static inline int ext4_begin_ordered_truncate(struct inode *inode,
@@ -75,22 +78,20 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
* but there may still be a record of it in the journal, and that record
* still needs to be revoked.
*
- * If the handle isn't valid we're not journaling so there's nothing to do.
+ * If the handle isn't valid we're not journaling, but we still need to
+ * call into ext4_journal_revoke() to put the buffer head.
*/
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
- struct buffer_head *bh, ext4_fsblk_t blocknr)
+ struct buffer_head *bh, ext4_fsblk_t blocknr)
{
int err;
- if (!ext4_handle_valid(handle))
- return 0;
-
might_sleep();
BUFFER_TRACE(bh, "enter");
jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
- "data mode %lx\n",
+ "data mode %x\n",
bh, is_metadata, inode->i_mode,
test_opt(inode->i_sb, DATA_FLAGS));
@@ -329,8 +330,8 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
*/
static int ext4_block_to_path(struct inode *inode,
- ext4_lblk_t i_block,
- ext4_lblk_t offsets[4], int *boundary)
+ ext4_lblk_t i_block,
+ ext4_lblk_t offsets[4], int *boundary)
{
int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
@@ -362,9 +363,9 @@ static int ext4_block_to_path(struct inode *inode,
final = ptrs;
} else {
ext4_warning(inode->i_sb, "ext4_block_to_path",
- "block %lu > max in inode %lu",
- i_block + direct_blocks +
- indirect_blocks + double_blocks, inode->i_ino);
+ "block %lu > max in inode %lu",
+ i_block + direct_blocks +
+ indirect_blocks + double_blocks, inode->i_ino);
}
if (boundary)
*boundary = final - 1 - (i_block & (ptrs - 1));
@@ -379,25 +380,25 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
while (bref < p+max) {
blk = le32_to_cpu(*bref++);
- if (blk &&
- unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
+ if (blk &&
+ unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
blk, 1))) {
ext4_error(inode->i_sb, function,
"invalid block reference %u "
"in inode #%lu", blk, inode->i_ino);
- return -EIO;
- }
- }
- return 0;
+ return -EIO;
+ }
+ }
+ return 0;
}
#define ext4_check_indirect_blockref(inode, bh) \
- __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \
+ __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \
EXT4_ADDR_PER_BLOCK((inode)->i_sb))
#define ext4_check_inode_blockref(inode) \
- __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \
+ __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \
EXT4_NDIR_BLOCKS)
/**
@@ -447,7 +448,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
bh = sb_getblk(sb, le32_to_cpu(p->key));
if (unlikely(!bh))
goto failure;
-
+
if (!bh_uptodate_or_lock(bh)) {
if (bh_submit_read(bh) < 0) {
put_bh(bh);
@@ -459,7 +460,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
goto failure;
}
}
-
+
add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
/* Reader: end */
if (!p->key)
@@ -552,7 +553,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
* returns it.
*/
static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
- Indirect *partial)
+ Indirect *partial)
{
/*
* XXX need to get goal block from mballoc's data structures
@@ -574,7 +575,7 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
* direct and indirect blocks.
*/
static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
- int blocks_to_boundary)
+ int blocks_to_boundary)
{
unsigned int count = 0;
@@ -610,9 +611,9 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
* direct blocks
*/
static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
- ext4_lblk_t iblock, ext4_fsblk_t goal,
- int indirect_blks, int blks,
- ext4_fsblk_t new_blocks[4], int *err)
+ ext4_lblk_t iblock, ext4_fsblk_t goal,
+ int indirect_blks, int blks,
+ ext4_fsblk_t new_blocks[4], int *err)
{
struct ext4_allocation_request ar;
int target, i;
@@ -683,10 +684,10 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
}
if (!*err) {
if (target == blks) {
- /*
- * save the new block number
- * for the first direct block
- */
+ /*
+ * save the new block number
+ * for the first direct block
+ */
new_blocks[index] = current_block;
}
blk_allocated += ar.len;
@@ -728,9 +729,9 @@ failed_out:
* as described above and return 0.
*/
static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
- ext4_lblk_t iblock, int indirect_blks,
- int *blks, ext4_fsblk_t goal,
- ext4_lblk_t *offsets, Indirect *branch)
+ ext4_lblk_t iblock, int indirect_blks,
+ int *blks, ext4_fsblk_t goal,
+ ext4_lblk_t *offsets, Indirect *branch)
{
int blocksize = inode->i_sb->s_blocksize;
int i, n = 0;
@@ -777,7 +778,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
* the chain to point to the new allocated
* data blocks numbers
*/
- for (i=1; i < num; i++)
+ for (i = 1; i < num; i++)
*(branch[n].p + i) = cpu_to_le32(++current_block);
}
BUFFER_TRACE(bh, "marking uptodate");
@@ -820,7 +821,8 @@ failed:
* chain to new block and return 0.
*/
static int ext4_splice_branch(handle_t *handle, struct inode *inode,
- ext4_lblk_t block, Indirect *where, int num, int blks)
+ ext4_lblk_t block, Indirect *where, int num,
+ int blks)
{
int i;
int err = 0;
@@ -852,10 +854,6 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
}
/* We are done with atomic stuff, now do the rest of housekeeping */
-
- inode->i_ctime = ext4_current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
-
/* had we spliced it onto indirect block? */
if (where->bh) {
/*
@@ -874,8 +872,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
} else {
/*
* OK, we spliced it into the inode itself on a direct block.
- * Inode was dirtied above.
*/
+ ext4_mark_inode_dirty(handle, inode);
jbd_debug(5, "splicing direct\n");
}
return err;
@@ -921,9 +919,9 @@ err_out:
* blocks.
*/
static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
- ext4_lblk_t iblock, unsigned int maxblocks,
- struct buffer_head *bh_result,
- int flags)
+ ext4_lblk_t iblock, unsigned int maxblocks,
+ struct buffer_head *bh_result,
+ int flags)
{
int err = -EIO;
ext4_lblk_t offsets[4];
@@ -939,7 +937,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
depth = ext4_block_to_path(inode, iblock, offsets,
- &blocks_to_boundary);
+ &blocks_to_boundary);
if (depth == 0)
goto out;
@@ -987,8 +985,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
* Block out ext4_truncate while we alter the tree
*/
err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
- &count, goal,
- offsets + (partial - chain), partial);
+ &count, goal,
+ offsets + (partial - chain), partial);
/*
* The ext4_splice_branch call will free and forget any buffers
@@ -999,8 +997,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
*/
if (!err)
err = ext4_splice_branch(handle, inode, iblock,
- partial, indirect_blks, count);
- else
+ partial, indirect_blks, count);
+ else
goto cleanup;
set_buffer_new(bh_result);
@@ -1172,7 +1170,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
up_read((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && buffer_mapped(bh)) {
- int ret = check_block_validity(inode, block,
+ int ret = check_block_validity(inode, block,
bh->b_blocknr, retval);
if (ret != 0)
return ret;
@@ -1254,7 +1252,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && buffer_mapped(bh)) {
- int ret = check_block_validity(inode, block,
+ int ret = check_block_validity(inode, block,
bh->b_blocknr, retval);
if (ret != 0)
return ret;
@@ -1405,8 +1403,7 @@ static int walk_page_buffers(handle_t *handle,
for (bh = head, block_start = 0;
ret == 0 && (bh != head || !block_start);
- block_start = block_end, bh = next)
- {
+ block_start = block_end, bh = next) {
next = bh->b_this_page;
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
@@ -1447,7 +1444,7 @@ static int walk_page_buffers(handle_t *handle,
* write.
*/
static int do_journal_get_write_access(handle_t *handle,
- struct buffer_head *bh)
+ struct buffer_head *bh)
{
if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
@@ -1455,27 +1452,24 @@ static int do_journal_get_write_access(handle_t *handle,
}
static int ext4_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata)
{
struct inode *inode = mapping->host;
int ret, needed_blocks;
handle_t *handle;
int retries = 0;
struct page *page;
- pgoff_t index;
+ pgoff_t index;
unsigned from, to;
- trace_mark(ext4_write_begin,
- "dev %s ino %lu pos %llu len %u flags %u",
- inode->i_sb->s_id, inode->i_ino,
- (unsigned long long) pos, len, flags);
+ trace_ext4_write_begin(inode, pos, len, flags);
/*
* Reserve one block more for addition to orphan list in case
* we allocate blocks but write fails for some reason
*/
needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
- index = pos >> PAGE_CACHE_SHIFT;
+ index = pos >> PAGE_CACHE_SHIFT;
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
@@ -1517,14 +1511,14 @@ retry:
* Add inode to orphan list in case we crash before
* truncate finishes
*/
- if (pos + len > inode->i_size)
+ if (pos + len > inode->i_size && ext4_can_truncate(inode))
ext4_orphan_add(handle, inode);
ext4_journal_stop(handle);
if (pos + len > inode->i_size) {
- vmtruncate(inode, inode->i_size);
- /*
- * If vmtruncate failed early the inode might
+ ext4_truncate(inode);
+ /*
+ * If truncate failed early the inode might
* still be on the orphan list; we need to
* make sure the inode is removed from the
* orphan list in that case.
@@ -1550,9 +1544,9 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
}
static int ext4_generic_write_end(struct file *file,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
{
int i_size_changed = 0;
struct inode *inode = mapping->host;
@@ -1603,25 +1597,22 @@ static int ext4_generic_write_end(struct file *file,
* buffers are managed internally.
*/
static int ext4_ordered_write_end(struct file *file,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
{
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
int ret = 0, ret2;
- trace_mark(ext4_ordered_write_end,
- "dev %s ino %lu pos %llu len %u copied %u",
- inode->i_sb->s_id, inode->i_ino,
- (unsigned long long) pos, len, copied);
+ trace_ext4_ordered_write_end(inode, pos, len, copied);
ret = ext4_jbd2_file_inode(handle, inode);
if (ret == 0) {
ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
page, fsdata);
copied = ret2;
- if (pos + len > inode->i_size)
+ if (pos + len > inode->i_size && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied
* less. We will have blocks allocated outside
* inode->i_size. So truncate them
@@ -1635,9 +1626,9 @@ static int ext4_ordered_write_end(struct file *file,
ret = ret2;
if (pos + len > inode->i_size) {
- vmtruncate(inode, inode->i_size);
- /*
- * If vmtruncate failed early the inode might still be
+ ext4_truncate(inode);
+ /*
+ * If truncate failed early the inode might still be
* on the orphan list; we need to make sure the inode
* is removed from the orphan list in that case.
*/
@@ -1650,22 +1641,19 @@ static int ext4_ordered_write_end(struct file *file,
}
static int ext4_writeback_write_end(struct file *file,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
{
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
int ret = 0, ret2;
- trace_mark(ext4_writeback_write_end,
- "dev %s ino %lu pos %llu len %u copied %u",
- inode->i_sb->s_id, inode->i_ino,
- (unsigned long long) pos, len, copied);
+ trace_ext4_writeback_write_end(inode, pos, len, copied);
ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
page, fsdata);
copied = ret2;
- if (pos + len > inode->i_size)
+ if (pos + len > inode->i_size && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied
* less. We will have blocks allocated outside
* inode->i_size. So truncate them
@@ -1680,9 +1668,9 @@ static int ext4_writeback_write_end(struct file *file,
ret = ret2;
if (pos + len > inode->i_size) {
- vmtruncate(inode, inode->i_size);
- /*
- * If vmtruncate failed early the inode might still be
+ ext4_truncate(inode);
+ /*
+ * If truncate failed early the inode might still be
* on the orphan list; we need to make sure the inode
* is removed from the orphan list in that case.
*/
@@ -1694,9 +1682,9 @@ static int ext4_writeback_write_end(struct file *file,
}
static int ext4_journalled_write_end(struct file *file,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
{
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
@@ -1705,10 +1693,7 @@ static int ext4_journalled_write_end(struct file *file,
unsigned from, to;
loff_t new_i_size;
- trace_mark(ext4_journalled_write_end,
- "dev %s ino %lu pos %llu len %u copied %u",
- inode->i_sb->s_id, inode->i_ino,
- (unsigned long long) pos, len, copied);
+ trace_ext4_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
@@ -1735,7 +1720,7 @@ static int ext4_journalled_write_end(struct file *file,
unlock_page(page);
page_cache_release(page);
- if (pos + len > inode->i_size)
+ if (pos + len > inode->i_size && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied
* less. We will have blocks allocated outside
* inode->i_size. So truncate them
@@ -1746,9 +1731,9 @@ static int ext4_journalled_write_end(struct file *file,
if (!ret)
ret = ret2;
if (pos + len > inode->i_size) {
- vmtruncate(inode, inode->i_size);
- /*
- * If vmtruncate failed early the inode might still be
+ ext4_truncate(inode);
+ /*
+ * If truncate failed early the inode might still be
* on the orphan list; we need to make sure the inode
* is removed from the orphan list in that case.
*/
@@ -1854,7 +1839,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
}
static void ext4_da_page_release_reservation(struct page *page,
- unsigned long offset)
+ unsigned long offset)
{
int to_release = 0;
struct buffer_head *head, *bh;
@@ -2318,15 +2303,9 @@ flush_it:
return;
}
-static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
{
- /*
- * unmapped buffer is possible for holes.
- * delay buffer is possible with delayed allocation.
- * We also need to consider unwritten buffer as unmapped.
- */
- return (!buffer_mapped(bh) || buffer_delay(bh) ||
- buffer_unwritten(bh)) && buffer_dirty(bh);
+ return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
}
/*
@@ -2411,9 +2390,9 @@ static int __mpage_da_writepage(struct page *page,
* We need to try to allocate
* unmapped blocks in the same page.
* Otherwise we won't make progress
- * with the page in ext4_da_writepage
+ * with the page in ext4_writepage
*/
- if (ext4_bh_unmapped_or_delay(NULL, bh)) {
+ if (ext4_bh_delay_or_unwritten(NULL, bh)) {
mpage_add_bh_to_extent(mpd, logical,
bh->b_size,
bh->b_state);
@@ -2530,7 +2509,6 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
* so call get_block_wrap with create = 0
*/
ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
- BUG_ON(create && ret == 0);
if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits);
ret = 0;
@@ -2538,15 +2516,102 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
return ret;
}
+static int bget_one(handle_t *handle, struct buffer_head *bh)
+{
+ get_bh(bh);
+ return 0;
+}
+
+static int bput_one(handle_t *handle, struct buffer_head *bh)
+{
+ put_bh(bh);
+ return 0;
+}
+
+static int __ext4_journalled_writepage(struct page *page,
+ struct writeback_control *wbc,
+ unsigned int len)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+ struct buffer_head *page_bufs;
+ handle_t *handle = NULL;
+ int ret = 0;
+ int err;
+
+ page_bufs = page_buffers(page);
+ BUG_ON(!page_bufs);
+ walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
+ /* As soon as we unlock the page, it can go away, but we have
+ * references to buffers so we are safe */
+ unlock_page(page);
+
+ handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out;
+ }
+
+ ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
+ do_journal_get_write_access);
+
+ err = walk_page_buffers(handle, page_bufs, 0, len, NULL,
+ write_end_fn);
+ if (ret == 0)
+ ret = err;
+ err = ext4_journal_stop(handle);
+ if (!ret)
+ ret = err;
+
+ walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
+ EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
+out:
+ return ret;
+}
+
/*
+ * Note that we don't need to start a transaction unless we're journaling data
+ * because we should have holes filled from ext4_page_mkwrite(). We even don't
+ * need to file the inode to the transaction's list in ordered mode because if
+ * we are writing back data added by write(), the inode is already there and if
+ * we are writing back data modified via mmap(), noone guarantees in which
+ * transaction the data will hit the disk. In case we are journaling data, we
+ * cannot start transaction directly because transaction start ranks above page
+ * lock so we have to do some magic.
+ *
* This function can get called via...
* - ext4_da_writepages after taking page lock (have journal handle)
* - journal_submit_inode_data_buffers (no journal handle)
* - shrink_page_list via pdflush (no journal handle)
* - grab_page_cache when doing write_begin (have journal handle)
+ *
+ * We don't do any block allocation in this function. If we have page with
+ * multiple blocks we need to write those buffer_heads that are mapped. This
+ * is important for mmaped based write. So if we do with blocksize 1K
+ * truncate(f, 1024);
+ * a = mmap(f, 0, 4096);
+ * a[0] = 'a';
+ * truncate(f, 4096);
+ * we have in the page first buffer_head mapped via page_mkwrite call back
+ * but other bufer_heads would be unmapped but dirty(dirty done via the
+ * do_wp_page). So writepage should write the first block. If we modify
+ * the mmap area beyond 1024 we will again get a page_fault and the
+ * page_mkwrite callback will do the block allocation and mark the
+ * buffer_heads mapped.
+ *
+ * We redirty the page if we have any buffer_heads that is either delay or
+ * unwritten in the page.
+ *
+ * We can get recursively called as show below.
+ *
+ * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
+ * ext4_writepage()
+ *
+ * But since we don't do any block allocation we should not deadlock.
+ * Page also have the dirty flag cleared so we don't get recurive page_lock.
*/
-static int ext4_da_writepage(struct page *page,
- struct writeback_control *wbc)
+static int ext4_writepage(struct page *page,
+ struct writeback_control *wbc)
{
int ret = 0;
loff_t size;
@@ -2554,9 +2619,7 @@ static int ext4_da_writepage(struct page *page,
struct buffer_head *page_bufs;
struct inode *inode = page->mapping->host;
- trace_mark(ext4_da_writepage,
- "dev %s ino %lu page_index %lu",
- inode->i_sb->s_id, inode->i_ino, page->index);
+ trace_ext4_writepage(inode, page);
size = i_size_read(inode);
if (page->index == size >> PAGE_CACHE_SHIFT)
len = size & ~PAGE_CACHE_MASK;
@@ -2566,7 +2629,7 @@ static int ext4_da_writepage(struct page *page,
if (page_has_buffers(page)) {
page_bufs = page_buffers(page);
if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
- ext4_bh_unmapped_or_delay)) {
+ ext4_bh_delay_or_unwritten)) {
/*
* We don't want to do block allocation
* So redirty the page and return
@@ -2593,13 +2656,13 @@ static int ext4_da_writepage(struct page *page,
* all are mapped and non delay. We don't want to
* do block allocation here.
*/
- ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+ ret = block_prepare_write(page, 0, len,
noalloc_get_block_write);
if (!ret) {
page_bufs = page_buffers(page);
/* check whether all are mapped and non delay */
if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
- ext4_bh_unmapped_or_delay)) {
+ ext4_bh_delay_or_unwritten)) {
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
@@ -2615,7 +2678,16 @@ static int ext4_da_writepage(struct page *page,
return 0;
}
/* now mark the buffer_heads as dirty and uptodate */
- block_commit_write(page, 0, PAGE_CACHE_SIZE);
+ block_commit_write(page, 0, len);
+ }
+
+ if (PageChecked(page) && ext4_should_journal_data(inode)) {
+ /*
+ * It's mmapped pagecache. Add buffers and journal it. There
+ * doesn't seem much point in redirtying the page here.
+ */
+ ClearPageChecked(page);
+ return __ext4_journalled_writepage(page, wbc, len);
}
if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -2667,19 +2739,7 @@ static int ext4_da_writepages(struct address_space *mapping,
int needed_blocks, ret = 0, nr_to_writebump = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
- trace_mark(ext4_da_writepages,
- "dev %s ino %lu nr_t_write %ld "
- "pages_skipped %ld range_start %llu "
- "range_end %llu nonblocking %d "
- "for_kupdate %d for_reclaim %d "
- "for_writepages %d range_cyclic %d",
- inode->i_sb->s_id, inode->i_ino,
- wbc->nr_to_write, wbc->pages_skipped,
- (unsigned long long) wbc->range_start,
- (unsigned long long) wbc->range_end,
- wbc->nonblocking, wbc->for_kupdate,
- wbc->for_reclaim, wbc->for_writepages,
- wbc->range_cyclic);
+ trace_ext4_da_writepages(inode, wbc);
/*
* No pages to write? This is mainly a kludge to avoid starting
@@ -2693,13 +2753,13 @@ static int ext4_da_writepages(struct address_space *mapping,
* If the filesystem has aborted, it is read-only, so return
* right away instead of dumping stack traces later on that
* will obscure the real source of the problem. We test
- * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because
+ * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because
* the latter could be true if the filesystem is mounted
* read-only, and in that case, ext4_da_writepages should
* *never* be called, so if that ever happens, we would want
* the stack trace.
*/
- if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT))
+ if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
return -EROFS;
/*
@@ -2845,14 +2905,7 @@ out_writepages:
if (!no_nrwrite_index_update)
wbc->no_nrwrite_index_update = 0;
wbc->nr_to_write -= nr_to_writebump;
- trace_mark(ext4_da_writepage_result,
- "dev %s ino %lu ret %d pages_written %d "
- "pages_skipped %ld congestion %d "
- "more_io %d no_nrwrite_index_update %d",
- inode->i_sb->s_id, inode->i_ino, ret,
- pages_written, wbc->pages_skipped,
- wbc->encountered_congestion, wbc->more_io,
- wbc->no_nrwrite_index_update);
+ trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
return ret;
}
@@ -2884,8 +2937,8 @@ static int ext4_nonda_switch(struct super_block *sb)
}
static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata)
{
int ret, retries = 0;
struct page *page;
@@ -2904,11 +2957,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
len, flags, pagep, fsdata);
}
*fsdata = (void *)0;
-
- trace_mark(ext4_da_write_begin,
- "dev %s ino %lu pos %llu len %u flags %u",
- inode->i_sb->s_id, inode->i_ino,
- (unsigned long long) pos, len, flags);
+ trace_ext4_da_write_begin(inode, pos, len, flags);
retry:
/*
* With delayed allocation, we don't log the i_disksize update
@@ -2945,7 +2994,7 @@ retry:
* i_size_read because we hold i_mutex.
*/
if (pos + len > inode->i_size)
- vmtruncate(inode, inode->i_size);
+ ext4_truncate(inode);
}
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2959,7 +3008,7 @@ out:
* when write to the end of file but not require block allocation
*/
static int ext4_da_should_update_i_disksize(struct page *page,
- unsigned long offset)
+ unsigned long offset)
{
struct buffer_head *bh;
struct inode *inode = page->mapping->host;
@@ -2978,9 +3027,9 @@ static int ext4_da_should_update_i_disksize(struct page *page,
}
static int ext4_da_write_end(struct file *file,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
int ret = 0, ret2;
@@ -3001,10 +3050,7 @@ static int ext4_da_write_end(struct file *file,
}
}
- trace_mark(ext4_da_write_end,
- "dev %s ino %lu pos %llu len %u copied %u",
- inode->i_sb->s_id, inode->i_ino,
- (unsigned long long) pos, len, copied);
+ trace_ext4_da_write_end(inode, pos, len, copied);
start = pos & (PAGE_CACHE_SIZE - 1);
end = start + copied - 1;
@@ -3081,7 +3127,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
* not strictly speaking necessary (and for users of
* laptop_mode, not even desirable). However, to do otherwise
* would require replicating code paths in:
- *
+ *
* ext4_da_writepages() ->
* write_cache_pages() ---> (via passed in callback function)
* __mpage_da_writepage() -->
@@ -3101,7 +3147,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
* write out the pages, but rather only collect contiguous
* logical block extents, call the multi-block allocator, and
* then update the buffer heads with the block allocations.
- *
+ *
* For now, though, we'll cheat by calling filemap_flush(),
* which will map the blocks, and start the I/O, but not
* actually wait for the I/O to complete.
@@ -3171,226 +3217,6 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
return generic_block_bmap(mapping, block, ext4_get_block);
}
-static int bget_one(handle_t *handle, struct buffer_head *bh)
-{
- get_bh(bh);
- return 0;
-}
-
-static int bput_one(handle_t *handle, struct buffer_head *bh)
-{
- put_bh(bh);
- return 0;
-}
-
-/*
- * Note that we don't need to start a transaction unless we're journaling data
- * because we should have holes filled from ext4_page_mkwrite(). We even don't
- * need to file the inode to the transaction's list in ordered mode because if
- * we are writing back data added by write(), the inode is already there and if
- * we are writing back data modified via mmap(), noone guarantees in which
- * transaction the data will hit the disk. In case we are journaling data, we
- * cannot start transaction directly because transaction start ranks above page
- * lock so we have to do some magic.
- *
- * In all journaling modes block_write_full_page() will start the I/O.
- *
- * Problem:
- *
- * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
- * ext4_writepage()
- *
- * Similar for:
- *
- * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ...
- *
- * Same applies to ext4_get_block(). We will deadlock on various things like
- * lock_journal and i_data_sem
- *
- * Setting PF_MEMALLOC here doesn't work - too many internal memory
- * allocations fail.
- *
- * 16May01: If we're reentered then journal_current_handle() will be
- * non-zero. We simply *return*.
- *
- * 1 July 2001: @@@ FIXME:
- * In journalled data mode, a data buffer may be metadata against the
- * current transaction. But the same file is part of a shared mapping
- * and someone does a writepage() on it.
- *
- * We will move the buffer onto the async_data list, but *after* it has
- * been dirtied. So there's a small window where we have dirty data on
- * BJ_Metadata.
- *
- * Note that this only applies to the last partial page in the file. The
- * bit which block_write_full_page() uses prepare/commit for. (That's
- * broken code anyway: it's wrong for msync()).
- *
- * It's a rare case: affects the final partial page, for journalled data
- * where the file is subject to bith write() and writepage() in the same
- * transction. To fix it we'll need a custom block_write_full_page().
- * We'll probably need that anyway for journalling writepage() output.
- *
- * We don't honour synchronous mounts for writepage(). That would be
- * disastrous. Any write() or metadata operation will sync the fs for
- * us.
- *
- */
-static int __ext4_normal_writepage(struct page *page,
- struct writeback_control *wbc)
-{
- struct inode *inode = page->mapping->host;
-
- if (test_opt(inode->i_sb, NOBH))
- return nobh_writepage(page, noalloc_get_block_write, wbc);
- else
- return block_write_full_page(page, noalloc_get_block_write,
- wbc);
-}
-
-static int ext4_normal_writepage(struct page *page,
- struct writeback_control *wbc)
-{
- struct inode *inode = page->mapping->host;
- loff_t size = i_size_read(inode);
- loff_t len;
-
- trace_mark(ext4_normal_writepage,
- "dev %s ino %lu page_index %lu",
- inode->i_sb->s_id, inode->i_ino, page->index);
- J_ASSERT(PageLocked(page));
- if (page->index == size >> PAGE_CACHE_SHIFT)
- len = size & ~PAGE_CACHE_MASK;
- else
- len = PAGE_CACHE_SIZE;
-
- if (page_has_buffers(page)) {
- /* if page has buffers it should all be mapped
- * and allocated. If there are not buffers attached
- * to the page we know the page is dirty but it lost
- * buffers. That means that at some moment in time
- * after write_begin() / write_end() has been called
- * all buffers have been clean and thus they must have been
- * written at least once. So they are all mapped and we can
- * happily proceed with mapping them and writing the page.
- */
- BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
- ext4_bh_unmapped_or_delay));
- }
-
- if (!ext4_journal_current_handle())
- return __ext4_normal_writepage(page, wbc);
-
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
-}
-
-static int __ext4_journalled_writepage(struct page *page,
- struct writeback_control *wbc)
-{
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
- struct buffer_head *page_bufs;
- handle_t *handle = NULL;
- int ret = 0;
- int err;
-
- ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
- noalloc_get_block_write);
- if (ret != 0)
- goto out_unlock;
-
- page_bufs = page_buffers(page);
- walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL,
- bget_one);
- /* As soon as we unlock the page, it can go away, but we have
- * references to buffers so we are safe */
- unlock_page(page);
-
- handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- goto out;
- }
-
- ret = walk_page_buffers(handle, page_bufs, 0,
- PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
-
- err = walk_page_buffers(handle, page_bufs, 0,
- PAGE_CACHE_SIZE, NULL, write_end_fn);
- if (ret == 0)
- ret = err;
- err = ext4_journal_stop(handle);
- if (!ret)
- ret = err;
-
- walk_page_buffers(handle, page_bufs, 0,
- PAGE_CACHE_SIZE, NULL, bput_one);
- EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
- goto out;
-
-out_unlock:
- unlock_page(page);
-out:
- return ret;
-}
-
-static int ext4_journalled_writepage(struct page *page,
- struct writeback_control *wbc)
-{
- struct inode *inode = page->mapping->host;
- loff_t size = i_size_read(inode);
- loff_t len;
-
- trace_mark(ext4_journalled_writepage,
- "dev %s ino %lu page_index %lu",
- inode->i_sb->s_id, inode->i_ino, page->index);
- J_ASSERT(PageLocked(page));
- if (page->index == size >> PAGE_CACHE_SHIFT)
- len = size & ~PAGE_CACHE_MASK;
- else
- len = PAGE_CACHE_SIZE;
-
- if (page_has_buffers(page)) {
- /* if page has buffers it should all be mapped
- * and allocated. If there are not buffers attached
- * to the page we know the page is dirty but it lost
- * buffers. That means that at some moment in time
- * after write_begin() / write_end() has been called
- * all buffers have been clean and thus they must have been
- * written at least once. So they are all mapped and we can
- * happily proceed with mapping them and writing the page.
- */
- BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
- ext4_bh_unmapped_or_delay));
- }
-
- if (ext4_journal_current_handle())
- goto no_write;
-
- if (PageChecked(page)) {
- /*
- * It's mmapped pagecache. Add buffers and journal it. There
- * doesn't seem much point in redirtying the page here.
- */
- ClearPageChecked(page);
- return __ext4_journalled_writepage(page, wbc);
- } else {
- /*
- * It may be a page full of checkpoint-mode buffers. We don't
- * really know unless we go poke around in the buffer_heads.
- * But block_write_full_page will do the right thing.
- */
- return block_write_full_page(page, noalloc_get_block_write,
- wbc);
- }
-no_write:
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
-}
-
static int ext4_readpage(struct file *file, struct page *page)
{
return mpage_readpage(page, ext4_get_block);
@@ -3442,8 +3268,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
* VFS code falls back into buffered path in that case so we are safe.
*/
static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset,
- unsigned long nr_segs)
+ const struct iovec *iov, loff_t offset,
+ unsigned long nr_segs)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
@@ -3537,7 +3363,7 @@ static int ext4_journalled_set_page_dirty(struct page *page)
static const struct address_space_operations ext4_ordered_aops = {
.readpage = ext4_readpage,
.readpages = ext4_readpages,
- .writepage = ext4_normal_writepage,
+ .writepage = ext4_writepage,
.sync_page = block_sync_page,
.write_begin = ext4_write_begin,
.write_end = ext4_ordered_write_end,
@@ -3552,7 +3378,7 @@ static const struct address_space_operations ext4_ordered_aops = {
static const struct address_space_operations ext4_writeback_aops = {
.readpage = ext4_readpage,
.readpages = ext4_readpages,
- .writepage = ext4_normal_writepage,
+ .writepage = ext4_writepage,
.sync_page = block_sync_page,
.write_begin = ext4_write_begin,
.write_end = ext4_writeback_write_end,
@@ -3567,7 +3393,7 @@ static const struct address_space_operations ext4_writeback_aops = {
static const struct address_space_operations ext4_journalled_aops = {
.readpage = ext4_readpage,
.readpages = ext4_readpages,
- .writepage = ext4_journalled_writepage,
+ .writepage = ext4_writepage,
.sync_page = block_sync_page,
.write_begin = ext4_write_begin,
.write_end = ext4_journalled_write_end,
@@ -3581,7 +3407,7 @@ static const struct address_space_operations ext4_journalled_aops = {
static const struct address_space_operations ext4_da_aops = {
.readpage = ext4_readpage,
.readpages = ext4_readpages,
- .writepage = ext4_da_writepage,
+ .writepage = ext4_writepage,
.writepages = ext4_da_writepages,
.sync_page = block_sync_page,
.write_begin = ext4_da_write_begin,
@@ -3628,7 +3454,8 @@ int ext4_block_truncate_page(handle_t *handle,
struct page *page;
int err = 0;
- page = grab_cache_page(mapping, from >> PAGE_CACHE_SHIFT);
+ page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
+ mapping_gfp_mask(mapping) & ~__GFP_FS);
if (!page)
return -EINVAL;
@@ -3763,7 +3590,8 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
* (no partially truncated stuff there). */
static Indirect *ext4_find_shared(struct inode *inode, int depth,
- ext4_lblk_t offsets[4], Indirect chain[4], __le32 *top)
+ ext4_lblk_t offsets[4], Indirect chain[4],
+ __le32 *top)
{
Indirect *partial, *p;
int k, err;
@@ -3819,8 +3647,10 @@ no_top:
* than `count' because there can be holes in there.
*/
static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
- struct buffer_head *bh, ext4_fsblk_t block_to_free,
- unsigned long count, __le32 *first, __le32 *last)
+ struct buffer_head *bh,
+ ext4_fsblk_t block_to_free,
+ unsigned long count, __le32 *first,
+ __le32 *last)
{
__le32 *p;
if (try_to_extend_transaction(handle, inode)) {
@@ -3837,10 +3667,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
}
/*
- * Any buffers which are on the journal will be in memory. We find
- * them on the hash table so jbd2_journal_revoke() will run jbd2_journal_forget()
- * on them. We've already detached each block from the file, so
- * bforget() in jbd2_journal_forget() should be safe.
+ * Any buffers which are on the journal will be in memory. We
+ * find them on the hash table so jbd2_journal_revoke() will
+ * run jbd2_journal_forget() on them. We've already detached
+ * each block from the file, so bforget() in
+ * jbd2_journal_forget() should be safe.
*
* AKPM: turn on bforget in jbd2_journal_forget()!!!
*/
@@ -4212,7 +4043,7 @@ void ext4_truncate(struct inode *inode)
(__le32*)partial->bh->b_data+addr_per_block,
(chain+n-1) - partial);
BUFFER_TRACE(partial->bh, "call brelse");
- brelse (partial->bh);
+ brelse(partial->bh);
partial--;
}
do_indirects:
@@ -4453,8 +4284,9 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
if (flags & S_DIRSYNC)
ei->i_flags |= EXT4_DIRSYNC_FL;
}
+
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
- struct ext4_inode_info *ei)
+ struct ext4_inode_info *ei)
{
blkcnt_t i_blocks ;
struct inode *inode = &(ei->vfs_inode);
@@ -4493,10 +4325,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
return inode;
ei = EXT4_I(inode);
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
- ei->i_acl = EXT4_ACL_NOT_CACHED;
- ei->i_default_acl = EXT4_ACL_NOT_CACHED;
-#endif
ret = __ext4_get_inode_loc(inode, &iloc, 0);
if (ret < 0)
@@ -4569,7 +4397,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
EXT4_GOOD_OLD_INODE_SIZE +
ei->i_extra_isize;
if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
- ei->i_state |= EXT4_STATE_XATTR;
+ ei->i_state |= EXT4_STATE_XATTR;
}
} else
ei->i_extra_isize = 0;
@@ -4588,7 +4416,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ret = 0;
if (ei->i_file_acl &&
- ((ei->i_file_acl <
+ ((ei->i_file_acl <
(le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
EXT4_SB(sb)->s_gdb_count)) ||
(ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
@@ -4603,15 +4431,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
!ext4_inode_is_fast_symlink(inode)))
/* Validate extent which is part of inode */
ret = ext4_ext_check_inode(inode);
- } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
(S_ISLNK(inode->i_mode) &&
!ext4_inode_is_fast_symlink(inode))) {
- /* Validate block references which are part of inode */
+ /* Validate block references which are part of inode */
ret = ext4_check_inode_blockref(inode);
}
if (ret) {
- brelse(bh);
- goto bad_inode;
+ brelse(bh);
+ goto bad_inode;
}
if (S_ISREG(inode->i_mode)) {
@@ -4642,7 +4470,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
} else {
brelse(bh);
ret = -EIO;
- ext4_error(inode->i_sb, __func__,
+ ext4_error(inode->i_sb, __func__,
"bogus i_mode (%o) for inode=%lu",
inode->i_mode, inode->i_ino);
goto bad_inode;
@@ -4795,8 +4623,9 @@ static int ext4_do_update_inode(handle_t *handle,
cpu_to_le32(new_encode_dev(inode->i_rdev));
raw_inode->i_block[2] = 0;
}
- } else for (block = 0; block < EXT4_N_BLOCKS; block++)
- raw_inode->i_block[block] = ei->i_data[block];
+ } else
+ for (block = 0; block < EXT4_N_BLOCKS; block++)
+ raw_inode->i_block[block] = ei->i_data[block];
raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
if (ei->i_extra_isize) {
@@ -5150,7 +4979,7 @@ int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
* Give this, we know that the caller already has write access to iloc->bh.
*/
int ext4_mark_iloc_dirty(handle_t *handle,
- struct inode *inode, struct ext4_iloc *iloc)
+ struct inode *inode, struct ext4_iloc *iloc)
{
int err = 0;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 91e75f7a9e73..7050a9cd04a4 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -12,8 +12,8 @@
#include <linux/capability.h>
#include <linux/time.h>
#include <linux/compat.h>
-#include <linux/smp_lock.h>
#include <linux/mount.h>
+#include <linux/file.h>
#include <asm/uaccess.h>
#include "ext4_jbd2.h"
#include "ext4.h"
@@ -191,7 +191,7 @@ setversion_out:
case EXT4_IOC_GROUP_EXTEND: {
ext4_fsblk_t n_blocks_count;
struct super_block *sb = inode->i_sb;
- int err, err2;
+ int err, err2=0;
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
@@ -204,19 +204,56 @@ setversion_out:
return err;
err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
- jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
- err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+ if (EXT4_SB(sb)->s_journal) {
+ jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+ err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+ jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+ }
if (err == 0)
err = err2;
mnt_drop_write(filp->f_path.mnt);
return err;
}
+
+ case EXT4_IOC_MOVE_EXT: {
+ struct move_extent me;
+ struct file *donor_filp;
+ int err;
+
+ if (copy_from_user(&me,
+ (struct move_extent __user *)arg, sizeof(me)))
+ return -EFAULT;
+
+ donor_filp = fget(me.donor_fd);
+ if (!donor_filp)
+ return -EBADF;
+
+ if (!capable(CAP_DAC_OVERRIDE)) {
+ if ((current->real_cred->fsuid != inode->i_uid) ||
+ !(inode->i_mode & S_IRUSR) ||
+ !(donor_filp->f_dentry->d_inode->i_mode &
+ S_IRUSR)) {
+ fput(donor_filp);
+ return -EACCES;
+ }
+ }
+
+ err = ext4_move_extents(filp, donor_filp, me.orig_start,
+ me.donor_start, me.len, &me.moved_len);
+ fput(donor_filp);
+
+ if (!err)
+ if (copy_to_user((struct move_extent *)arg,
+ &me, sizeof(me)))
+ return -EFAULT;
+ return err;
+ }
+
case EXT4_IOC_GROUP_ADD: {
struct ext4_new_group_data input;
struct super_block *sb = inode->i_sb;
- int err, err2;
+ int err, err2=0;
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
@@ -230,9 +267,11 @@ setversion_out:
return err;
err = ext4_group_add(sb, &input);
- jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
- err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+ if (EXT4_SB(sb)->s_journal) {
+ jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+ err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+ jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+ }
if (err == 0)
err = err2;
mnt_drop_write(filp->f_path.mnt);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ed8482e22c0e..cd258463e2a9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -22,6 +22,8 @@
*/
#include "mballoc.h"
+#include <trace/events/ext4.h>
+
/*
* MUSTDO:
* - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -340,8 +342,6 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
ext4_group_t group);
static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
-
-
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
@@ -657,7 +657,8 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
}
}
-static void ext4_mb_generate_buddy(struct super_block *sb,
+static noinline_for_stack
+void ext4_mb_generate_buddy(struct super_block *sb,
void *buddy, void *bitmap, ext4_group_t group)
{
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
@@ -1480,7 +1481,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
ext4_mb_check_limits(ac, e4b, 0);
}
-static int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
+static noinline_for_stack
+int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
struct ext4_buddy *e4b)
{
struct ext4_free_extent ex = ac->ac_b_ex;
@@ -1507,7 +1509,8 @@ static int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
return 0;
}
-static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
+static noinline_for_stack
+int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
struct ext4_buddy *e4b)
{
ext4_group_t group = ac->ac_g_ex.fe_group;
@@ -1566,7 +1569,8 @@ static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
* The routine scans buddy structures (not bitmap!) from given order
* to max order and tries to find big enough chunk to satisfy the req
*/
-static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
+static noinline_for_stack
+void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
struct ext4_buddy *e4b)
{
struct super_block *sb = ac->ac_sb;
@@ -1609,7 +1613,8 @@ static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
* In order to optimize scanning, caller must pass number of
* free blocks in the group, so the routine can know upper limit.
*/
-static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
+static noinline_for_stack
+void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
struct ext4_buddy *e4b)
{
struct super_block *sb = ac->ac_sb;
@@ -1668,7 +1673,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
* we try to find stripe-aligned chunks for stripe-size requests
* XXX should do so at least for multiples of stripe size as well
*/
-static void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
+static noinline_for_stack
+void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
struct ext4_buddy *e4b)
{
struct super_block *sb = ac->ac_sb;
@@ -1831,7 +1837,8 @@ void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
}
-static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+static noinline_for_stack
+int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
{
int ret;
@@ -2859,9 +2866,8 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
+ entry->start_blk
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
- trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u",
- sb->s_id, (unsigned long long) discard_block,
- entry->count);
+ trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
+ entry->count);
sb_issue_discard(sb, discard_block, entry->count);
kmem_cache_free(ext4_free_ext_cachep, entry);
@@ -2903,7 +2909,11 @@ int __init init_ext4_mballoc(void)
void exit_ext4_mballoc(void)
{
- /* XXX: synchronize_rcu(); */
+ /*
+ * Wait for completion of call_rcu()'s on ext4_pspace_cachep
+ * before destroying the slab cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
kmem_cache_destroy(ext4_free_ext_cachep);
@@ -3458,7 +3468,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
* used in in-core bitmap. buddy must be generated from this bitmap
* Need to be called with ext4 group lock held
*/
-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+static noinline_for_stack
+void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group)
{
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
@@ -3629,10 +3640,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
pa->pa_pstart, pa->pa_len, pa->pa_lstart);
- trace_mark(ext4_mb_new_inode_pa,
- "dev %s ino %lu pstart %llu len %u lstart %u",
- sb->s_id, ac->ac_inode->i_ino,
- pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+ trace_ext4_mb_new_inode_pa(ac, pa);
ext4_mb_use_inode_pa(ac, pa);
atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3691,9 +3699,8 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
pa->pa_type = MB_GROUP_PA;
mb_debug("new group pa %p: %llu/%u for %u\n", pa,
- pa->pa_pstart, pa->pa_len, pa->pa_lstart);
- trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u",
- sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+ pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+ trace_ext4_mb_new_group_pa(ac, pa);
ext4_mb_use_group_pa(ac, pa);
atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3783,10 +3790,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
ext4_mb_store_history(ac);
}
- trace_mark(ext4_mb_release_inode_pa,
- "dev %s ino %lu block %llu count %u",
- sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit,
- next - bit);
+ trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
+ next - bit);
mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
bit = next + 1;
}
@@ -3820,8 +3825,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
if (ac)
ac->ac_op = EXT4_MB_HISTORY_DISCARD;
- trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d",
- sb->s_id, pa->pa_pstart, pa->pa_len);
+ trace_ext4_mb_release_group_pa(ac, pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -3889,6 +3893,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
INIT_LIST_HEAD(&list);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+ if (ac)
+ ac->ac_sb = sb;
repeat:
ext4_lock_group(sb, group);
list_for_each_entry_safe(pa, tmp,
@@ -3987,12 +3993,15 @@ void ext4_discard_preallocations(struct inode *inode)
}
mb_debug("discard preallocation for inode %lu\n", inode->i_ino);
- trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id,
- inode->i_ino);
+ trace_ext4_discard_preallocations(inode);
INIT_LIST_HEAD(&list);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+ if (ac) {
+ ac->ac_sb = sb;
+ ac->ac_inode = inode;
+ }
repeat:
/* first, collect all pa's in the inode */
spin_lock(&ei->i_prealloc_lock);
@@ -4218,14 +4227,9 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
ext4_get_group_no_and_offset(sb, goal, &group, &block);
/* set up allocation goals */
+ memset(ac, 0, sizeof(struct ext4_allocation_context));
ac->ac_b_ex.fe_logical = ar->logical;
- ac->ac_b_ex.fe_group = 0;
- ac->ac_b_ex.fe_start = 0;
- ac->ac_b_ex.fe_len = 0;
ac->ac_status = AC_STATUS_CONTINUE;
- ac->ac_groups_scanned = 0;
- ac->ac_ex_scanned = 0;
- ac->ac_found = 0;
ac->ac_sb = sb;
ac->ac_inode = ar->inode;
ac->ac_o_ex.fe_logical = ar->logical;
@@ -4236,15 +4240,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
ac->ac_g_ex.fe_group = group;
ac->ac_g_ex.fe_start = block;
ac->ac_g_ex.fe_len = len;
- ac->ac_f_ex.fe_len = 0;
ac->ac_flags = ar->flags;
- ac->ac_2order = 0;
- ac->ac_criteria = 0;
- ac->ac_pa = NULL;
- ac->ac_bitmap_page = NULL;
- ac->ac_buddy_page = NULL;
- ac->alloc_semp = NULL;
- ac->ac_lg = NULL;
/* we have to define context: we'll we work with a file or
* locality group. this is a policy, actually */
@@ -4276,6 +4272,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
INIT_LIST_HEAD(&discard_list);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+ if (ac)
+ ac->ac_sb = sb;
spin_lock(&lg->lg_prealloc_lock);
list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@@ -4445,8 +4443,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
int ret;
int freed = 0;
- trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
- sb->s_id, needed);
+ trace_ext4_mb_discard_preallocations(sb, needed);
for (i = 0; i < ngroups && needed > 0; i++) {
ret = ext4_mb_discard_group_preallocations(sb, i, needed);
freed += ret;
@@ -4475,17 +4472,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
sb = ar->inode->i_sb;
sbi = EXT4_SB(sb);
- trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu "
- "lblk %llu goal %llu lleft %llu lright %llu "
- "pleft %llu pright %llu ",
- sb->s_id, ar->flags, ar->len,
- ar->inode ? ar->inode->i_ino : 0,
- (unsigned long long) ar->logical,
- (unsigned long long) ar->goal,
- (unsigned long long) ar->lleft,
- (unsigned long long) ar->lright,
- (unsigned long long) ar->pleft,
- (unsigned long long) ar->pright);
+ trace_ext4_request_blocks(ar);
/*
* For delayed allocation, we could skip the ENOSPC and
@@ -4594,18 +4581,7 @@ out3:
reserv_blks);
}
- trace_mark(ext4_allocate_blocks,
- "dev %s block %llu flags %u len %u ino %lu "
- "logical %llu goal %llu lleft %llu lright %llu "
- "pleft %llu pright %llu ",
- sb->s_id, (unsigned long long) block,
- ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0,
- (unsigned long long) ar->logical,
- (unsigned long long) ar->goal,
- (unsigned long long) ar->lleft,
- (unsigned long long) ar->lright,
- (unsigned long long) ar->pleft,
- (unsigned long long) ar->pright);
+ trace_ext4_allocate_blocks(ar, (unsigned long long)block);
return block;
}
@@ -4709,7 +4685,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
* Main entry point into mballoc to free blocks
*/
void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
- unsigned long block, unsigned long count,
+ ext4_fsblk_t block, unsigned long count,
int metadata, unsigned long *freed)
{
struct buffer_head *bitmap_bh = NULL;
@@ -4735,15 +4711,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
block + count > ext4_blocks_count(es)) {
ext4_error(sb, __func__,
"Freeing blocks not in datazone - "
- "block = %lu, count = %lu", block, count);
+ "block = %llu, count = %lu", block, count);
goto error_return;
}
- ext4_debug("freeing block %lu\n", block);
- trace_mark(ext4_free_blocks,
- "dev %s block %llu count %lu metadata %d ino %lu",
- sb->s_id, (unsigned long long) block, count, metadata,
- inode ? inode->i_ino : 0);
+ ext4_debug("freeing block %llu\n", block);
+ trace_ext4_free_blocks(inode, block, count, metadata);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac) {
@@ -4784,7 +4757,7 @@ do_more:
ext4_error(sb, __func__,
"Freeing blocks in system zone - "
- "Block = %lu, count = %lu", block, count);
+ "Block = %llu, count = %lu", block, count);
/* err = 0. ext4_std_error should be a no op */
goto error_return;
}
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 75e34f69215b..c96bb19f58f9 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -19,7 +19,6 @@
#include <linux/seq_file.h>
#include <linux/version.h>
#include <linux/blkdev.h>
-#include <linux/marker.h>
#include <linux/mutex.h>
#include "ext4_jbd2.h"
#include "ext4.h"
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index fe64d9f79852..313a50b39741 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -458,6 +458,7 @@ int ext4_ext_migrate(struct inode *inode)
struct inode *tmp_inode = NULL;
struct list_blocks_struct lb;
unsigned long max_entries;
+ __u32 goal;
/*
* If the filesystem does not support extents, or the inode
@@ -483,9 +484,10 @@ int ext4_ext_migrate(struct inode *inode)
retval = PTR_ERR(handle);
return retval;
}
- tmp_inode = ext4_new_inode(handle,
- inode->i_sb->s_root->d_inode,
- S_IFREG);
+ goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
+ EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
+ tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
+ S_IFREG, 0, goal);
if (IS_ERR(tmp_inode)) {
retval = -ENOMEM;
ext4_journal_stop(handle);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
new file mode 100644
index 000000000000..bbf2dd9404dc
--- /dev/null
+++ b/fs/ext4/move_extent.c
@@ -0,0 +1,1320 @@
+/*
+ * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
+ * Written by Takashi Sato <t-sato@yk.jp.nec.com>
+ * Akira Fujita <a-fujita@rs.jp.nec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/fs.h>
+#include <linux/quotaops.h>
+#include "ext4_jbd2.h"
+#include "ext4_extents.h"
+#include "ext4.h"
+
+#define get_ext_path(path, inode, block, ret) \
+ do { \
+ path = ext4_ext_find_extent(inode, block, path); \
+ if (IS_ERR(path)) { \
+ ret = PTR_ERR(path); \
+ path = NULL; \
+ } \
+ } while (0)
+
+/**
+ * copy_extent_status - Copy the extent's initialization status
+ *
+ * @src: an extent for getting initialize status
+ * @dest: an extent to be set the status
+ */
+static void
+copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
+{
+ if (ext4_ext_is_uninitialized(src))
+ ext4_ext_mark_uninitialized(dest);
+ else
+ dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
+}
+
+/**
+ * mext_next_extent - Search for the next extent and set it to "extent"
+ *
+ * @inode: inode which is searched
+ * @path: this will obtain data for the next extent
+ * @extent: pointer to the next extent we have just gotten
+ *
+ * Search the next extent in the array of ext4_ext_path structure (@path)
+ * and set it to ext4_extent structure (@extent). In addition, the member of
+ * @path (->p_ext) also points the next extent. Return 0 on success, 1 if
+ * ext4_ext_path structure refers to the last extent, or a negative error
+ * value on failure.
+ */
+static int
+mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
+ struct ext4_extent **extent)
+{
+ int ppos, leaf_ppos = path->p_depth;
+
+ ppos = leaf_ppos;
+ if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
+ /* leaf block */
+ *extent = ++path[ppos].p_ext;
+ return 0;
+ }
+
+ while (--ppos >= 0) {
+ if (EXT_LAST_INDEX(path[ppos].p_hdr) >
+ path[ppos].p_idx) {
+ int cur_ppos = ppos;
+
+ /* index block */
+ path[ppos].p_idx++;
+ path[ppos].p_block = idx_pblock(path[ppos].p_idx);
+ if (path[ppos+1].p_bh)
+ brelse(path[ppos+1].p_bh);
+ path[ppos+1].p_bh =
+ sb_bread(inode->i_sb, path[ppos].p_block);
+ if (!path[ppos+1].p_bh)
+ return -EIO;
+ path[ppos+1].p_hdr =
+ ext_block_hdr(path[ppos+1].p_bh);
+
+ /* Halfway index block */
+ while (++cur_ppos < leaf_ppos) {
+ path[cur_ppos].p_idx =
+ EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
+ path[cur_ppos].p_block =
+ idx_pblock(path[cur_ppos].p_idx);
+ if (path[cur_ppos+1].p_bh)
+ brelse(path[cur_ppos+1].p_bh);
+ path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
+ path[cur_ppos].p_block);
+ if (!path[cur_ppos+1].p_bh)
+ return -EIO;
+ path[cur_ppos+1].p_hdr =
+ ext_block_hdr(path[cur_ppos+1].p_bh);
+ }
+
+ /* leaf block */
+ path[leaf_ppos].p_ext = *extent =
+ EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
+ return 0;
+ }
+ }
+ /* We found the last extent */
+ return 1;
+}
+
+/**
+ * mext_double_down_read - Acquire two inodes' read semaphore
+ *
+ * @orig_inode: original inode structure
+ * @donor_inode: donor inode structure
+ * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
+ */
+static void
+mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
+{
+ struct inode *first = orig_inode, *second = donor_inode;
+
+ BUG_ON(orig_inode == NULL || donor_inode == NULL);
+
+ /*
+ * Use the inode number to provide the stable locking order instead
+ * of its address, because the C language doesn't guarantee you can
+ * compare pointers that don't come from the same array.
+ */
+ if (donor_inode->i_ino < orig_inode->i_ino) {
+ first = donor_inode;
+ second = orig_inode;
+ }
+
+ down_read(&EXT4_I(first)->i_data_sem);
+ down_read(&EXT4_I(second)->i_data_sem);
+}
+
+/**
+ * mext_double_down_write - Acquire two inodes' write semaphore
+ *
+ * @orig_inode: original inode structure
+ * @donor_inode: donor inode structure
+ * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
+ */
+static void
+mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
+{
+ struct inode *first = orig_inode, *second = donor_inode;
+
+ BUG_ON(orig_inode == NULL || donor_inode == NULL);
+
+ /*
+ * Use the inode number to provide the stable locking order instead
+ * of its address, because the C language doesn't guarantee you can
+ * compare pointers that don't come from the same array.
+ */
+ if (donor_inode->i_ino < orig_inode->i_ino) {
+ first = donor_inode;
+ second = orig_inode;
+ }
+
+ down_write(&EXT4_I(first)->i_data_sem);
+ down_write(&EXT4_I(second)->i_data_sem);
+}
+
+/**
+ * mext_double_up_read - Release two inodes' read semaphore
+ *
+ * @orig_inode: original inode structure to be released its lock first
+ * @donor_inode: donor inode structure to be released its lock second
+ * Release read semaphore of two inodes (orig and donor).
+ */
+static void
+mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
+{
+ BUG_ON(orig_inode == NULL || donor_inode == NULL);
+
+ up_read(&EXT4_I(orig_inode)->i_data_sem);
+ up_read(&EXT4_I(donor_inode)->i_data_sem);
+}
+
+/**
+ * mext_double_up_write - Release two inodes' write semaphore
+ *
+ * @orig_inode: original inode structure to be released its lock first
+ * @donor_inode: donor inode structure to be released its lock second
+ * Release write semaphore of two inodes (orig and donor).
+ */
+static void
+mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
+{
+ BUG_ON(orig_inode == NULL || donor_inode == NULL);
+
+ up_write(&EXT4_I(orig_inode)->i_data_sem);
+ up_write(&EXT4_I(donor_inode)->i_data_sem);
+}
+
+/**
+ * mext_insert_across_blocks - Insert extents across leaf block
+ *
+ * @handle: journal handle
+ * @orig_inode: original inode
+ * @o_start: first original extent to be changed
+ * @o_end: last original extent to be changed
+ * @start_ext: first new extent to be inserted
+ * @new_ext: middle of new extent to be inserted
+ * @end_ext: last new extent to be inserted
+ *
+ * Allocate a new leaf block and insert extents into it. Return 0 on success,
+ * or a negative error value on failure.
+ */
+static int
+mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
+ struct ext4_extent *o_start, struct ext4_extent *o_end,
+ struct ext4_extent *start_ext, struct ext4_extent *new_ext,
+ struct ext4_extent *end_ext)
+{
+ struct ext4_ext_path *orig_path = NULL;
+ ext4_lblk_t eblock = 0;
+ int new_flag = 0;
+ int end_flag = 0;
+ int err = 0;
+
+ if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
+ if (o_start == o_end) {
+
+ /* start_ext new_ext end_ext
+ * donor |---------|-----------|--------|
+ * orig |------------------------------|
+ */
+ end_flag = 1;
+ } else {
+
+ /* start_ext new_ext end_ext
+ * donor |---------|----------|---------|
+ * orig |---------------|--------------|
+ */
+ o_end->ee_block = end_ext->ee_block;
+ o_end->ee_len = end_ext->ee_len;
+ ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
+ }
+
+ o_start->ee_len = start_ext->ee_len;
+ new_flag = 1;
+
+ } else if (start_ext->ee_len && new_ext->ee_len &&
+ !end_ext->ee_len && o_start == o_end) {
+
+ /* start_ext new_ext
+ * donor |--------------|---------------|
+ * orig |------------------------------|
+ */
+ o_start->ee_len = start_ext->ee_len;
+ new_flag = 1;
+
+ } else if (!start_ext->ee_len && new_ext->ee_len &&
+ end_ext->ee_len && o_start == o_end) {
+
+ /* new_ext end_ext
+ * donor |--------------|---------------|
+ * orig |------------------------------|
+ */
+ o_end->ee_block = end_ext->ee_block;
+ o_end->ee_len = end_ext->ee_len;
+ ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
+
+ /*
+ * Set 0 to the extent block if new_ext was
+ * the first block.
+ */
+ if (new_ext->ee_block)
+ eblock = le32_to_cpu(new_ext->ee_block);
+
+ new_flag = 1;
+ } else {
+ ext4_debug("ext4 move extent: Unexpected insert case\n");
+ return -EIO;
+ }
+
+ if (new_flag) {
+ get_ext_path(orig_path, orig_inode, eblock, err);
+ if (orig_path == NULL)
+ goto out;
+
+ if (ext4_ext_insert_extent(handle, orig_inode,
+ orig_path, new_ext))
+ goto out;
+ }
+
+ if (end_flag) {
+ get_ext_path(orig_path, orig_inode,
+ le32_to_cpu(end_ext->ee_block) - 1, err);
+ if (orig_path == NULL)
+ goto out;
+
+ if (ext4_ext_insert_extent(handle, orig_inode,
+ orig_path, end_ext))
+ goto out;
+ }
+out:
+ if (orig_path) {
+ ext4_ext_drop_refs(orig_path);
+ kfree(orig_path);
+ }
+
+ return err;
+
+}
+
+/**
+ * mext_insert_inside_block - Insert new extent to the extent block
+ *
+ * @o_start: first original extent to be moved
+ * @o_end: last original extent to be moved
+ * @start_ext: first new extent to be inserted
+ * @new_ext: middle of new extent to be inserted
+ * @end_ext: last new extent to be inserted
+ * @eh: extent header of target leaf block
+ * @range_to_move: used to decide how to insert extent
+ *
+ * Insert extents into the leaf block. The extent (@o_start) is overwritten
+ * by inserted extents.
+ */
+static void
+mext_insert_inside_block(struct ext4_extent *o_start,
+ struct ext4_extent *o_end,
+ struct ext4_extent *start_ext,
+ struct ext4_extent *new_ext,
+ struct ext4_extent *end_ext,
+ struct ext4_extent_header *eh,
+ int range_to_move)
+{
+ int i = 0;
+ unsigned long len;
+
+ /* Move the existing extents */
+ if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
+ len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
+ (unsigned long)(o_end + 1);
+ memmove(o_end + 1 + range_to_move, o_end + 1, len);
+ }
+
+ /* Insert start entry */
+ if (start_ext->ee_len)
+ o_start[i++].ee_len = start_ext->ee_len;
+
+ /* Insert new entry */
+ if (new_ext->ee_len) {
+ o_start[i] = *new_ext;
+ ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
+ }
+
+ /* Insert end entry */
+ if (end_ext->ee_len)
+ o_start[i] = *end_ext;
+
+ /* Increment the total entries counter on the extent block */
+ le16_add_cpu(&eh->eh_entries, range_to_move);
+}
+
+/**
+ * mext_insert_extents - Insert new extent
+ *
+ * @handle: journal handle
+ * @orig_inode: original inode
+ * @orig_path: path indicates first extent to be changed
+ * @o_start: first original extent to be changed
+ * @o_end: last original extent to be changed
+ * @start_ext: first new extent to be inserted
+ * @new_ext: middle of new extent to be inserted
+ * @end_ext: last new extent to be inserted
+ *
+ * Call the function to insert extents. If we cannot add more extents into
+ * the leaf block, we call mext_insert_across_blocks() to create a
+ * new leaf block. Otherwise call mext_insert_inside_block(). Return 0
+ * on success, or a negative error value on failure.
+ */
+static int
+mext_insert_extents(handle_t *handle, struct inode *orig_inode,
+ struct ext4_ext_path *orig_path,
+ struct ext4_extent *o_start,
+ struct ext4_extent *o_end,
+ struct ext4_extent *start_ext,
+ struct ext4_extent *new_ext,
+ struct ext4_extent *end_ext)
+{
+ struct ext4_extent_header *eh;
+ unsigned long need_slots, slots_range;
+ int range_to_move, depth, ret;
+
+ /*
+ * The extents need to be inserted
+ * start_extent + new_extent + end_extent.
+ */
+ need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
+ (new_ext->ee_len ? 1 : 0);
+
+ /* The number of slots between start and end */
+ slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
+ / sizeof(struct ext4_extent);
+
+ /* Range to move the end of extent */
+ range_to_move = need_slots - slots_range;
+ depth = orig_path->p_depth;
+ orig_path += depth;
+ eh = orig_path->p_hdr;
+
+ if (depth) {
+ /* Register to journal */
+ ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
+ if (ret)
+ return ret;
+ }
+
+ /* Expansion */
+ if (range_to_move > 0 &&
+ (range_to_move > le16_to_cpu(eh->eh_max)
+ - le16_to_cpu(eh->eh_entries))) {
+
+ ret = mext_insert_across_blocks(handle, orig_inode, o_start,
+ o_end, start_ext, new_ext, end_ext);
+ if (ret < 0)
+ return ret;
+ } else
+ mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
+ end_ext, eh, range_to_move);
+
+ if (depth) {
+ ret = ext4_handle_dirty_metadata(handle, orig_inode,
+ orig_path->p_bh);
+ if (ret)
+ return ret;
+ } else {
+ ret = ext4_mark_inode_dirty(handle, orig_inode);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * mext_leaf_block - Move one leaf extent block into the inode.
+ *
+ * @handle: journal handle
+ * @orig_inode: original inode
+ * @orig_path: path indicates first extent to be changed
+ * @dext: donor extent
+ * @from: start offset on the target file
+ *
+ * In order to insert extents into the leaf block, we must divide the extent
+ * in the leaf block into three extents. The one is located to be inserted
+ * extents, and the others are located around it.
+ *
+ * Therefore, this function creates structures to save extents of the leaf
+ * block, and inserts extents by calling mext_insert_extents() with
+ * created extents. Return 0 on success, or a negative error value on failure.
+ */
+static int
+mext_leaf_block(handle_t *handle, struct inode *orig_inode,
+ struct ext4_ext_path *orig_path, struct ext4_extent *dext,
+ ext4_lblk_t *from)
+{
+ struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
+ struct ext4_extent new_ext, start_ext, end_ext;
+ ext4_lblk_t new_ext_end;
+ ext4_fsblk_t new_phys_end;
+ int oext_alen, new_ext_alen, end_ext_alen;
+ int depth = ext_depth(orig_inode);
+ int ret;
+
+ o_start = o_end = oext = orig_path[depth].p_ext;
+ oext_alen = ext4_ext_get_actual_len(oext);
+ start_ext.ee_len = end_ext.ee_len = 0;
+
+ new_ext.ee_block = cpu_to_le32(*from);
+ ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
+ new_ext.ee_len = dext->ee_len;
+ new_ext_alen = ext4_ext_get_actual_len(&new_ext);
+ new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
+ new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1;
+
+ /*
+ * Case: original extent is first
+ * oext |--------|
+ * new_ext |--|
+ * start_ext |--|
+ */
+ if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
+ le32_to_cpu(new_ext.ee_block) <
+ le32_to_cpu(oext->ee_block) + oext_alen) {
+ start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
+ le32_to_cpu(oext->ee_block));
+ copy_extent_status(oext, &start_ext);
+ } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
+ prev_ext = oext - 1;
+ /*
+ * We can merge new_ext into previous extent,
+ * if these are contiguous and same extent type.
+ */
+ if (ext4_can_extents_be_merged(orig_inode, prev_ext,
+ &new_ext)) {
+ o_start = prev_ext;
+ start_ext.ee_len = cpu_to_le16(
+ ext4_ext_get_actual_len(prev_ext) +
+ new_ext_alen);
+ copy_extent_status(prev_ext, &start_ext);
+ new_ext.ee_len = 0;
+ }
+ }
+
+ /*
+ * Case: new_ext_end must be less than oext
+ * oext |-----------|
+ * new_ext |-------|
+ */
+ BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end);
+
+ /*
+ * Case: new_ext is smaller than original extent
+ * oext |---------------|
+ * new_ext |-----------|
+ * end_ext |---|
+ */
+ if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
+ new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
+ end_ext.ee_len =
+ cpu_to_le16(le32_to_cpu(oext->ee_block) +
+ oext_alen - 1 - new_ext_end);
+ copy_extent_status(oext, &end_ext);
+ end_ext_alen = ext4_ext_get_actual_len(&end_ext);
+ ext4_ext_store_pblock(&end_ext,
+ (ext_pblock(o_end) + oext_alen - end_ext_alen));
+ end_ext.ee_block =
+ cpu_to_le32(le32_to_cpu(o_end->ee_block) +
+ oext_alen - end_ext_alen);
+ }
+
+ ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
+ o_end, &start_ext, &new_ext, &end_ext);
+ return ret;
+}
+
+/**
+ * mext_calc_swap_extents - Calculate extents for extent swapping.
+ *
+ * @tmp_dext: the extent that will belong to the original inode
+ * @tmp_oext: the extent that will belong to the donor inode
+ * @orig_off: block offset of original inode
+ * @donor_off: block offset of donor inode
+ * @max_count: the maximun length of extents
+ */
+static void
+mext_calc_swap_extents(struct ext4_extent *tmp_dext,
+ struct ext4_extent *tmp_oext,
+ ext4_lblk_t orig_off, ext4_lblk_t donor_off,
+ ext4_lblk_t max_count)
+{
+ ext4_lblk_t diff, orig_diff;
+ struct ext4_extent dext_old, oext_old;
+
+ dext_old = *tmp_dext;
+ oext_old = *tmp_oext;
+
+ /* When tmp_dext is too large, pick up the target range. */
+ diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
+
+ ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff);
+ tmp_dext->ee_block =
+ cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
+ tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
+
+ if (max_count < ext4_ext_get_actual_len(tmp_dext))
+ tmp_dext->ee_len = cpu_to_le16(max_count);
+
+ orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
+ ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff);
+
+ /* Adjust extent length if donor extent is larger than orig */
+ if (ext4_ext_get_actual_len(tmp_dext) >
+ ext4_ext_get_actual_len(tmp_oext) - orig_diff)
+ tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
+ orig_diff);
+
+ tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
+
+ copy_extent_status(&oext_old, tmp_dext);
+ copy_extent_status(&dext_old, tmp_oext);
+}
+
+/**
+ * mext_replace_branches - Replace original extents with new extents
+ *
+ * @handle: journal handle
+ * @orig_inode: original inode
+ * @donor_inode: donor inode
+ * @from: block offset of orig_inode
+ * @count: block count to be replaced
+ *
+ * Replace original inode extents and donor inode extents page by page.
+ * We implement this replacement in the following three steps:
+ * 1. Save the block information of original and donor inodes into
+ * dummy extents.
+ * 2. Change the block information of original inode to point at the
+ * donor inode blocks.
+ * 3. Change the block information of donor inode to point at the saved
+ * original inode blocks in the dummy extents.
+ *
+ * Return 0 on success, or a negative error value on failure.
+ */
+static int
+mext_replace_branches(handle_t *handle, struct inode *orig_inode,
+ struct inode *donor_inode, ext4_lblk_t from,
+ ext4_lblk_t count)
+{
+ struct ext4_ext_path *orig_path = NULL;
+ struct ext4_ext_path *donor_path = NULL;
+ struct ext4_extent *oext, *dext;
+ struct ext4_extent tmp_dext, tmp_oext;
+ ext4_lblk_t orig_off = from, donor_off = from;
+ int err = 0;
+ int depth;
+ int replaced_count = 0;
+ int dext_alen;
+
+ mext_double_down_write(orig_inode, donor_inode);
+
+ /* Get the original extent for the block "orig_off" */
+ get_ext_path(orig_path, orig_inode, orig_off, err);
+ if (orig_path == NULL)
+ goto out;
+
+ /* Get the donor extent for the head */
+ get_ext_path(donor_path, donor_inode, donor_off, err);
+ if (donor_path == NULL)
+ goto out;
+ depth = ext_depth(orig_inode);
+ oext = orig_path[depth].p_ext;
+ tmp_oext = *oext;
+
+ depth = ext_depth(donor_inode);
+ dext = donor_path[depth].p_ext;
+ tmp_dext = *dext;
+
+ mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
+ donor_off, count);
+
+ /* Loop for the donor extents */
+ while (1) {
+ /* The extent for donor must be found. */
+ BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block));
+
+ /* Set donor extent to orig extent */
+ err = mext_leaf_block(handle, orig_inode,
+ orig_path, &tmp_dext, &orig_off);
+ if (err < 0)
+ goto out;
+
+ /* Set orig extent to donor extent */
+ err = mext_leaf_block(handle, donor_inode,
+ donor_path, &tmp_oext, &donor_off);
+ if (err < 0)
+ goto out;
+
+ dext_alen = ext4_ext_get_actual_len(&tmp_dext);
+ replaced_count += dext_alen;
+ donor_off += dext_alen;
+ orig_off += dext_alen;
+
+ /* Already moved the expected blocks */
+ if (replaced_count >= count)
+ break;
+
+ if (orig_path)
+ ext4_ext_drop_refs(orig_path);
+ get_ext_path(orig_path, orig_inode, orig_off, err);
+ if (orig_path == NULL)
+ goto out;
+ depth = ext_depth(orig_inode);
+ oext = orig_path[depth].p_ext;
+ if (le32_to_cpu(oext->ee_block) +
+ ext4_ext_get_actual_len(oext) <= orig_off) {
+ err = 0;
+ goto out;
+ }
+ tmp_oext = *oext;
+
+ if (donor_path)
+ ext4_ext_drop_refs(donor_path);
+ get_ext_path(donor_path, donor_inode,
+ donor_off, err);
+ if (donor_path == NULL)
+ goto out;
+ depth = ext_depth(donor_inode);
+ dext = donor_path[depth].p_ext;
+ if (le32_to_cpu(dext->ee_block) +
+ ext4_ext_get_actual_len(dext) <= donor_off) {
+ err = 0;
+ goto out;
+ }
+ tmp_dext = *dext;
+
+ mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
+ donor_off,
+ count - replaced_count);
+ }
+
+out:
+ if (orig_path) {
+ ext4_ext_drop_refs(orig_path);
+ kfree(orig_path);
+ }
+ if (donor_path) {
+ ext4_ext_drop_refs(donor_path);
+ kfree(donor_path);
+ }
+
+ mext_double_up_write(orig_inode, donor_inode);
+ return err;
+}
+
+/**
+ * move_extent_per_page - Move extent data per page
+ *
+ * @o_filp: file structure of original file
+ * @donor_inode: donor inode
+ * @orig_page_offset: page index on original file
+ * @data_offset_in_page: block index where data swapping starts
+ * @block_len_in_page: the number of blocks to be swapped
+ * @uninit: orig extent is uninitialized or not
+ *
+ * Save the data in original inode blocks and replace original inode extents
+ * with donor inode extents by calling mext_replace_branches().
+ * Finally, write out the saved data in new original inode blocks. Return 0
+ * on success, or a negative error value on failure.
+ */
+static int
+move_extent_par_page(struct file *o_filp, struct inode *donor_inode,
+ pgoff_t orig_page_offset, int data_offset_in_page,
+ int block_len_in_page, int uninit)
+{
+ struct inode *orig_inode = o_filp->f_dentry->d_inode;
+ struct address_space *mapping = orig_inode->i_mapping;
+ struct buffer_head *bh;
+ struct page *page = NULL;
+ const struct address_space_operations *a_ops = mapping->a_ops;
+ handle_t *handle;
+ ext4_lblk_t orig_blk_offset;
+ long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
+ unsigned long blocksize = orig_inode->i_sb->s_blocksize;
+ unsigned int w_flags = 0;
+ unsigned int tmp_data_len, data_len;
+ void *fsdata;
+ int ret, i, jblocks;
+ int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
+
+ /*
+ * It needs twice the amount of ordinary journal buffers because
+ * inode and donor_inode may change each different metadata blocks.
+ */
+ jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
+ handle = ext4_journal_start(orig_inode, jblocks);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ return ret;
+ }
+
+ if (segment_eq(get_fs(), KERNEL_DS))
+ w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
+
+ orig_blk_offset = orig_page_offset * blocks_per_page +
+ data_offset_in_page;
+
+ /*
+ * If orig extent is uninitialized one,
+ * it's not necessary force the page into memory
+ * and then force it to be written out again.
+ * Just swap data blocks between orig and donor.
+ */
+ if (uninit) {
+ ret = mext_replace_branches(handle, orig_inode,
+ donor_inode, orig_blk_offset,
+ block_len_in_page);
+
+ /* Clear the inode cache not to refer to the old data */
+ ext4_ext_invalidate_cache(orig_inode);
+ ext4_ext_invalidate_cache(donor_inode);
+ goto out2;
+ }
+
+ offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
+
+ /* Calculate data_len */
+ if ((orig_blk_offset + block_len_in_page - 1) ==
+ ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
+ /* Replace the last block */
+ tmp_data_len = orig_inode->i_size & (blocksize - 1);
+ /*
+ * If data_len equal zero, it shows data_len is multiples of
+ * blocksize. So we set appropriate value.
+ */
+ if (tmp_data_len == 0)
+ tmp_data_len = blocksize;
+
+ data_len = tmp_data_len +
+ ((block_len_in_page - 1) << orig_inode->i_blkbits);
+ } else {
+ data_len = block_len_in_page << orig_inode->i_blkbits;
+ }
+
+ ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
+ &page, &fsdata);
+ if (unlikely(ret < 0))
+ goto out;
+
+ if (!PageUptodate(page)) {
+ mapping->a_ops->readpage(o_filp, page);
+ lock_page(page);
+ }
+
+ /*
+ * try_to_release_page() doesn't call releasepage in writeback mode.
+ * We should care about the order of writing to the same file
+ * by multiple move extent processes.
+ * It needs to call wait_on_page_writeback() to wait for the
+ * writeback of the page.
+ */
+ if (PageWriteback(page))
+ wait_on_page_writeback(page);
+
+ /* Release old bh and drop refs */
+ try_to_release_page(page, 0);
+
+ ret = mext_replace_branches(handle, orig_inode, donor_inode,
+ orig_blk_offset, block_len_in_page);
+ if (ret < 0)
+ goto out;
+
+ /* Clear the inode cache not to refer to the old data */
+ ext4_ext_invalidate_cache(orig_inode);
+ ext4_ext_invalidate_cache(donor_inode);
+
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
+
+ bh = page_buffers(page);
+ for (i = 0; i < data_offset_in_page; i++)
+ bh = bh->b_this_page;
+
+ for (i = 0; i < block_len_in_page; i++) {
+ ret = ext4_get_block(orig_inode,
+ (sector_t)(orig_blk_offset + i), bh, 0);
+ if (ret < 0)
+ goto out;
+
+ if (bh->b_this_page != NULL)
+ bh = bh->b_this_page;
+ }
+
+ ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
+ page, fsdata);
+ page = NULL;
+
+out:
+ if (unlikely(page)) {
+ if (PageLocked(page))
+ unlock_page(page);
+ page_cache_release(page);
+ }
+out2:
+ ext4_journal_stop(handle);
+
+ return ret < 0 ? ret : 0;
+}
+
+/**
+ * mext_check_argumants - Check whether move extent can be done
+ *
+ * @orig_inode: original inode
+ * @donor_inode: donor inode
+ * @orig_start: logical start offset in block for orig
+ * @donor_start: logical start offset in block for donor
+ * @len: the number of blocks to be moved
+ * @moved_len: moved block length
+ *
+ * Check the arguments of ext4_move_extents() whether the files can be
+ * exchanged with each other.
+ * Return 0 on success, or a negative error value on failure.
+ */
+static int
+mext_check_arguments(struct inode *orig_inode,
+ struct inode *donor_inode, __u64 orig_start,
+ __u64 donor_start, __u64 *len, __u64 moved_len)
+{
+ /* Regular file check */
+ if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
+ ext4_debug("ext4 move extent: The argument files should be "
+ "regular file [ino:orig %lu, donor %lu]\n",
+ orig_inode->i_ino, donor_inode->i_ino);
+ return -EINVAL;
+ }
+
+ /* Ext4 move extent does not support swapfile */
+ if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
+ ext4_debug("ext4 move extent: The argument files should "
+ "not be swapfile [ino:orig %lu, donor %lu]\n",
+ orig_inode->i_ino, donor_inode->i_ino);
+ return -EINVAL;
+ }
+
+ /* Files should be in the same ext4 FS */
+ if (orig_inode->i_sb != donor_inode->i_sb) {
+ ext4_debug("ext4 move extent: The argument files "
+ "should be in same FS [ino:orig %lu, donor %lu]\n",
+ orig_inode->i_ino, donor_inode->i_ino);
+ return -EINVAL;
+ }
+
+ /* orig and donor should be different file */
+ if (orig_inode->i_ino == donor_inode->i_ino) {
+ ext4_debug("ext4 move extent: The argument files should not "
+ "be same file [ino:orig %lu, donor %lu]\n",
+ orig_inode->i_ino, donor_inode->i_ino);
+ return -EINVAL;
+ }
+
+ /* Ext4 move extent supports only extent based file */
+ if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) {
+ ext4_debug("ext4 move extent: orig file is not extents "
+ "based file [ino:orig %lu]\n", orig_inode->i_ino);
+ return -EOPNOTSUPP;
+ } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) {
+ ext4_debug("ext4 move extent: donor file is not extents "
+ "based file [ino:donor %lu]\n", donor_inode->i_ino);
+ return -EOPNOTSUPP;
+ }
+
+ if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
+ ext4_debug("ext4 move extent: File size is 0 byte\n");
+ return -EINVAL;
+ }
+
+ /* Start offset should be same */
+ if (orig_start != donor_start) {
+ ext4_debug("ext4 move extent: orig and donor's start "
+ "offset are not same [ino:orig %lu, donor %lu]\n",
+ orig_inode->i_ino, donor_inode->i_ino);
+ return -EINVAL;
+ }
+
+ if (moved_len) {
+ ext4_debug("ext4 move extent: moved_len should be 0 "
+ "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
+ donor_inode->i_ino);
+ return -EINVAL;
+ }
+
+ if ((orig_start > MAX_DEFRAG_SIZE) ||
+ (donor_start > MAX_DEFRAG_SIZE) ||
+ (*len > MAX_DEFRAG_SIZE) ||
+ (orig_start + *len > MAX_DEFRAG_SIZE)) {
+ ext4_debug("ext4 move extent: Can't handle over [%lu] blocks "
+ "[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE,
+ orig_inode->i_ino, donor_inode->i_ino);
+ return -EINVAL;
+ }
+
+ if (orig_inode->i_size > donor_inode->i_size) {
+ if (orig_start >= donor_inode->i_size) {
+ ext4_debug("ext4 move extent: orig start offset "
+ "[%llu] should be less than donor file size "
+ "[%lld] [ino:orig %lu, donor_inode %lu]\n",
+ orig_start, donor_inode->i_size,
+ orig_inode->i_ino, donor_inode->i_ino);
+ return -EINVAL;
+ }
+
+ if (orig_start + *len > donor_inode->i_size) {
+ ext4_debug("ext4 move extent: End offset [%llu] should "
+ "be less than donor file size [%lld]."
+ "So adjust length from %llu to %lld "
+ "[ino:orig %lu, donor %lu]\n",
+ orig_start + *len, donor_inode->i_size,
+ *len, donor_inode->i_size - orig_start,
+ orig_inode->i_ino, donor_inode->i_ino);
+ *len = donor_inode->i_size - orig_start;
+ }
+ } else {
+ if (orig_start >= orig_inode->i_size) {
+ ext4_debug("ext4 move extent: start offset [%llu] "
+ "should be less than original file size "
+ "[%lld] [inode:orig %lu, donor %lu]\n",
+ orig_start, orig_inode->i_size,
+ orig_inode->i_ino, donor_inode->i_ino);
+ return -EINVAL;
+ }
+
+ if (orig_start + *len > orig_inode->i_size) {
+ ext4_debug("ext4 move extent: Adjust length "
+ "from %llu to %lld. Because it should be "
+ "less than original file size "
+ "[ino:orig %lu, donor %lu]\n",
+ *len, orig_inode->i_size - orig_start,
+ orig_inode->i_ino, donor_inode->i_ino);
+ *len = orig_inode->i_size - orig_start;
+ }
+ }
+
+ if (!*len) {
+ ext4_debug("ext4 move extent: len shoudld not be 0 "
+ "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
+ donor_inode->i_ino);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
+ *
+ * @inode1: the inode structure
+ * @inode2: the inode structure
+ *
+ * Lock two inodes' i_mutex by i_ino order. This function is moved from
+ * fs/inode.c.
+ */
+static void
+mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
+{
+ if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
+ if (inode1)
+ mutex_lock(&inode1->i_mutex);
+ else if (inode2)
+ mutex_lock(&inode2->i_mutex);
+ return;
+ }
+
+ if (inode1->i_ino < inode2->i_ino) {
+ mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+ } else {
+ mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
+ }
+}
+
+/**
+ * mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
+ *
+ * @inode1: the inode that is released first
+ * @inode2: the inode that is released second
+ *
+ * This function is moved from fs/inode.c.
+ */
+
+static void
+mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
+{
+ if (inode1)
+ mutex_unlock(&inode1->i_mutex);
+
+ if (inode2 && inode2 != inode1)
+ mutex_unlock(&inode2->i_mutex);
+}
+
+/**
+ * ext4_move_extents - Exchange the specified range of a file
+ *
+ * @o_filp: file structure of the original file
+ * @d_filp: file structure of the donor file
+ * @orig_start: start offset in block for orig
+ * @donor_start: start offset in block for donor
+ * @len: the number of blocks to be moved
+ * @moved_len: moved block length
+ *
+ * This function returns 0 and moved block length is set in moved_len
+ * if succeed, otherwise returns error value.
+ *
+ * Note: ext4_move_extents() proceeds the following order.
+ * 1:ext4_move_extents() calculates the last block number of moving extent
+ * function by the start block number (orig_start) and the number of blocks
+ * to be moved (len) specified as arguments.
+ * If the {orig, donor}_start points a hole, the extent's start offset
+ * pointed by ext_cur (current extent), holecheck_path, orig_path are set
+ * after hole behind.
+ * 2:Continue step 3 to step 5, until the holecheck_path points to last_extent
+ * or the ext_cur exceeds the block_end which is last logical block number.
+ * 3:To get the length of continues area, call mext_next_extent()
+ * specified with the ext_cur (initial value is holecheck_path) re-cursive,
+ * until find un-continuous extent, the start logical block number exceeds
+ * the block_end or the extent points to the last extent.
+ * 4:Exchange the original inode data with donor inode data
+ * from orig_page_offset to seq_end_page.
+ * The start indexes of data are specified as arguments.
+ * That of the original inode is orig_page_offset,
+ * and the donor inode is also orig_page_offset
+ * (To easily handle blocksize != pagesize case, the offset for the
+ * donor inode is block unit).
+ * 5:Update holecheck_path and orig_path to points a next proceeding extent,
+ * then returns to step 2.
+ * 6:Release holecheck_path, orig_path and set the len to moved_len
+ * which shows the number of moved blocks.
+ * The moved_len is useful for the command to calculate the file offset
+ * for starting next move extent ioctl.
+ * 7:Return 0 on success, or a negative error value on failure.
+ */
+int
+ext4_move_extents(struct file *o_filp, struct file *d_filp,
+ __u64 orig_start, __u64 donor_start, __u64 len,
+ __u64 *moved_len)
+{
+ struct inode *orig_inode = o_filp->f_dentry->d_inode;
+ struct inode *donor_inode = d_filp->f_dentry->d_inode;
+ struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
+ struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
+ ext4_lblk_t block_start = orig_start;
+ ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
+ ext4_lblk_t rest_blocks;
+ pgoff_t orig_page_offset = 0, seq_end_page;
+ int ret, depth, last_extent = 0;
+ int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
+ int data_offset_in_page;
+ int block_len_in_page;
+ int uninit;
+
+ /* protect orig and donor against a truncate */
+ mext_inode_double_lock(orig_inode, donor_inode);
+
+ mext_double_down_read(orig_inode, donor_inode);
+ /* Check the filesystem environment whether move_extent can be done */
+ ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
+ donor_start, &len, *moved_len);
+ mext_double_up_read(orig_inode, donor_inode);
+ if (ret)
+ goto out2;
+
+ file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
+ block_end = block_start + len - 1;
+ if (file_end < block_end)
+ len -= block_end - file_end;
+
+ get_ext_path(orig_path, orig_inode, block_start, ret);
+ if (orig_path == NULL)
+ goto out2;
+
+ /* Get path structure to check the hole */
+ get_ext_path(holecheck_path, orig_inode, block_start, ret);
+ if (holecheck_path == NULL)
+ goto out;
+
+ depth = ext_depth(orig_inode);
+ ext_cur = holecheck_path[depth].p_ext;
+ if (ext_cur == NULL) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Get proper extent whose ee_block is beyond block_start
+ * if block_start was within the hole.
+ */
+ if (le32_to_cpu(ext_cur->ee_block) +
+ ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
+ last_extent = mext_next_extent(orig_inode,
+ holecheck_path, &ext_cur);
+ if (last_extent < 0) {
+ ret = last_extent;
+ goto out;
+ }
+ last_extent = mext_next_extent(orig_inode, orig_path,
+ &ext_dummy);
+ if (last_extent < 0) {
+ ret = last_extent;
+ goto out;
+ }
+ }
+ seq_start = block_start;
+
+ /* No blocks within the specified range. */
+ if (le32_to_cpu(ext_cur->ee_block) > block_end) {
+ ext4_debug("ext4 move extent: The specified range of file "
+ "may be the hole\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Adjust start blocks */
+ add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
+ ext4_ext_get_actual_len(ext_cur), block_end + 1) -
+ max(le32_to_cpu(ext_cur->ee_block), block_start);
+
+ while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
+ seq_blocks += add_blocks;
+
+ /* Adjust tail blocks */
+ if (seq_start + seq_blocks - 1 > block_end)
+ seq_blocks = block_end - seq_start + 1;
+
+ ext_prev = ext_cur;
+ last_extent = mext_next_extent(orig_inode, holecheck_path,
+ &ext_cur);
+ if (last_extent < 0) {
+ ret = last_extent;
+ break;
+ }
+ add_blocks = ext4_ext_get_actual_len(ext_cur);
+
+ /*
+ * Extend the length of contiguous block (seq_blocks)
+ * if extents are contiguous.
+ */
+ if (ext4_can_extents_be_merged(orig_inode,
+ ext_prev, ext_cur) &&
+ block_end >= le32_to_cpu(ext_cur->ee_block) &&
+ !last_extent)
+ continue;
+
+ /* Is original extent is uninitialized */
+ uninit = ext4_ext_is_uninitialized(ext_prev);
+
+ data_offset_in_page = seq_start % blocks_per_page;
+
+ /*
+ * Calculate data blocks count that should be swapped
+ * at the first page.
+ */
+ if (data_offset_in_page + seq_blocks > blocks_per_page) {
+ /* Swapped blocks are across pages */
+ block_len_in_page =
+ blocks_per_page - data_offset_in_page;
+ } else {
+ /* Swapped blocks are in a page */
+ block_len_in_page = seq_blocks;
+ }
+
+ orig_page_offset = seq_start >>
+ (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
+ seq_end_page = (seq_start + seq_blocks - 1) >>
+ (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
+ seq_start = le32_to_cpu(ext_cur->ee_block);
+ rest_blocks = seq_blocks;
+
+ /* Discard preallocations of two inodes */
+ down_write(&EXT4_I(orig_inode)->i_data_sem);
+ ext4_discard_preallocations(orig_inode);
+ up_write(&EXT4_I(orig_inode)->i_data_sem);
+
+ down_write(&EXT4_I(donor_inode)->i_data_sem);
+ ext4_discard_preallocations(donor_inode);
+ up_write(&EXT4_I(donor_inode)->i_data_sem);
+
+ while (orig_page_offset <= seq_end_page) {
+
+ /* Swap original branches with new branches */
+ ret = move_extent_par_page(o_filp, donor_inode,
+ orig_page_offset,
+ data_offset_in_page,
+ block_len_in_page, uninit);
+ if (ret < 0)
+ goto out;
+ orig_page_offset++;
+ /* Count how many blocks we have exchanged */
+ *moved_len += block_len_in_page;
+ BUG_ON(*moved_len > len);
+
+ data_offset_in_page = 0;
+ rest_blocks -= block_len_in_page;
+ if (rest_blocks > blocks_per_page)
+ block_len_in_page = blocks_per_page;
+ else
+ block_len_in_page = rest_blocks;
+ }
+
+ /* Decrease buffer counter */
+ if (holecheck_path)
+ ext4_ext_drop_refs(holecheck_path);
+ get_ext_path(holecheck_path, orig_inode,
+ seq_start, ret);
+ if (holecheck_path == NULL)
+ break;
+ depth = holecheck_path->p_depth;
+
+ /* Decrease buffer counter */
+ if (orig_path)
+ ext4_ext_drop_refs(orig_path);
+ get_ext_path(orig_path, orig_inode, seq_start, ret);
+ if (orig_path == NULL)
+ break;
+
+ ext_cur = holecheck_path[depth].p_ext;
+ add_blocks = ext4_ext_get_actual_len(ext_cur);
+ seq_blocks = 0;
+
+ }
+out:
+ if (orig_path) {
+ ext4_ext_drop_refs(orig_path);
+ kfree(orig_path);
+ }
+ if (holecheck_path) {
+ ext4_ext_drop_refs(holecheck_path);
+ kfree(holecheck_path);
+ }
+out2:
+ mext_inode_double_unlock(orig_inode, donor_inode);
+
+ if (ret)
+ return ret;
+
+ /* All of the specified blocks must be exchanged in succeed */
+ BUG_ON(*moved_len != len);
+
+ return 0;
+}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 07eb6649e4fa..de04013d16ff 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1782,7 +1782,7 @@ retry:
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
- inode = ext4_new_inode (handle, dir, mode);
+ inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
inode->i_op = &ext4_file_inode_operations;
@@ -1816,7 +1816,7 @@ retry:
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
- inode = ext4_new_inode(handle, dir, mode);
+ inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
@@ -1853,7 +1853,8 @@ retry:
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
- inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
+ inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
+ &dentry->d_name, 0);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
@@ -2264,7 +2265,8 @@ retry:
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
- inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
+ inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
+ &dentry->d_name, 0);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 27eb289eea37..68b0351fc647 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1002,7 +1002,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
" too large to resize to %llu blocks safely\n",
sb->s_id, n_blocks_count);
if (sizeof(sector_t) < 8)
- ext4_warning(sb, __func__, "CONFIG_LBD not enabled");
+ ext4_warning(sb, __func__, "CONFIG_LBDAF not enabled");
return -EINVAL;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 012c4251397e..8f4f079e6b9a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,7 +37,6 @@
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/ctype.h>
-#include <linux/marker.h>
#include <linux/log2.h>
#include <linux/crc16.h>
#include <asm/uaccess.h>
@@ -47,6 +46,9 @@
#include "xattr.h"
#include "acl.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/ext4.h>
+
static int default_mb_history_length = 1000;
module_param_named(default_mb_history_length, default_mb_history_length,
@@ -301,7 +303,7 @@ static void ext4_handle_error(struct super_block *sb)
if (!test_opt(sb, ERRORS_CONT)) {
journal_t *journal = EXT4_SB(sb)->s_journal;
- EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
+ EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
if (journal)
jbd2_journal_abort(journal, -EIO);
}
@@ -414,7 +416,7 @@ void ext4_abort(struct super_block *sb, const char *function,
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
sb->s_flags |= MS_RDONLY;
- EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
+ EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
if (EXT4_SB(sb)->s_journal)
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
}
@@ -664,10 +666,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
if (!ei)
return NULL;
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
- ei->i_acl = EXT4_ACL_NOT_CACHED;
- ei->i_default_acl = EXT4_ACL_NOT_CACHED;
-#endif
ei->vfs_inode.i_version = 1;
ei->vfs_inode.i_data.writeback_index = 0;
memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
@@ -733,18 +731,6 @@ static void destroy_inodecache(void)
static void ext4_clear_inode(struct inode *inode)
{
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
- if (EXT4_I(inode)->i_acl &&
- EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
- posix_acl_release(EXT4_I(inode)->i_acl);
- EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
- }
- if (EXT4_I(inode)->i_default_acl &&
- EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
- posix_acl_release(EXT4_I(inode)->i_default_acl);
- EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
- }
-#endif
ext4_discard_preallocations(inode);
if (EXT4_JOURNAL(inode))
jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
@@ -1474,7 +1460,7 @@ set_qf_format:
break;
#endif
case Opt_abort:
- set_opt(sbi->s_mount_opt, ABORT);
+ sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
break;
case Opt_nobarrier:
clear_opt(sbi->s_mount_opt, BARRIER);
@@ -1653,7 +1639,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
ext4_commit_super(sb, 1);
if (test_opt(sb, DEBUG))
printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
- "bpg=%lu, ipg=%lu, mo=%04lx]\n",
+ "bpg=%lu, ipg=%lu, mo=%04x]\n",
sb->s_blocksize,
sbi->s_groups_count,
EXT4_BLOCKS_PER_GROUP(sb),
@@ -1957,7 +1943,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
/* small i_blocks in vfs inode? */
if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
/*
- * CONFIG_LBD is not enabled implies the inode
+ * CONFIG_LBDAF is not enabled implies the inode
* i_block represent total blocks in 512 bytes
* 32 == size of vfs inode i_blocks * 8
*/
@@ -2000,7 +1986,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
/*
- * !has_huge_files or CONFIG_LBD not enabled implies that
+ * !has_huge_files or CONFIG_LBDAF not enabled implies that
* the inode i_block field represents total file blocks in
* 2^32 512-byte sectors == size of vfs inode i_blocks * 8
*/
@@ -2204,6 +2190,7 @@ EXT4_RO_ATTR(session_write_kbytes);
EXT4_RO_ATTR(lifetime_write_kbytes);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
inode_readahead_blks_store, s_inode_readahead_blks);
+EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
@@ -2216,6 +2203,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(session_write_kbytes),
ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(inode_readahead_blks),
+ ATTR_LIST(inode_goal),
ATTR_LIST(mb_stats),
ATTR_LIST(mb_max_to_scan),
ATTR_LIST(mb_min_to_scan),
@@ -2436,13 +2424,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (has_huge_files) {
/*
* Large file size enabled file system can only be
- * mount if kernel is build with CONFIG_LBD
+ * mount if kernel is build with CONFIG_LBDAF
*/
if (sizeof(root->i_blocks) < sizeof(u64) &&
!(sb->s_flags & MS_RDONLY)) {
ext4_msg(sb, KERN_ERR, "Filesystem with huge "
"files cannot be mounted read-write "
- "without CONFIG_LBD");
+ "without CONFIG_LBDAF");
goto failed_mount;
}
}
@@ -2566,7 +2554,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_msg(sb, KERN_ERR, "filesystem"
" too large to mount safely");
if (sizeof(sector_t) < 8)
- ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled");
+ ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
goto failed_mount;
}
@@ -3346,7 +3334,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
int ret = 0;
tid_t target;
- trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
+ trace_ext4_sync_fs(sb, wait);
if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
if (wait)
jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
@@ -3450,7 +3438,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
+ if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
ext4_abort(sb, __func__, "Abort forced by user");
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -3465,7 +3453,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
n_blocks_count > ext4_blocks_count(es)) {
- if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) {
+ if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
err = -EROFS;
goto restore_opts;
}