summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c6
-rw-r--r--fs/btrfs/extent_io.c10
-rw-r--r--fs/buffer.c11
-rw-r--r--fs/cifs/connect.c10
-rw-r--r--fs/cifs/file.c9
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/debugfs/file.c2
-rw-r--r--fs/debugfs/inode.c39
-rw-r--r--fs/direct-io.c9
-rw-r--r--fs/exec.c6
-rw-r--r--fs/ext4/balloc.c1
-rw-r--r--fs/ext4/dir.c25
-rw-r--r--fs/ext4/ext4.h14
-rw-r--r--fs/ext4/extents.c14
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/ext4/indirect.c281
-rw-r--r--fs/ext4/inline.c18
-rw-r--r--fs/ext4/inode.c130
-rw-r--r--fs/ext4/mballoc.c41
-rw-r--r--fs/ext4/migrate.c7
-rw-r--r--fs/ext4/move_extent.c3
-rw-r--r--fs/ext4/super.c88
-rw-r--r--fs/f2fs/acl.c6
-rw-r--r--fs/f2fs/checkpoint.c178
-rw-r--r--fs/f2fs/data.c59
-rw-r--r--fs/f2fs/debug.c19
-rw-r--r--fs/f2fs/dir.c87
-rw-r--r--fs/f2fs/f2fs.h50
-rw-r--r--fs/f2fs/file.c45
-rw-r--r--fs/f2fs/gc.c7
-rw-r--r--fs/f2fs/hash.c4
-rw-r--r--fs/f2fs/inline.c1
-rw-r--r--fs/f2fs/inode.c12
-rw-r--r--fs/f2fs/namei.c246
-rw-r--r--fs/f2fs/node.c273
-rw-r--r--fs/f2fs/node.h7
-rw-r--r--fs/f2fs/recovery.c22
-rw-r--r--fs/f2fs/segment.c38
-rw-r--r--fs/f2fs/segment.h8
-rw-r--r--fs/f2fs/super.c21
-rw-r--r--fs/fs-writeback.c3
-rw-r--r--fs/fscache/cookie.c7
-rw-r--r--fs/fscache/internal.h2
-rw-r--r--fs/fscache/main.c18
-rw-r--r--fs/fscache/page.c4
-rw-r--r--fs/gfs2/glock.c25
-rw-r--r--fs/gfs2/lock_dlm.c8
-rw-r--r--fs/gfs2/ops_fstype.c11
-rw-r--r--fs/gfs2/recovery.c8
-rw-r--r--fs/gfs2/super.c8
-rw-r--r--fs/inode.c7
-rw-r--r--fs/jbd2/transaction.c10
-rw-r--r--fs/kernfs/file.c2
-rw-r--r--fs/lockd/mon.c4
-rw-r--r--fs/locks.c26
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c4
-rw-r--r--fs/nfs/idmap.c10
-rw-r--r--fs/nfs/inode.c6
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs4state.c4
-rw-r--r--fs/nfs/pagelist.c14
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/write.c4
-rw-r--r--fs/open.c5
-rw-r--r--fs/proc/array.c18
-rw-r--r--fs/timerfd.c77
68 files changed, 1306 insertions, 806 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 1c9c5f0a9e2b..bd7ec2cc2674 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -506,6 +506,8 @@ static void free_ioctx(struct work_struct *work)
aio_free_ring(ctx);
free_percpu(ctx->cpu);
+ percpu_ref_exit(&ctx->reqs);
+ percpu_ref_exit(&ctx->users);
kmem_cache_free(kioctx_cachep, ctx);
}
@@ -715,8 +717,8 @@ err_ctx:
err:
mutex_unlock(&ctx->ring_lock);
free_percpu(ctx->cpu);
- free_percpu(ctx->reqs.pcpu_count);
- free_percpu(ctx->users.pcpu_count);
+ percpu_ref_exit(&ctx->reqs);
+ percpu_ref_exit(&ctx->users);
kmem_cache_free(kioctx_cachep, ctx);
pr_debug("error allocating ioctx %d\n", err);
return ERR_PTR(err);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a389820d158b..3e11aab9f391 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3437,16 +3437,10 @@ done_unlocked:
return 0;
}
-static int eb_wait(void *word)
-{
- io_schedule();
- return 0;
-}
-
void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
{
- wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
- TASK_UNINTERRUPTIBLE);
+ wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
+ TASK_UNINTERRUPTIBLE);
}
static noinline_for_stack int
diff --git a/fs/buffer.c b/fs/buffer.c
index eba6e4f621ce..8f05111bbb8b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -61,16 +61,9 @@ inline void touch_buffer(struct buffer_head *bh)
}
EXPORT_SYMBOL(touch_buffer);
-static int sleep_on_buffer(void *word)
-{
- io_schedule();
- return 0;
-}
-
void __lock_buffer(struct buffer_head *bh)
{
- wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
- TASK_UNINTERRUPTIBLE);
+ wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_buffer);
@@ -123,7 +116,7 @@ EXPORT_SYMBOL(buffer_check_dirty_writeback);
*/
void __wait_on_buffer(struct buffer_head * bh)
{
- wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
+ wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__wait_on_buffer);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 20d75b8ddb26..b98366f21f9e 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3934,13 +3934,6 @@ cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
return tlink_tcon(cifs_sb_master_tlink(cifs_sb));
}
-static int
-cifs_sb_tcon_pending_wait(void *unused)
-{
- schedule();
- return signal_pending(current) ? -ERESTARTSYS : 0;
-}
-
/* find and return a tlink with given uid */
static struct tcon_link *
tlink_rb_search(struct rb_root *root, kuid_t uid)
@@ -4039,11 +4032,10 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
} else {
wait_for_construction:
ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
- cifs_sb_tcon_pending_wait,
TASK_INTERRUPTIBLE);
if (ret) {
cifs_put_tlink(tlink);
- return ERR_PTR(ret);
+ return ERR_PTR(-ERESTARTSYS);
}
/* if it's good, return it */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e90a1e9aa627..b88b1ade4d3d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3618,13 +3618,6 @@ static int cifs_launder_page(struct page *page)
return rc;
}
-static int
-cifs_pending_writers_wait(void *unused)
-{
- schedule();
- return 0;
-}
-
void cifs_oplock_break(struct work_struct *work)
{
struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -3636,7 +3629,7 @@ void cifs_oplock_break(struct work_struct *work)
int rc = 0;
wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
- cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
server->ops->downgrade_oplock(server, cinode,
test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a174605f6afa..41de3935caa0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1780,7 +1780,7 @@ cifs_invalidate_mapping(struct inode *inode)
* @word: long word containing the bit lock
*/
static int
-cifs_wait_bit_killable(void *word)
+cifs_wait_bit_killable(struct wait_bit_key *key)
{
if (fatal_signal_pending(current))
return -ERESTARTSYS;
@@ -1794,8 +1794,8 @@ cifs_revalidate_mapping(struct inode *inode)
int rc;
unsigned long *flags = &CIFS_I(inode)->flags;
- rc = wait_on_bit_lock(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
- TASK_KILLABLE);
+ rc = wait_on_bit_lock_action(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
+ TASK_KILLABLE);
if (rc)
return rc;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 3b0c62e622da..6bf55d0ed494 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -582,7 +582,7 @@ int cifs_get_writer(struct cifsInodeInfo *cinode)
start:
rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK,
- cifs_oplock_break_wait, TASK_KILLABLE);
+ TASK_KILLABLE);
if (rc)
return rc;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 63146295153b..76c08c2beb2f 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -451,7 +451,7 @@ static ssize_t read_file_bool(struct file *file, char __user *user_buf,
{
char buf[3];
u32 *val = file->private_data;
-
+
if (*val)
buf[0] = 'Y';
else
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 8c41b52da358..1e3b99d3db0d 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -66,7 +66,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev
break;
}
}
- return inode;
+ return inode;
}
/* SMP-safe */
@@ -317,7 +317,7 @@ static struct dentry *__create_file(const char *name, umode_t mode,
goto exit;
/* If the parent is not specified, we create it in the root.
- * We need the root dentry to do this, which is in the super
+ * We need the root dentry to do this, which is in the super
* block. A pointer to that is in the struct vfsmount that we
* have around.
*/
@@ -330,7 +330,7 @@ static struct dentry *__create_file(const char *name, umode_t mode,
switch (mode & S_IFMT) {
case S_IFDIR:
error = debugfs_mkdir(parent->d_inode, dentry, mode);
-
+
break;
case S_IFLNK:
error = debugfs_link(parent->d_inode, dentry, mode,
@@ -534,7 +534,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove);
*/
void debugfs_remove_recursive(struct dentry *dentry)
{
- struct dentry *child, *next, *parent;
+ struct dentry *child, *parent;
if (IS_ERR_OR_NULL(dentry))
return;
@@ -546,30 +546,49 @@ void debugfs_remove_recursive(struct dentry *dentry)
parent = dentry;
down:
mutex_lock(&parent->d_inode->i_mutex);
- list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) {
+ loop:
+ /*
+ * The parent->d_subdirs is protected by the d_lock. Outside that
+ * lock, the child can be unlinked and set to be freed which can
+ * use the d_u.d_child as the rcu head and corrupt this list.
+ */
+ spin_lock(&parent->d_lock);
+ list_for_each_entry(child, &parent->d_subdirs, d_u.d_child) {
if (!debugfs_positive(child))
continue;
/* perhaps simple_empty(child) makes more sense */
if (!list_empty(&child->d_subdirs)) {
+ spin_unlock(&parent->d_lock);
mutex_unlock(&parent->d_inode->i_mutex);
parent = child;
goto down;
}
- up:
+
+ spin_unlock(&parent->d_lock);
+
if (!__debugfs_remove(child, parent))
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+
+ /*
+ * The parent->d_lock protects against the child being unlinked
+ * from d_subdirs. When releasing the parent->d_lock we can
+ * no longer trust that the next pointer is valid.
+ * Restart the loop. We'll skip this one with the
+ * debugfs_positive() check.
+ */
+ goto loop;
}
+ spin_unlock(&parent->d_lock);
mutex_unlock(&parent->d_inode->i_mutex);
child = parent;
parent = parent->d_parent;
mutex_lock(&parent->d_inode->i_mutex);
- if (child != dentry) {
- next = list_next_entry(child, d_u.d_child);
- goto up;
- }
+ if (child != dentry)
+ /* go up */
+ goto loop;
if (!__debugfs_remove(child, parent))
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 194d0d122cae..17e39b047de5 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -71,7 +71,6 @@ struct dio_submit {
been performed at the start of a
write */
int pages_in_io; /* approximate total IO pages */
- size_t size; /* total request size (doesn't change)*/
sector_t block_in_file; /* Current offset into the underlying
file in dio_block units. */
unsigned blocks_available; /* At block_in_file. changes */
@@ -1104,7 +1103,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
unsigned blkbits = i_blkbits;
unsigned blocksize_mask = (1 << blkbits) - 1;
ssize_t retval = -EINVAL;
- loff_t end = offset + iov_iter_count(iter);
+ size_t count = iov_iter_count(iter);
+ loff_t end = offset + count;
struct dio *dio;
struct dio_submit sdio = { 0, };
struct buffer_head map_bh = { 0, };
@@ -1287,10 +1287,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
*/
BUG_ON(retval == -EIOCBQUEUED);
if (dio->is_async && retval == 0 && dio->result &&
- ((rw == READ) || (dio->result == sdio.size)))
+ (rw == READ || dio->result == count))
retval = -EIOCBQUEUED;
-
- if (retval != -EIOCBQUEUED)
+ else
dio_await_completion(dio);
if (drop_refcount(dio) == 0) {
diff --git a/fs/exec.c b/fs/exec.c
index a3d33fe592d6..ab1f1200ce5d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1216,7 +1216,7 @@ EXPORT_SYMBOL(install_exec_creds);
/*
* determine how safe it is to execute the proposed program
* - the caller must hold ->cred_guard_mutex to protect against
- * PTRACE_ATTACH
+ * PTRACE_ATTACH or seccomp thread-sync
*/
static void check_unsafe_exec(struct linux_binprm *bprm)
{
@@ -1234,7 +1234,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
* This isn't strictly necessary, but it makes it harder for LSMs to
* mess up.
*/
- if (current->no_new_privs)
+ if (task_no_new_privs(current))
bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
t = p;
@@ -1272,7 +1272,7 @@ int prepare_binprm(struct linux_binprm *bprm)
bprm->cred->egid = current_egid();
if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
- !current->no_new_privs &&
+ !task_no_new_privs(current) &&
kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
/* Set-uid? */
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index fca382037ddd..581ef40fbe90 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -639,7 +639,6 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
if (!(*errp) &&
ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
dquot_alloc_block_nofail(inode,
EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ef1bed66c14f..0bb3f9ea0832 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -571,6 +571,31 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
return 0;
}
+int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf,
+ int buf_size)
+{
+ struct ext4_dir_entry_2 *de;
+ int nlen, rlen;
+ unsigned int offset = 0;
+ char *top;
+
+ de = (struct ext4_dir_entry_2 *)buf;
+ top = buf + buf_size;
+ while ((char *) de < top) {
+ if (ext4_check_dir_entry(dir, NULL, de, bh,
+ buf, buf_size, offset))
+ return -EIO;
+ nlen = EXT4_DIR_REC_LEN(de->name_len);
+ rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
+ de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
+ offset += rlen;
+ }
+ if ((char *) de > top)
+ return -EIO;
+
+ return 0;
+}
+
const struct file_operations ext4_dir_operations = {
.llseek = ext4_dir_llseek,
.read = generic_read_dir,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7cc5a0e23688..5b19760b1de5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -591,7 +591,6 @@ enum {
#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
-#define EXT4_FREE_BLOCKS_RESERVE 0x0040
/*
* ioctl commands
@@ -2029,6 +2028,8 @@ static inline unsigned char get_dtype(struct super_block *sb, int filetype)
return ext4_filetype_table[filetype];
}
+extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
+ void *buf, int buf_size);
/* fsync.c */
extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
@@ -2144,8 +2145,8 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
extern void ext4_ind_truncate(handle_t *, struct inode *inode);
-extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
- ext4_lblk_t first, ext4_lblk_t stop);
+extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
+ ext4_lblk_t start, ext4_lblk_t end);
/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -2560,7 +2561,6 @@ extern const struct file_operations ext4_file_operations;
extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
/* inline.c */
-extern int ext4_has_inline_data(struct inode *inode);
extern int ext4_get_max_inline_size(struct inode *inode);
extern int ext4_find_inline_data_nolock(struct inode *inode);
extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
@@ -2626,6 +2626,12 @@ extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
extern int ext4_convert_inline_data(struct inode *inode);
+static inline int ext4_has_inline_data(struct inode *inode)
+{
+ return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
+ EXT4_I(inode)->i_inline_off;
+}
+
/* namei.c */
extern const struct inode_operations ext4_dir_inode_operations;
extern const struct inode_operations ext4_special_inode_operations;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4da228a0e6d0..76c2df382b7d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -161,6 +161,8 @@ int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
struct inode *inode, struct ext4_ext_path *path)
{
int err;
+
+ WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
if (path->p_bh) {
ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
/* path points to block */
@@ -1808,8 +1810,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
brelse(path[1].p_bh);
ext4_free_blocks(handle, inode, NULL, blk, 1,
- EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET |
- EXT4_FREE_BLOCKS_RESERVE);
+ EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
}
/*
@@ -3253,7 +3254,7 @@ out:
fix_extent_len:
ex->ee_len = orig_ex.ee_len;
- ext4_ext_dirty(handle, inode, path + depth);
+ ext4_ext_dirty(handle, inode, path + path->p_depth);
return err;
}
@@ -5403,16 +5404,13 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
int ret;
/* Collapse range works only on fs block size aligned offsets. */
- if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
- len & (EXT4_BLOCK_SIZE(sb) - 1))
+ if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
+ len & (EXT4_CLUSTER_SIZE(sb) - 1))
return -EINVAL;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
- if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1)
- return -EOPNOTSUPP;
-
trace_ext4_collapse_range(inode, offset, len);
punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8695f70af1ef..aca7b24a4432 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -200,10 +200,6 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
- struct address_space *mapping = file->f_mapping;
-
- if (!mapping->a_ops->readpage)
- return -ENOEXEC;
file_accessed(file);
vma->vm_ops = &ext4_file_vm_ops;
return 0;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index fd69da194826..e75f840000a0 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1295,97 +1295,220 @@ do_indirects:
}
}
-static int free_hole_blocks(handle_t *handle, struct inode *inode,
- struct buffer_head *parent_bh, __le32 *i_data,
- int level, ext4_lblk_t first,
- ext4_lblk_t count, int max)
+/**
+ * ext4_ind_remove_space - remove space from the range
+ * @handle: JBD handle for this transaction
+ * @inode: inode we are dealing with
+ * @start: First block to remove
+ * @end: One block after the last block to remove (exclusive)
+ *
+ * Free the blocks in the defined range (end is exclusive endpoint of
+ * range). This is used by ext4_punch_hole().
+ */
+int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
+ ext4_lblk_t start, ext4_lblk_t end)
{
- struct buffer_head *bh = NULL;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ __le32 *i_data = ei->i_data;
int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int ret = 0;
- int i, inc;
- ext4_lblk_t offset;
- __le32 blk;
-
- inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level);
- for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) {
- if (offset >= count + first)
- break;
- if (*i_data == 0 || (offset + inc) <= first)
- continue;
- blk = *i_data;
- if (level > 0) {
- ext4_lblk_t first2;
- ext4_lblk_t count2;
+ ext4_lblk_t offsets[4], offsets2[4];
+ Indirect chain[4], chain2[4];
+ Indirect *partial, *partial2;
+ ext4_lblk_t max_block;
+ __le32 nr = 0, nr2 = 0;
+ int n = 0, n2 = 0;
+ unsigned blocksize = inode->i_sb->s_blocksize;
- bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
- if (!bh) {
- EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
- "Read failure");
- return -EIO;
- }
- if (first > offset) {
- first2 = first - offset;
- count2 = count;
+ max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
+ >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
+ if (end >= max_block)
+ end = max_block;
+ if ((start >= end) || (start > max_block))
+ return 0;
+
+ n = ext4_block_to_path(inode, start, offsets, NULL);
+ n2 = ext4_block_to_path(inode, end, offsets2, NULL);
+
+ BUG_ON(n > n2);
+
+ if ((n == 1) && (n == n2)) {
+ /* We're punching only within direct block range */
+ ext4_free_data(handle, inode, NULL, i_data + offsets[0],
+ i_data + offsets2[0]);
+ return 0;
+ } else if (n2 > n) {
+ /*
+ * Start and end are on different levels so we're going to
+ * free partial block at start, and partial block at end of
+ * the range. If there are some levels in between then
+ * do_indirects label will take care of that.
+ */
+
+ if (n == 1) {
+ /*
+ * Start is at the direct block level, free
+ * everything to the end of the level.
+ */
+ ext4_free_data(handle, inode, NULL, i_data + offsets[0],
+ i_data + EXT4_NDIR_BLOCKS);
+ goto end_range;
+ }
+
+
+ partial = ext4_find_shared(inode, n, offsets, chain, &nr);
+ if (nr) {
+ if (partial == chain) {
+ /* Shared branch grows from the inode */
+ ext4_free_branches(handle, inode, NULL,
+ &nr, &nr+1, (chain+n-1) - partial);
+ *partial->p = 0;
} else {
- first2 = 0;
- count2 = count - (offset - first);
+ /* Shared branch grows from an indirect block */
+ BUFFER_TRACE(partial->bh, "get_write_access");
+ ext4_free_branches(handle, inode, partial->bh,
+ partial->p,
+ partial->p+1, (chain+n-1) - partial);
}
- ret = free_hole_blocks(handle, inode, bh,
- (__le32 *)bh->b_data, level - 1,
- first2, count2,
- inode->i_sb->s_blocksize >> 2);
- if (ret) {
- brelse(bh);
- goto err;
+ }
+
+ /*
+ * Clear the ends of indirect blocks on the shared branch
+ * at the start of the range
+ */
+ while (partial > chain) {
+ ext4_free_branches(handle, inode, partial->bh,
+ partial->p + 1,
+ (__le32 *)partial->bh->b_data+addr_per_block,
+ (chain+n-1) - partial);
+ BUFFER_TRACE(partial->bh, "call brelse");
+ brelse(partial->bh);
+ partial--;
+ }
+
+end_range:
+ partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
+ if (nr2) {
+ if (partial2 == chain2) {
+ /*
+ * Remember, end is exclusive so here we're at
+ * the start of the next level we're not going
+ * to free. Everything was covered by the start
+ * of the range.
+ */
+ return 0;
+ } else {
+ /* Shared branch grows from an indirect block */
+ partial2--;
}
+ } else {
+ /*
+ * ext4_find_shared returns Indirect structure which
+ * points to the last element which should not be
+ * removed by truncate. But this is end of the range
+ * in punch_hole so we need to point to the next element
+ */
+ partial2->p++;
}
- if (level == 0 ||
- (bh && all_zeroes((__le32 *)bh->b_data,
- (__le32 *)bh->b_data + addr_per_block))) {
- ext4_free_data(handle, inode, parent_bh,
- i_data, i_data + 1);
+
+ /*
+ * Clear the ends of indirect blocks on the shared branch
+ * at the end of the range
+ */
+ while (partial2 > chain2) {
+ ext4_free_branches(handle, inode, partial2->bh,
+ (__le32 *)partial2->bh->b_data,
+ partial2->p,
+ (chain2+n2-1) - partial2);
+ BUFFER_TRACE(partial2->bh, "call brelse");
+ brelse(partial2->bh);
+ partial2--;
}
- brelse(bh);
- bh = NULL;
+ goto do_indirects;
}
-err:
- return ret;
-}
-
-int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
- ext4_lblk_t first, ext4_lblk_t stop)
-{
- int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int level, ret = 0;
- int num = EXT4_NDIR_BLOCKS;
- ext4_lblk_t count, max = EXT4_NDIR_BLOCKS;
- __le32 *i_data = EXT4_I(inode)->i_data;
-
- count = stop - first;
- for (level = 0; level < 4; level++, max *= addr_per_block) {
- if (first < max) {
- ret = free_hole_blocks(handle, inode, NULL, i_data,
- level, first, count, num);
- if (ret)
- goto err;
- if (count > max - first)
- count -= max - first;
- else
- break;
- first = 0;
- } else {
- first -= max;
+ /* Punch happened within the same level (n == n2) */
+ partial = ext4_find_shared(inode, n, offsets, chain, &nr);
+ partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
+ /*
+ * ext4_find_shared returns Indirect structure which
+ * points to the last element which should not be
+ * removed by truncate. But this is end of the range
+ * in punch_hole so we need to point to the next element
+ */
+ partial2->p++;
+ while ((partial > chain) || (partial2 > chain2)) {
+ /* We're at the same block, so we're almost finished */
+ if ((partial->bh && partial2->bh) &&
+ (partial->bh->b_blocknr == partial2->bh->b_blocknr)) {
+ if ((partial > chain) && (partial2 > chain2)) {
+ ext4_free_branches(handle, inode, partial->bh,
+ partial->p + 1,
+ partial2->p,
+ (chain+n-1) - partial);
+ BUFFER_TRACE(partial->bh, "call brelse");
+ brelse(partial->bh);
+ BUFFER_TRACE(partial2->bh, "call brelse");
+ brelse(partial2->bh);
+ }
+ return 0;
}
- i_data += num;
- if (level == 0) {
- num = 1;
- max = 1;
+ /*
+ * Clear the ends of indirect blocks on the shared branch
+ * at the start of the range
+ */
+ if (partial > chain) {
+ ext4_free_branches(handle, inode, partial->bh,
+ partial->p + 1,
+ (__le32 *)partial->bh->b_data+addr_per_block,
+ (chain+n-1) - partial);
+ BUFFER_TRACE(partial->bh, "call brelse");
+ brelse(partial->bh);
+ partial--;
+ }
+ /*
+ * Clear the ends of indirect blocks on the shared branch
+ * at the end of the range
+ */
+ if (partial2 > chain2) {
+ ext4_free_branches(handle, inode, partial2->bh,
+ (__le32 *)partial2->bh->b_data,
+ partial2->p,
+ (chain2+n-1) - partial2);
+ BUFFER_TRACE(partial2->bh, "call brelse");
+ brelse(partial2->bh);
+ partial2--;
}
}
-err:
- return ret;
+do_indirects:
+ /* Kill the remaining (whole) subtrees */
+ switch (offsets[0]) {
+ default:
+ if (++n >= n2)
+ return 0;
+ nr = i_data[EXT4_IND_BLOCK];
+ if (nr) {
+ ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
+ i_data[EXT4_IND_BLOCK] = 0;
+ }
+ case EXT4_IND_BLOCK:
+ if (++n >= n2)
+ return 0;
+ nr = i_data[EXT4_DIND_BLOCK];
+ if (nr) {
+ ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
+ i_data[EXT4_DIND_BLOCK] = 0;
+ }
+ case EXT4_DIND_BLOCK:
+ if (++n >= n2)
+ return 0;
+ nr = i_data[EXT4_TIND_BLOCK];
+ if (nr) {
+ ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
+ i_data[EXT4_TIND_BLOCK] = 0;
+ }
+ case EXT4_TIND_BLOCK:
+ ;
+ }
+ return 0;
}
-
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 645205d8ada6..bea662bd0ca6 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -120,12 +120,6 @@ int ext4_get_max_inline_size(struct inode *inode)
return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE;
}
-int ext4_has_inline_data(struct inode *inode)
-{
- return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
- EXT4_I(inode)->i_inline_off;
-}
-
/*
* this function does not take xattr_sem, which is OK because it is
* currently only used in a code path coming form ext4_iget, before
@@ -1178,6 +1172,18 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
if (error < 0)
goto out;
+ /*
+ * Make sure the inline directory entries pass checks before we try to
+ * convert them, so that we avoid touching stuff that needs fsck.
+ */
+ if (S_ISDIR(inode->i_mode)) {
+ error = ext4_check_all_de(inode, iloc->bh,
+ buf + EXT4_INLINE_DOTDOT_SIZE,
+ inline_size - EXT4_INLINE_DOTDOT_SIZE);
+ if (error)
+ goto out;
+ }
+
error = ext4_destroy_inline_data_nolock(handle, inode);
if (error)
goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8a064734e6eb..367a60c07cf0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -325,18 +325,6 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
#endif
/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate a block located at @lblock
- */
-static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
-{
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- return ext4_ext_calc_metadata_amount(inode, lblock);
-
- return ext4_ind_calc_metadata_amount(inode, lblock);
-}
-
-/*
* Called with i_data_sem down, which is important since we can call
* ext4_discard_preallocations() from here.
*/
@@ -357,35 +345,10 @@ void ext4_da_update_reserve_space(struct inode *inode,
used = ei->i_reserved_data_blocks;
}
- if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
- ext4_warning(inode->i_sb, "ino %lu, allocated %d "
- "with only %d reserved metadata blocks "
- "(releasing %d blocks with reserved %d data blocks)",
- inode->i_ino, ei->i_allocated_meta_blocks,
- ei->i_reserved_meta_blocks, used,
- ei->i_reserved_data_blocks);
- WARN_ON(1);
- ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
- }
-
/* Update per-inode reservations */
ei->i_reserved_data_blocks -= used;
- ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
- used + ei->i_allocated_meta_blocks);
- ei->i_allocated_meta_blocks = 0;
+ percpu_counter_sub(&sbi->s_dirtyclusters_counter, used);
- if (ei->i_reserved_data_blocks == 0) {
- /*
- * We can release all of the reserved metadata blocks
- * only when we have written all of the delayed
- * allocation blocks.
- */
- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
- ei->i_reserved_meta_blocks);
- ei->i_reserved_meta_blocks = 0;
- ei->i_da_metadata_calc_len = 0;
- }
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
/* Update quota subsystem for data blocks */
@@ -1222,49 +1185,6 @@ static int ext4_journalled_write_end(struct file *file,
}
/*
- * Reserve a metadata for a single block located at lblock
- */
-static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
-{
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned int md_needed;
- ext4_lblk_t save_last_lblock;
- int save_len;
-
- /*
- * recalculate the amount of metadata blocks to reserve
- * in order to allocate nrblocks
- * worse case is one extent per block
- */
- spin_lock(&ei->i_block_reservation_lock);
- /*
- * ext4_calc_metadata_amount() has side effects, which we have
- * to be prepared undo if we fail to claim space.
- */
- save_len = ei->i_da_metadata_calc_len;
- save_last_lblock = ei->i_da_metadata_calc_last_lblock;
- md_needed = EXT4_NUM_B2C(sbi,
- ext4_calc_metadata_amount(inode, lblock));
- trace_ext4_da_reserve_space(inode, md_needed);
-
- /*
- * We do still charge estimated metadata to the sb though;
- * we cannot afford to run out of free blocks.
- */
- if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
- ei->i_da_metadata_calc_len = save_len;
- ei->i_da_metadata_calc_last_lblock = save_last_lblock;
- spin_unlock(&ei->i_block_reservation_lock);
- return -ENOSPC;
- }
- ei->i_reserved_meta_blocks += md_needed;
- spin_unlock(&ei->i_block_reservation_lock);
-
- return 0; /* success */
-}
-
-/*
* Reserve a single cluster located at lblock
*/
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
@@ -1273,8 +1193,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
int ret;
- ext4_lblk_t save_last_lblock;
- int save_len;
/*
* We will charge metadata quota at writeout time; this saves
@@ -1295,25 +1213,15 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
* ext4_calc_metadata_amount() has side effects, which we have
* to be prepared undo if we fail to claim space.
*/
- save_len = ei->i_da_metadata_calc_len;
- save_last_lblock = ei->i_da_metadata_calc_last_lblock;
- md_needed = EXT4_NUM_B2C(sbi,
- ext4_calc_metadata_amount(inode, lblock));
- trace_ext4_da_reserve_space(inode, md_needed);
+ md_needed = 0;
+ trace_ext4_da_reserve_space(inode, 0);
- /*
- * We do still charge estimated metadata to the sb though;
- * we cannot afford to run out of free blocks.
- */
- if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
- ei->i_da_metadata_calc_len = save_len;
- ei->i_da_metadata_calc_last_lblock = save_last_lblock;
+ if (ext4_claim_free_clusters(sbi, 1, 0)) {
spin_unlock(&ei->i_block_reservation_lock);
dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
return -ENOSPC;
}
ei->i_reserved_data_blocks++;
- ei->i_reserved_meta_blocks += md_needed;
spin_unlock(&ei->i_block_reservation_lock);
return 0; /* success */
@@ -1346,20 +1254,6 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
}
ei->i_reserved_data_blocks -= to_free;
- if (ei->i_reserved_data_blocks == 0) {
- /*
- * We can release all of the reserved metadata blocks
- * only when we have written all of the delayed
- * allocation blocks.
- * Note that in case of bigalloc, i_reserved_meta_blocks,
- * i_reserved_data_blocks, etc. refer to number of clusters.
- */
- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
- ei->i_reserved_meta_blocks);
- ei->i_reserved_meta_blocks = 0;
- ei->i_da_metadata_calc_len = 0;
- }
-
/* update fs dirty data blocks counter */
percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
@@ -1500,10 +1394,6 @@ static void ext4_print_free_blocks(struct inode *inode)
ext4_msg(sb, KERN_CRIT, "Block reservation details");
ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
ei->i_reserved_data_blocks);
- ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u",
- ei->i_reserved_meta_blocks);
- ext4_msg(sb, KERN_CRIT, "i_allocated_meta_blocks=%u",
- ei->i_allocated_meta_blocks);
return;
}
@@ -1620,13 +1510,6 @@ add_delayed:
retval = ret;
goto out_unlock;
}
- } else {
- ret = ext4_da_reserve_metadata(inode, iblock);
- if (ret) {
- /* not enough space to reserve */
- retval = ret;
- goto out_unlock;
- }
}
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -2843,8 +2726,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
{
trace_ext4_alloc_da_blocks(inode);
- if (!EXT4_I(inode)->i_reserved_data_blocks &&
- !EXT4_I(inode)->i_reserved_meta_blocks)
+ if (!EXT4_I(inode)->i_reserved_data_blocks)
return 0;
/*
@@ -3624,7 +3506,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
ret = ext4_ext_remove_space(inode, first_block,
stop_block - 1);
else
- ret = ext4_free_hole_blocks(handle, inode, first_block,
+ ret = ext4_ind_remove_space(handle, inode, first_block,
stop_block);
up_write(&EXT4_I(inode)->i_data_sem);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 2dcb936be90e..956027711faf 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3075,8 +3075,9 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
(23 - bsbits)) << 23;
size = 8 * 1024 * 1024;
} else {
- start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
- size = ac->ac_o_ex.fe_len << bsbits;
+ start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
+ size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
+ ac->ac_o_ex.fe_len) << bsbits;
}
size = size >> bsbits;
start = start_off >> bsbits;
@@ -3216,8 +3217,27 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
{
struct ext4_prealloc_space *pa = ac->ac_pa;
+ struct ext4_buddy e4b;
+ int err;
- if (pa && pa->pa_type == MB_INODE_PA)
+ if (pa == NULL) {
+ err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
+ if (err) {
+ /*
+ * This should never happen since we pin the
+ * pages in the ext4_allocation_context so
+ * ext4_mb_load_buddy() should never fail.
+ */
+ WARN(1, "mb_load_buddy failed (%d)", err);
+ return;
+ }
+ ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+ mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
+ ac->ac_f_ex.fe_len);
+ ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+ return;
+ }
+ if (pa->pa_type == MB_INODE_PA)
pa->pa_free += ac->ac_b_ex.fe_len;
}
@@ -4627,7 +4647,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *gd_bh;
ext4_group_t block_group;
struct ext4_sb_info *sbi;
- struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_buddy e4b;
unsigned int count_clusters;
int err = 0;
@@ -4838,19 +4857,7 @@ do_more:
&sbi->s_flex_groups[flex_group].free_clusters);
}
- if (flags & EXT4_FREE_BLOCKS_RESERVE && ei->i_reserved_data_blocks) {
- percpu_counter_add(&sbi->s_dirtyclusters_counter,
- count_clusters);
- spin_lock(&ei->i_block_reservation_lock);
- if (flags & EXT4_FREE_BLOCKS_METADATA)
- ei->i_reserved_meta_blocks += count_clusters;
- else
- ei->i_reserved_data_blocks += count_clusters;
- spin_unlock(&ei->i_block_reservation_lock);
- if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
- dquot_reclaim_block(inode,
- EXT4_C2B(sbi, count_clusters));
- } else if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
+ if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index ec092437d3e0..d3567f27bae7 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -39,6 +39,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
newext.ee_block = cpu_to_le32(lb->first_block);
newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
ext4_ext_store_pblock(&newext, lb->first_pblock);
+ /* Locking only for convenience since we are operating on temp inode */
+ down_write(&EXT4_I(inode)->i_data_sem);
path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
if (IS_ERR(path)) {
@@ -61,7 +63,9 @@ static int finish_range(handle_t *handle, struct inode *inode,
*/
if (needed && ext4_handle_has_enough_credits(handle,
EXT4_RESERVE_TRANS_BLOCKS)) {
+ up_write((&EXT4_I(inode)->i_data_sem));
retval = ext4_journal_restart(handle, needed);
+ down_write((&EXT4_I(inode)->i_data_sem));
if (retval)
goto err_out;
} else if (needed) {
@@ -70,13 +74,16 @@ static int finish_range(handle_t *handle, struct inode *inode,
/*
* IF not able to extend the journal restart the journal
*/
+ up_write((&EXT4_I(inode)->i_data_sem));
retval = ext4_journal_restart(handle, needed);
+ down_write((&EXT4_I(inode)->i_data_sem));
if (retval)
goto err_out;
}
}
retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
err_out:
+ up_write((&EXT4_I(inode)->i_data_sem));
if (path) {
ext4_ext_drop_refs(path);
kfree(path);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 2484c7ec6a72..671a74b14fd7 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1013,10 +1013,11 @@ data_copy:
*err = -EBUSY;
goto unlock_pages;
}
-
+ ext4_double_down_write_data_sem(orig_inode, donor_inode);
replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
orig_blk_offset,
block_len_in_page, err);
+ ext4_double_up_write_data_sem(orig_inode, donor_inode);
if (*err) {
if (replaced_count) {
block_len_in_page = replaced_count;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6df7bc611dbd..32b43ad154b9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2142,10 +2142,6 @@ static int ext4_check_descriptors(struct super_block *sb,
}
if (NULL != first_not_zeroed)
*first_not_zeroed = grp;
-
- ext4_free_blocks_count_set(sbi->s_es,
- EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
- sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
return 1;
}
@@ -3883,13 +3879,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
goto failed_mount2;
}
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
- if (!ext4_fill_flex_info(sb)) {
- ext4_msg(sb, KERN_ERR,
- "unable to initialize "
- "flex_bg meta info!");
- goto failed_mount2;
- }
sbi->s_gdb_count = db_count;
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
@@ -3902,23 +3891,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* Register extent status tree shrinker */
ext4_es_register_shrinker(sbi);
- err = percpu_counter_init(&sbi->s_freeclusters_counter,
- ext4_count_free_clusters(sb));
- if (!err) {
- err = percpu_counter_init(&sbi->s_freeinodes_counter,
- ext4_count_free_inodes(sb));
- }
- if (!err) {
- err = percpu_counter_init(&sbi->s_dirs_counter,
- ext4_count_dirs(sb));
- }
- if (!err) {
- err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
- }
- if (!err) {
- err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0);
- }
- if (err) {
+ if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) {
ext4_msg(sb, KERN_ERR, "insufficient memory");
goto failed_mount3;
}
@@ -4022,18 +3995,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
- /*
- * The journal may have updated the bg summary counts, so we
- * need to update the global counters.
- */
- percpu_counter_set(&sbi->s_freeclusters_counter,
- ext4_count_free_clusters(sb));
- percpu_counter_set(&sbi->s_freeinodes_counter,
- ext4_count_free_inodes(sb));
- percpu_counter_set(&sbi->s_dirs_counter,
- ext4_count_dirs(sb));
- percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
-
no_journal:
if (ext4_mballoc_ready) {
sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
@@ -4141,6 +4102,33 @@ no_journal:
goto failed_mount5;
}
+ block = ext4_count_free_clusters(sb);
+ ext4_free_blocks_count_set(sbi->s_es,
+ EXT4_C2B(sbi, block));
+ err = percpu_counter_init(&sbi->s_freeclusters_counter, block);
+ if (!err) {
+ unsigned long freei = ext4_count_free_inodes(sb);
+ sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
+ err = percpu_counter_init(&sbi->s_freeinodes_counter, freei);
+ }
+ if (!err)
+ err = percpu_counter_init(&sbi->s_dirs_counter,
+ ext4_count_dirs(sb));
+ if (!err)
+ err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
+ if (err) {
+ ext4_msg(sb, KERN_ERR, "insufficient memory");
+ goto failed_mount6;
+ }
+
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+ if (!ext4_fill_flex_info(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "unable to initialize "
+ "flex_bg meta info!");
+ goto failed_mount6;
+ }
+
err = ext4_register_li_request(sb, first_not_zeroed);
if (err)
goto failed_mount6;
@@ -4215,6 +4203,12 @@ failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
ext4_mb_release(sb);
+ if (sbi->s_flex_groups)
+ ext4_kvfree(sbi->s_flex_groups);
+ percpu_counter_destroy(&sbi->s_freeclusters_counter);
+ percpu_counter_destroy(&sbi->s_freeinodes_counter);
+ percpu_counter_destroy(&sbi->s_dirs_counter);
+ percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
failed_mount5:
ext4_ext_release(sb);
ext4_release_system_zone(sb);
@@ -4233,12 +4227,6 @@ failed_mount_wq:
failed_mount3:
ext4_es_unregister_shrinker(sbi);
del_timer_sync(&sbi->s_err_report);
- if (sbi->s_flex_groups)
- ext4_kvfree(sbi->s_flex_groups);
- percpu_counter_destroy(&sbi->s_freeclusters_counter);
- percpu_counter_destroy(&sbi->s_freeinodes_counter);
- percpu_counter_destroy(&sbi->s_dirs_counter);
- percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
percpu_counter_destroy(&sbi->s_extent_cache_cnt);
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
@@ -4556,11 +4544,13 @@ static int ext4_commit_super(struct super_block *sb, int sync)
else
es->s_kbytes_written =
cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
- ext4_free_blocks_count_set(es,
+ if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
+ ext4_free_blocks_count_set(es,
EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeclusters_counter)));
- es->s_free_inodes_count =
- cpu_to_le32(percpu_counter_sum_positive(
+ if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
+ es->s_free_inodes_count =
+ cpu_to_le32(percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeinodes_counter));
BUFFER_TRACE(sbh, "marking dirty");
ext4_superblock_csum_set(sb);
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index dbe2141d10ad..83b9b5a8d112 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -203,12 +203,6 @@ static int __f2fs_set_acl(struct inode *inode, int type,
size_t size = 0;
int error;
- if (acl) {
- error = posix_acl_valid(acl);
- if (error < 0)
- return error;
- }
-
switch (type) {
case ACL_TYPE_ACCESS:
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0b4710c1d370..6aeed5bada52 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -22,7 +22,7 @@
#include "segment.h"
#include <trace/events/f2fs.h>
-static struct kmem_cache *orphan_entry_slab;
+static struct kmem_cache *ino_entry_slab;
static struct kmem_cache *inode_entry_slab;
/*
@@ -282,72 +282,120 @@ const struct address_space_operations f2fs_meta_aops = {
.set_page_dirty = f2fs_set_meta_page_dirty,
};
+static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ struct ino_entry *e;
+retry:
+ spin_lock(&sbi->ino_lock[type]);
+
+ e = radix_tree_lookup(&sbi->ino_root[type], ino);
+ if (!e) {
+ e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
+ if (!e) {
+ spin_unlock(&sbi->ino_lock[type]);
+ goto retry;
+ }
+ if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
+ spin_unlock(&sbi->ino_lock[type]);
+ kmem_cache_free(ino_entry_slab, e);
+ goto retry;
+ }
+ memset(e, 0, sizeof(struct ino_entry));
+ e->ino = ino;
+
+ list_add_tail(&e->list, &sbi->ino_list[type]);
+ }
+ spin_unlock(&sbi->ino_lock[type]);
+}
+
+static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ struct ino_entry *e;
+
+ spin_lock(&sbi->ino_lock[type]);
+ e = radix_tree_lookup(&sbi->ino_root[type], ino);
+ if (e) {
+ list_del(&e->list);
+ radix_tree_delete(&sbi->ino_root[type], ino);
+ if (type == ORPHAN_INO)
+ sbi->n_orphans--;
+ spin_unlock(&sbi->ino_lock[type]);
+ kmem_cache_free(ino_entry_slab, e);
+ return;
+ }
+ spin_unlock(&sbi->ino_lock[type]);
+}
+
+void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ /* add new dirty ino entry into list */
+ __add_ino_entry(sbi, ino, type);
+}
+
+void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ /* remove dirty ino entry from list */
+ __remove_ino_entry(sbi, ino, type);
+}
+
+/* mode should be APPEND_INO or UPDATE_INO */
+bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
+{
+ struct ino_entry *e;
+ spin_lock(&sbi->ino_lock[mode]);
+ e = radix_tree_lookup(&sbi->ino_root[mode], ino);
+ spin_unlock(&sbi->ino_lock[mode]);
+ return e ? true : false;
+}
+
+static void release_dirty_inode(struct f2fs_sb_info *sbi)
+{
+ struct ino_entry *e, *tmp;
+ int i;
+
+ for (i = APPEND_INO; i <= UPDATE_INO; i++) {
+ spin_lock(&sbi->ino_lock[i]);
+ list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
+ list_del(&e->list);
+ radix_tree_delete(&sbi->ino_root[i], e->ino);
+ kmem_cache_free(ino_entry_slab, e);
+ }
+ spin_unlock(&sbi->ino_lock[i]);
+ }
+}
+
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
int err = 0;
- spin_lock(&sbi->orphan_inode_lock);
+ spin_lock(&sbi->ino_lock[ORPHAN_INO]);
if (unlikely(sbi->n_orphans >= sbi->max_orphans))
err = -ENOSPC;
else
sbi->n_orphans++;
- spin_unlock(&sbi->orphan_inode_lock);
+ spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
return err;
}
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
- spin_lock(&sbi->orphan_inode_lock);
+ spin_lock(&sbi->ino_lock[ORPHAN_INO]);
f2fs_bug_on(sbi->n_orphans == 0);
sbi->n_orphans--;
- spin_unlock(&sbi->orphan_inode_lock);
+ spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
}
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *head;
- struct orphan_inode_entry *new, *orphan;
-
- new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
- new->ino = ino;
-
- spin_lock(&sbi->orphan_inode_lock);
- head = &sbi->orphan_inode_list;
- list_for_each_entry(orphan, head, list) {
- if (orphan->ino == ino) {
- spin_unlock(&sbi->orphan_inode_lock);
- kmem_cache_free(orphan_entry_slab, new);
- return;
- }
-
- if (orphan->ino > ino)
- break;
- }
-
- /* add new orphan entry into list which is sorted by inode number */
- list_add_tail(&new->list, &orphan->list);
- spin_unlock(&sbi->orphan_inode_lock);
+ /* add new orphan ino entry into list */
+ __add_ino_entry(sbi, ino, ORPHAN_INO);
}
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *head;
- struct orphan_inode_entry *orphan;
-
- spin_lock(&sbi->orphan_inode_lock);
- head = &sbi->orphan_inode_list;
- list_for_each_entry(orphan, head, list) {
- if (orphan->ino == ino) {
- list_del(&orphan->list);
- f2fs_bug_on(sbi->n_orphans == 0);
- sbi->n_orphans--;
- spin_unlock(&sbi->orphan_inode_lock);
- kmem_cache_free(orphan_entry_slab, orphan);
- return;
- }
- }
- spin_unlock(&sbi->orphan_inode_lock);
+ /* remove orphan entry from orphan list */
+ __remove_ino_entry(sbi, ino, ORPHAN_INO);
}
static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -401,14 +449,14 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
struct page *page = NULL;
- struct orphan_inode_entry *orphan = NULL;
+ struct ino_entry *orphan = NULL;
for (index = 0; index < orphan_blocks; index++)
grab_meta_page(sbi, start_blk + index);
index = 1;
- spin_lock(&sbi->orphan_inode_lock);
- head = &sbi->orphan_inode_list;
+ spin_lock(&sbi->ino_lock[ORPHAN_INO]);
+ head = &sbi->ino_list[ORPHAN_INO];
/* loop for each orphan inode entry and write them in Jornal block */
list_for_each_entry(orphan, head, list) {
@@ -448,7 +496,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
f2fs_put_page(page, 1);
}
- spin_unlock(&sbi->orphan_inode_lock);
+ spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
}
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -714,10 +762,10 @@ retry_flush_dents:
* until finishing nat/sit flush.
*/
retry_flush_nodes:
- mutex_lock(&sbi->node_write);
+ down_write(&sbi->node_write);
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
- mutex_unlock(&sbi->node_write);
+ up_write(&sbi->node_write);
sync_node_pages(sbi, 0, &wbc);
goto retry_flush_nodes;
}
@@ -726,7 +774,7 @@ retry_flush_nodes:
static void unblock_operations(struct f2fs_sb_info *sbi)
{
- mutex_unlock(&sbi->node_write);
+ up_write(&sbi->node_write);
f2fs_unlock_all(sbi);
}
@@ -748,6 +796,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
nid_t last_nid = 0;
block_t start_blk;
struct page *cp_page;
@@ -761,7 +810,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
* This avoids to conduct wrong roll-forward operations and uses
* metapages, so should be called prior to sync_meta_pages below.
*/
- discard_next_dnode(sbi);
+ discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META))
@@ -885,8 +934,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
/* Here, we only have one bio having CP pack */
sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
- if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
+ if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
clear_prefree_segments(sbi);
+ release_dirty_inode(sbi);
F2FS_RESET_SB_DIRT(sbi);
}
}
@@ -932,31 +982,37 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
}
-void init_orphan_info(struct f2fs_sb_info *sbi)
+void init_ino_entry_info(struct f2fs_sb_info *sbi)
{
- spin_lock_init(&sbi->orphan_inode_lock);
- INIT_LIST_HEAD(&sbi->orphan_inode_list);
- sbi->n_orphans = 0;
+ int i;
+
+ for (i = 0; i < MAX_INO_ENTRY; i++) {
+ INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
+ spin_lock_init(&sbi->ino_lock[i]);
+ INIT_LIST_HEAD(&sbi->ino_list[i]);
+ }
+
/*
* considering 512 blocks in a segment 8 blocks are needed for cp
* and log segment summaries. Remaining blocks are used to keep
* orphan entries with the limitation one reserved segment
* for cp pack we can have max 1020*504 orphan entries
*/
+ sbi->n_orphans = 0;
sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
* F2FS_ORPHANS_PER_BLOCK;
}
int __init create_checkpoint_caches(void)
{
- orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
- sizeof(struct orphan_inode_entry));
- if (!orphan_entry_slab)
+ ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
+ sizeof(struct ino_entry));
+ if (!ino_entry_slab)
return -ENOMEM;
inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
sizeof(struct dir_inode_entry));
if (!inode_entry_slab) {
- kmem_cache_destroy(orphan_entry_slab);
+ kmem_cache_destroy(ino_entry_slab);
return -ENOMEM;
}
return 0;
@@ -964,6 +1020,6 @@ int __init create_checkpoint_caches(void)
void destroy_checkpoint_caches(void)
{
- kmem_cache_destroy(orphan_entry_slab);
+ kmem_cache_destroy(ino_entry_slab);
kmem_cache_destroy(inode_entry_slab);
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f8cf619edb5f..03313099c51c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -139,7 +139,10 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
io->fio.type = META_FLUSH;
- io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
+ if (test_opt(sbi, NOBARRIER))
+ io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
+ else
+ io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
}
__submit_merged_bio(io);
up_write(&io->io_rwsem);
@@ -626,8 +629,10 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
if (check_extent_cache(inode, pgofs, bh_result))
goto out;
- if (create)
+ if (create) {
+ f2fs_balance_fs(sbi);
f2fs_lock_op(sbi);
+ }
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -784,9 +789,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
!is_cold_data(page) &&
need_inplace_update(inode))) {
rewrite_data_page(page, old_blkaddr, fio);
+ set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
} else {
write_data_page(page, &dn, &new_blkaddr, fio);
update_extent_cache(new_blkaddr, &dn);
+ set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
}
out_writepage:
f2fs_put_dnode(&dn);
@@ -914,6 +921,16 @@ skip_write:
return 0;
}
+static void f2fs_write_failed(struct address_space *mapping, loff_t to)
+{
+ struct inode *inode = mapping->host;
+
+ if (to > inode->i_size) {
+ truncate_pagecache(inode, inode->i_size);
+ truncate_blocks(inode, inode->i_size);
+ }
+}
+
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -931,11 +948,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
repeat:
err = f2fs_convert_inline_data(inode, pos + len);
if (err)
- return err;
+ goto fail;
page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page)
- return -ENOMEM;
+ if (!page) {
+ err = -ENOMEM;
+ goto fail;
+ }
/* to avoid latency during memory pressure */
unlock_page(page);
@@ -949,10 +968,9 @@ repeat:
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = f2fs_reserve_block(&dn, index);
f2fs_unlock_op(sbi);
-
if (err) {
f2fs_put_page(page, 0);
- return err;
+ goto fail;
}
inline_data:
lock_page(page);
@@ -982,19 +1000,20 @@ inline_data:
err = f2fs_read_inline_data(inode, page);
if (err) {
page_cache_release(page);
- return err;
+ goto fail;
}
} else {
err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
READ_SYNC);
if (err)
- return err;
+ goto fail;
}
lock_page(page);
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
- return -EIO;
+ err = -EIO;
+ goto fail;
}
if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
@@ -1005,6 +1024,9 @@ out:
SetPageUptodate(page);
clear_cold_data(page);
return 0;
+fail:
+ f2fs_write_failed(mapping, pos + len);
+ return err;
}
static int f2fs_write_end(struct file *file,
@@ -1016,7 +1038,6 @@ static int f2fs_write_end(struct file *file,
trace_f2fs_write_end(inode, pos, len, copied);
- SetPageUptodate(page);
set_page_dirty(page);
if (pos + copied > i_size_read(inode)) {
@@ -1050,7 +1071,10 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
struct iov_iter *iter, loff_t offset)
{
struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ size_t count = iov_iter_count(iter);
+ int err;
/* Let buffer I/O handle the inline data case. */
if (f2fs_has_inline_data(inode))
@@ -1062,8 +1086,15 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
/* clear fsync mark to recover these blocks */
fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
- return blockdev_direct_IO(rw, iocb, inode, iter, offset,
- get_data_block);
+ trace_f2fs_direct_IO_enter(inode, offset, count, rw);
+
+ err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
+ if (err < 0 && (rw & WRITE))
+ f2fs_write_failed(mapping, offset + count);
+
+ trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
+
+ return err;
}
static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index b52c12cf5873..a441ba33be11 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -167,7 +167,7 @@ get_cache:
si->cache_mem += npages << PAGE_CACHE_SHIFT;
npages = META_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
- si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);
+ si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
}
@@ -345,21 +345,14 @@ void __init f2fs_create_root_stats(void)
f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
if (!f2fs_debugfs_root)
- goto bail;
+ return;
file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
NULL, &stat_fops);
- if (!file)
- goto free_debugfs_dir;
-
- return;
-
-free_debugfs_dir:
- debugfs_remove(f2fs_debugfs_root);
-
-bail:
- f2fs_debugfs_root = NULL;
- return;
+ if (!file) {
+ debugfs_remove(f2fs_debugfs_root);
+ f2fs_debugfs_root = NULL;
+ }
}
void f2fs_destroy_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index a4addd72ebbd..bcf893c3d903 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -77,8 +77,8 @@ static unsigned long dir_block_index(unsigned int level,
return bidx;
}
-static bool early_match_name(const char *name, size_t namelen,
- f2fs_hash_t namehash, struct f2fs_dir_entry *de)
+static bool early_match_name(size_t namelen, f2fs_hash_t namehash,
+ struct f2fs_dir_entry *de)
{
if (le16_to_cpu(de->name_len) != namelen)
return false;
@@ -90,7 +90,7 @@ static bool early_match_name(const char *name, size_t namelen,
}
static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
- const char *name, size_t namelen, int *max_slots,
+ struct qstr *name, int *max_slots,
f2fs_hash_t namehash, struct page **res_page)
{
struct f2fs_dir_entry *de;
@@ -109,9 +109,10 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
continue;
}
de = &dentry_blk->dentry[bit_pos];
- if (early_match_name(name, namelen, namehash, de)) {
+ if (early_match_name(name->len, namehash, de)) {
if (!memcmp(dentry_blk->filename[bit_pos],
- name, namelen)) {
+ name->name,
+ name->len)) {
*res_page = dentry_page;
goto found;
}
@@ -120,6 +121,13 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
*max_slots = max_len;
max_len = 0;
}
+
+ /*
+ * For the most part, it should be a bug when name_len is zero.
+ * We stop here for figuring out where the bugs are occurred.
+ */
+ f2fs_bug_on(!de->name_len);
+
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
}
@@ -132,10 +140,10 @@ found:
}
static struct f2fs_dir_entry *find_in_level(struct inode *dir,
- unsigned int level, const char *name, size_t namelen,
+ unsigned int level, struct qstr *name,
f2fs_hash_t namehash, struct page **res_page)
{
- int s = GET_DENTRY_SLOTS(namelen);
+ int s = GET_DENTRY_SLOTS(name->len);
unsigned int nbucket, nblock;
unsigned int bidx, end_block;
struct page *dentry_page;
@@ -160,8 +168,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
continue;
}
- de = find_in_block(dentry_page, name, namelen,
- &max_slots, namehash, res_page);
+ de = find_in_block(dentry_page, name, &max_slots,
+ namehash, res_page);
if (de)
break;
@@ -187,8 +195,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
struct qstr *child, struct page **res_page)
{
- const char *name = child->name;
- size_t namelen = child->len;
unsigned long npages = dir_blocks(dir);
struct f2fs_dir_entry *de = NULL;
f2fs_hash_t name_hash;
@@ -200,12 +206,11 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
*res_page = NULL;
- name_hash = f2fs_dentry_hash(name, namelen);
+ name_hash = f2fs_dentry_hash(child);
max_depth = F2FS_I(dir)->i_current_depth;
for (level = 0; level < max_depth; level++) {
- de = find_in_level(dir, level, name,
- namelen, name_hash, res_page);
+ de = find_in_level(dir, level, child, name_hash, res_page);
if (de)
break;
}
@@ -298,14 +303,13 @@ static int make_empty_dir(struct inode *inode,
struct page *dentry_page;
struct f2fs_dentry_block *dentry_blk;
struct f2fs_dir_entry *de;
- void *kaddr;
dentry_page = get_new_data_page(inode, page, 0, true);
if (IS_ERR(dentry_page))
return PTR_ERR(dentry_page);
- kaddr = kmap_atomic(dentry_page);
- dentry_blk = (struct f2fs_dentry_block *)kaddr;
+
+ dentry_blk = kmap_atomic(dentry_page);
de = &dentry_blk->dentry[0];
de->name_len = cpu_to_le16(1);
@@ -323,7 +327,7 @@ static int make_empty_dir(struct inode *inode,
test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
- kunmap_atomic(kaddr);
+ kunmap_atomic(dentry_blk);
set_page_dirty(dentry_page);
f2fs_put_page(dentry_page, 1);
@@ -333,11 +337,12 @@ static int make_empty_dir(struct inode *inode,
static struct page *init_inode_metadata(struct inode *inode,
struct inode *dir, const struct qstr *name)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct page *page;
int err;
if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
- page = new_inode_page(inode, name);
+ page = new_inode_page(inode);
if (IS_ERR(page))
return page;
@@ -362,7 +367,8 @@ static struct page *init_inode_metadata(struct inode *inode,
set_cold_node(inode, page);
}
- init_dent_inode(name, page);
+ if (name)
+ init_dent_inode(name, page);
/*
* This file should be checkpointed during fsync.
@@ -370,6 +376,12 @@ static struct page *init_inode_metadata(struct inode *inode,
*/
if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
file_lost_pino(inode);
+ /*
+ * If the tmpfile is being linked to an alias through the linkat path,
+ * we should remove this inode from the orphan list.
+ */
+ if (inode->i_nlink == 0)
+ remove_orphan_inode(sbi, inode->i_ino);
inc_nlink(inode);
}
return page;
@@ -453,7 +465,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
int err = 0;
int i;
- dentry_hash = f2fs_dentry_hash(name->name, name->len);
+ dentry_hash = f2fs_dentry_hash(name);
level = 0;
current_depth = F2FS_I(dir)->i_current_depth;
if (F2FS_I(dir)->chash == dentry_hash) {
@@ -529,6 +541,27 @@ fail:
return err;
}
+int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
+{
+ struct page *page;
+ int err = 0;
+
+ down_write(&F2FS_I(inode)->i_sem);
+ page = init_inode_metadata(inode, dir, NULL);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto fail;
+ }
+ /* we don't need to mark_inode_dirty now */
+ update_inode(inode, page);
+ f2fs_put_page(page, 1);
+
+ clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
+fail:
+ up_write(&F2FS_I(inode)->i_sem);
+ return err;
+}
+
/*
* It only removes the dentry from the dentry page,corresponding name
* entry in name page does not need to be touched during deletion.
@@ -541,14 +574,13 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
struct address_space *mapping = page->mapping;
struct inode *dir = mapping->host;
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
- void *kaddr = page_address(page);
int i;
lock_page(page);
f2fs_wait_on_page_writeback(page, DATA);
- dentry_blk = (struct f2fs_dentry_block *)kaddr;
- bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
+ dentry_blk = page_address(page);
+ bit_pos = dentry - dentry_blk->dentry;
for (i = 0; i < slots; i++)
test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
@@ -603,7 +635,6 @@ bool f2fs_empty_dir(struct inode *dir)
unsigned long nblock = dir_blocks(dir);
for (bidx = 0; bidx < nblock; bidx++) {
- void *kaddr;
dentry_page = get_lock_data_page(dir, bidx);
if (IS_ERR(dentry_page)) {
if (PTR_ERR(dentry_page) == -ENOENT)
@@ -612,8 +643,8 @@ bool f2fs_empty_dir(struct inode *dir)
return false;
}
- kaddr = kmap_atomic(dentry_page);
- dentry_blk = (struct f2fs_dentry_block *)kaddr;
+
+ dentry_blk = kmap_atomic(dentry_page);
if (bidx == 0)
bit_pos = 2;
else
@@ -621,7 +652,7 @@ bool f2fs_empty_dir(struct inode *dir)
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK,
bit_pos);
- kunmap_atomic(kaddr);
+ kunmap_atomic(dentry_blk);
f2fs_put_page(dentry_page, 1);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 58df97e174d0..4dab5338a97a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -41,6 +41,7 @@
#define F2FS_MOUNT_INLINE_XATTR 0x00000080
#define F2FS_MOUNT_INLINE_DATA 0x00000100
#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
+#define F2FS_MOUNT_NOBARRIER 0x00000400
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -99,8 +100,15 @@ enum {
META_SSA
};
-/* for the list of orphan inodes */
-struct orphan_inode_entry {
+/* for the list of ino */
+enum {
+ ORPHAN_INO, /* for orphan ino list */
+ APPEND_INO, /* for append ino list */
+ UPDATE_INO, /* for update ino list */
+ MAX_INO_ENTRY, /* max. list */
+};
+
+struct ino_entry {
struct list_head list; /* list head */
nid_t ino; /* inode number */
};
@@ -256,6 +264,8 @@ struct f2fs_nm_info {
unsigned int nat_cnt; /* the # of cached nat entries */
struct list_head nat_entries; /* cached nat entry list (clean) */
struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
+ struct list_head nat_entry_set; /* nat entry set list */
+ unsigned int dirty_nat_cnt; /* total num of nat entries in set */
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
@@ -442,14 +452,17 @@ struct f2fs_sb_info {
struct inode *meta_inode; /* cache meta blocks */
struct mutex cp_mutex; /* checkpoint procedure lock */
struct rw_semaphore cp_rwsem; /* blocking FS operations */
- struct mutex node_write; /* locking node writes */
+ struct rw_semaphore node_write; /* locking node writes */
struct mutex writepages; /* mutex for writepages() */
bool por_doing; /* recovery is doing or not */
wait_queue_head_t cp_wait;
- /* for orphan inode management */
- struct list_head orphan_inode_list; /* orphan inode list */
- spinlock_t orphan_inode_lock; /* for orphan inode list */
+ /* for inode management */
+ struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */
+ spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
+ struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */
+
+ /* for orphan inode, use 0'th array */
unsigned int n_orphans; /* # of orphan inodes */
unsigned int max_orphans; /* max orphan inodes */
@@ -768,7 +781,7 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
if (flag == NAT_BITMAP)
return &ckpt->sit_nat_version_bitmap;
else
- return ((unsigned char *)ckpt + F2FS_BLKSIZE);
+ return (unsigned char *)ckpt + F2FS_BLKSIZE;
} else {
offset = (flag == NAT_BITMAP) ?
le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
@@ -983,11 +996,15 @@ enum {
FI_NO_EXTENT, /* not to use the extent cache */
FI_INLINE_XATTR, /* used for inline xattr */
FI_INLINE_DATA, /* used for inline data*/
+ FI_APPEND_WRITE, /* inode has appended data */
+ FI_UPDATE_WRITE, /* inode has in-place-update data */
+ FI_NEED_IPU, /* used for ipu for fdatasync */
};
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
{
- set_bit(flag, &fi->flags);
+ if (!test_bit(flag, &fi->flags))
+ set_bit(flag, &fi->flags);
}
static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
@@ -997,7 +1014,8 @@ static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag)
{
- clear_bit(flag, &fi->flags);
+ if (test_bit(flag, &fi->flags))
+ clear_bit(flag, &fi->flags);
}
static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
@@ -1136,6 +1154,7 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
int update_dent_inode(struct inode *, const struct qstr *);
int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
+int f2fs_do_tmpfile(struct inode *, struct inode *);
int f2fs_make_empty(struct inode *, struct inode *);
bool f2fs_empty_dir(struct inode *);
@@ -1155,7 +1174,7 @@ void f2fs_msg(struct super_block *, const char *, const char *, ...);
/*
* hash.c
*/
-f2fs_hash_t f2fs_dentry_hash(const char *, size_t);
+f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
/*
* node.c
@@ -1173,7 +1192,7 @@ int truncate_inode_blocks(struct inode *, pgoff_t);
int truncate_xattr_node(struct inode *, struct page *);
int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
void remove_inode_page(struct inode *);
-struct page *new_inode_page(struct inode *, const struct qstr *);
+struct page *new_inode_page(struct inode *);
struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
void ra_node_page(struct f2fs_sb_info *, nid_t);
struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
@@ -1185,6 +1204,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);
void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
void recover_node_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, struct node_info *, block_t);
+void recover_inline_xattr(struct inode *, struct page *);
bool recover_xattr_data(struct inode *, struct page *, block_t);
int recover_inode_page(struct f2fs_sb_info *, struct page *);
int restore_node_summary(struct f2fs_sb_info *, unsigned int,
@@ -1206,7 +1226,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *);
-void discard_next_dnode(struct f2fs_sb_info *);
+void discard_next_dnode(struct f2fs_sb_info *, block_t);
int npages_for_summary_flush(struct f2fs_sb_info *);
void allocate_new_segments(struct f2fs_sb_info *);
struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
@@ -1240,6 +1260,9 @@ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
+void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
+void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
+bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *);
void add_orphan_inode(struct f2fs_sb_info *, nid_t);
@@ -1251,7 +1274,7 @@ void add_dirty_dir_inode(struct inode *);
void remove_dirty_dir_inode(struct inode *);
void sync_dirty_dir_inodes(struct f2fs_sb_info *);
void write_checkpoint(struct f2fs_sb_info *, bool);
-void init_orphan_info(struct f2fs_sb_info *);
+void init_ino_entry_info(struct f2fs_sb_info *);
int __init create_checkpoint_caches(void);
void destroy_checkpoint_caches(void);
@@ -1295,7 +1318,6 @@ bool space_for_roll_forward(struct f2fs_sb_info *);
struct f2fs_stat_info {
struct list_head stat_list;
struct f2fs_sb_info *sbi;
- struct mutex stat_lock;
int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
int main_area_segs, main_area_sections, main_area_zones;
int hit_ext, total_ext;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7d8b96275092..208f1a9bd569 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -127,12 +127,30 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
return 0;
trace_f2fs_sync_file_enter(inode);
+
+ /* if fdatasync is triggered, let's do in-place-update */
+ if (datasync)
+ set_inode_flag(fi, FI_NEED_IPU);
+
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (datasync)
+ clear_inode_flag(fi, FI_NEED_IPU);
if (ret) {
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
return ret;
}
+ /*
+ * if there is no written data, don't waste time to write recovery info.
+ */
+ if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
+ !exist_written_data(sbi, inode->i_ino, APPEND_INO)) {
+ if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
+ exist_written_data(sbi, inode->i_ino, UPDATE_INO))
+ goto flush_out;
+ goto out;
+ }
+
/* guarantee free sections for fsync */
f2fs_balance_fs(sbi);
@@ -188,6 +206,13 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
if (ret)
goto out;
+
+ /* once recovery info is written, we don't need to track this */
+ remove_dirty_inode(sbi, inode->i_ino, APPEND_INO);
+ clear_inode_flag(fi, FI_APPEND_WRITE);
+flush_out:
+ remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
+ clear_inode_flag(fi, FI_UPDATE_WRITE);
ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
}
out:
@@ -206,8 +231,9 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
/* find first dirty page index */
pagevec_init(&pvec, 0);
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1);
- pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX;
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs,
+ PAGECACHE_TAG_DIRTY, 1);
+ pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX;
pagevec_release(&pvec);
return pgofs;
}
@@ -272,8 +298,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
}
}
- end_offset = IS_INODE(dn.node_page) ?
- ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+ end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
/* find data/hole in dnode block */
for (; dn.ofs_in_node < end_offset;
@@ -380,13 +405,15 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
return;
lock_page(page);
- if (unlikely(page->mapping != inode->i_mapping)) {
- f2fs_put_page(page, 1);
- return;
- }
+ if (unlikely(!PageUptodate(page) ||
+ page->mapping != inode->i_mapping))
+ goto out;
+
f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, offset, PAGE_CACHE_SIZE - offset);
set_page_dirty(page);
+
+out:
f2fs_put_page(page, 1);
}
@@ -645,6 +672,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
loff_t off_start, off_end;
int ret = 0;
+ f2fs_balance_fs(sbi);
+
ret = inode_newsize_ok(inode, (len + offset));
if (ret)
return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b90dbe55403a..d7947d90ccc3 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -186,7 +186,6 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned int hint = 0;
unsigned int secno;
/*
@@ -194,11 +193,9 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
* selected by background GC before.
* Those segments guarantee they have small valid blocks.
*/
-next:
- secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++);
- if (secno < TOTAL_SECS(sbi)) {
+ for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) {
if (sec_usage_check(sbi, secno))
- goto next;
+ continue;
clear_bit(secno, dirty_i->victim_secmap);
return secno * sbi->segs_per_sec;
}
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 6eb8d269b53b..948d17bf7281 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -69,12 +69,14 @@ static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num)
*buf++ = pad;
}
-f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len)
+f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
{
__u32 hash;
f2fs_hash_t f2fs_hash;
const char *p;
__u32 in[8], buf[4];
+ const char *name = name_info->name;
+ size_t len = name_info->len;
if ((len <= 2) && (name[0] == '.') &&
(name[1] == '.' || name[1] == '\0'))
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 1bba5228c197..5beeccef9ae1 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -172,6 +172,7 @@ int f2fs_write_inline_data(struct inode *inode,
stat_inc_inline_inode(inode);
}
+ set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
sync_inode_page(&dn);
f2fs_put_dnode(&dn);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2cf6962f6cc8..2c39999f3868 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -267,13 +267,14 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
void f2fs_evict_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ nid_t xnid = F2FS_I(inode)->i_xattr_nid;
trace_f2fs_evict_inode(inode);
truncate_inode_pages_final(&inode->i_data);
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
inode->i_ino == F2FS_META_INO(sbi))
- goto no_delete;
+ goto out_clear;
f2fs_bug_on(get_dirty_dents(inode));
remove_dirty_dir_inode(inode);
@@ -295,6 +296,13 @@ void f2fs_evict_inode(struct inode *inode)
sb_end_intwrite(inode->i_sb);
no_delete:
- clear_inode(inode);
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
+ if (xnid)
+ invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
+ if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE))
+ add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
+ if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE))
+ add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
+out_clear:
+ clear_inode(inode);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index a6bdddc33ce2..27b03776ffd2 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -13,6 +13,7 @@
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/ctype.h>
+#include <linux/dcache.h>
#include "f2fs.h"
#include "node.h"
@@ -22,14 +23,13 @@
static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
nid_t ino;
struct inode *inode;
bool nid_free = false;
int err;
- inode = new_inode(sb);
+ inode = new_inode(dir->i_sb);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -102,8 +102,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,
static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct inode *inode;
nid_t ino = 0;
int err;
@@ -146,8 +145,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
struct inode *inode = old_dentry->d_inode;
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
int err;
f2fs_balance_fs(sbi);
@@ -207,8 +205,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct inode *inode = dentry->d_inode;
struct f2fs_dir_entry *de;
struct page *page;
@@ -242,8 +239,7 @@ fail:
static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
const char *symname)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct inode *inode;
size_t symlen = strlen(symname) + 1;
int err;
@@ -330,8 +326,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t rdev)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct inode *inode;
int err = 0;
@@ -369,8 +364,7 @@ out:
static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
- struct super_block *sb = old_dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb);
struct inode *old_inode = old_dentry->d_inode;
struct inode *new_inode = new_dentry->d_inode;
struct page *old_dir_page;
@@ -393,8 +387,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_old;
}
- f2fs_lock_op(sbi);
-
if (new_inode) {
err = -ENOTEMPTY;
@@ -407,6 +399,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!new_entry)
goto out_dir;
+ f2fs_lock_op(sbi);
+
err = acquire_orphan_inode(sbi);
if (err)
goto put_out_dir;
@@ -435,9 +429,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
update_inode_page(old_inode);
update_inode_page(new_inode);
} else {
+ f2fs_lock_op(sbi);
+
err = f2fs_add_link(new_dentry, old_inode);
- if (err)
+ if (err) {
+ f2fs_unlock_op(sbi);
goto out_dir;
+ }
if (old_dir_entry) {
inc_nlink(new_dir);
@@ -472,6 +470,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
return 0;
put_out_dir:
+ f2fs_unlock_op(sbi);
kunmap(new_page);
f2fs_put_page(new_page, 0);
out_dir:
@@ -479,7 +478,151 @@ out_dir:
kunmap(old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
+out_old:
+ kunmap(old_page);
+ f2fs_put_page(old_page, 0);
+out:
+ return err;
+}
+
+static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ struct super_block *sb = old_dir->i_sb;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct inode *old_inode = old_dentry->d_inode;
+ struct inode *new_inode = new_dentry->d_inode;
+ struct page *old_dir_page, *new_dir_page;
+ struct page *old_page, *new_page;
+ struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL;
+ struct f2fs_dir_entry *old_entry, *new_entry;
+ int old_nlink = 0, new_nlink = 0;
+ int err = -ENOENT;
+
+ f2fs_balance_fs(sbi);
+
+ old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
+ if (!old_entry)
+ goto out;
+
+ new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page);
+ if (!new_entry)
+ goto out_old;
+
+ /* prepare for updating ".." directory entry info later */
+ if (old_dir != new_dir) {
+ if (S_ISDIR(old_inode->i_mode)) {
+ err = -EIO;
+ old_dir_entry = f2fs_parent_dir(old_inode,
+ &old_dir_page);
+ if (!old_dir_entry)
+ goto out_new;
+ }
+
+ if (S_ISDIR(new_inode->i_mode)) {
+ err = -EIO;
+ new_dir_entry = f2fs_parent_dir(new_inode,
+ &new_dir_page);
+ if (!new_dir_entry)
+ goto out_old_dir;
+ }
+ }
+
+ /*
+ * If a file and a directory that are not in the same directory are
+ * cross-renamed, we will inc nlink of the file's parent later, so
+ * we should check the upper boundary of its nlink.
+ */
+ if ((!old_dir_entry || !new_dir_entry) &&
+ old_dir_entry != new_dir_entry) {
+ old_nlink = old_dir_entry ? -1 : 1;
+ new_nlink = -old_nlink;
+ err = -EMLINK;
+ if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) ||
+ (new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX))
+ goto out_new_dir;
+ }
+
+ f2fs_lock_op(sbi);
+
+ err = update_dent_inode(old_inode, &new_dentry->d_name);
+ if (err)
+ goto out_unlock;
+
+ err = update_dent_inode(new_inode, &old_dentry->d_name);
+ if (err)
+ goto out_undo;
+
+ /* update ".." directory entry info of old dentry */
+ if (old_dir_entry)
+ f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir);
+
+ /* update ".." directory entry info of new dentry */
+ if (new_dir_entry)
+ f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir);
+
+ /* update directory entry info of old dir inode */
+ f2fs_set_link(old_dir, old_entry, old_page, new_inode);
+
+ down_write(&F2FS_I(old_inode)->i_sem);
+ file_lost_pino(old_inode);
+ up_write(&F2FS_I(old_inode)->i_sem);
+
+ update_inode_page(old_inode);
+
+ old_dir->i_ctime = CURRENT_TIME;
+ if (old_nlink) {
+ down_write(&F2FS_I(old_dir)->i_sem);
+ if (old_nlink < 0)
+ drop_nlink(old_dir);
+ else
+ inc_nlink(old_dir);
+ up_write(&F2FS_I(old_dir)->i_sem);
+ }
+ mark_inode_dirty(old_dir);
+ update_inode_page(old_dir);
+
+ /* update directory entry info of new dir inode */
+ f2fs_set_link(new_dir, new_entry, new_page, old_inode);
+
+ down_write(&F2FS_I(new_inode)->i_sem);
+ file_lost_pino(new_inode);
+ up_write(&F2FS_I(new_inode)->i_sem);
+
+ update_inode_page(new_inode);
+
+ new_dir->i_ctime = CURRENT_TIME;
+ if (new_nlink) {
+ down_write(&F2FS_I(new_dir)->i_sem);
+ if (new_nlink < 0)
+ drop_nlink(new_dir);
+ else
+ inc_nlink(new_dir);
+ up_write(&F2FS_I(new_dir)->i_sem);
+ }
+ mark_inode_dirty(new_dir);
+ update_inode_page(new_dir);
+
+ f2fs_unlock_op(sbi);
+ return 0;
+out_undo:
+ /* Still we may fail to recover name info of f2fs_inode here */
+ update_dent_inode(old_inode, &old_dentry->d_name);
+out_unlock:
f2fs_unlock_op(sbi);
+out_new_dir:
+ if (new_dir_entry) {
+ kunmap(new_dir_page);
+ f2fs_put_page(new_dir_page, 0);
+ }
+out_old_dir:
+ if (old_dir_entry) {
+ kunmap(old_dir_page);
+ f2fs_put_page(old_dir_page, 0);
+ }
+out_new:
+ kunmap(new_page);
+ f2fs_put_page(new_page, 0);
out_old:
kunmap(old_page);
f2fs_put_page(old_page, 0);
@@ -487,6 +630,71 @@ out:
return err;
}
+static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+ return -EINVAL;
+
+ if (flags & RENAME_EXCHANGE) {
+ return f2fs_cross_rename(old_dir, old_dentry,
+ new_dir, new_dentry);
+ }
+ /*
+ * VFS has already handled the new dentry existence case,
+ * here, we just deal with "RENAME_NOREPLACE" as regular rename.
+ */
+ return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+
+static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+ struct inode *inode;
+ int err;
+
+ inode = f2fs_new_inode(dir, mode);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ inode->i_op = &f2fs_file_inode_operations;
+ inode->i_fop = &f2fs_file_operations;
+ inode->i_mapping->a_ops = &f2fs_dblock_aops;
+
+ f2fs_lock_op(sbi);
+ err = acquire_orphan_inode(sbi);
+ if (err)
+ goto out;
+
+ err = f2fs_do_tmpfile(inode, dir);
+ if (err)
+ goto release_out;
+
+ /*
+ * add this non-linked tmpfile to the orphan list; this way we can
+ * remove all unused data of the tmpfile after an abnormal power-off.
+ */
+ add_orphan_inode(sbi, inode->i_ino);
+ f2fs_unlock_op(sbi);
+
+ alloc_nid_done(sbi, inode->i_ino);
+ d_tmpfile(dentry, inode);
+ unlock_new_inode(inode);
+ return 0;
+
+release_out:
+ release_orphan_inode(sbi);
+out:
+ f2fs_unlock_op(sbi);
+ clear_nlink(inode);
+ unlock_new_inode(inode);
+ make_bad_inode(inode);
+ iput(inode);
+ alloc_nid_failed(sbi, inode->i_ino);
+ return err;
+}
+
const struct inode_operations f2fs_dir_inode_operations = {
.create = f2fs_create,
.lookup = f2fs_lookup,
@@ -497,6 +705,8 @@ const struct inode_operations f2fs_dir_inode_operations = {
.rmdir = f2fs_rmdir,
.mknod = f2fs_mknod,
.rename = f2fs_rename,
+ .rename2 = f2fs_rename2,
+ .tmpfile = f2fs_tmpfile,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4b697ccc9b0c..d3d90d284631 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -25,6 +25,7 @@
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
+static struct kmem_cache *nat_entry_set_slab;
bool available_free_memory(struct f2fs_sb_info *sbi, int type)
{
@@ -90,12 +91,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
/* get current nat block page with lock */
src_page = get_meta_page(sbi, src_off);
-
- /* Dirty src_page means that it is already the new target NAT page. */
- if (PageDirty(src_page))
- return src_page;
-
dst_page = grab_meta_page(sbi, dst_off);
+ f2fs_bug_on(PageDirty(src_page));
src_addr = page_address(src_page);
dst_addr = page_address(dst_page);
@@ -845,7 +842,7 @@ void remove_inode_page(struct inode *inode)
truncate_node(&dn);
}
-struct page *new_inode_page(struct inode *inode, const struct qstr *name)
+struct page *new_inode_page(struct inode *inode)
{
struct dnode_of_data dn;
@@ -1234,12 +1231,12 @@ static int f2fs_write_node_page(struct page *page,
if (wbc->for_reclaim)
goto redirty_out;
- mutex_lock(&sbi->node_write);
+ down_read(&sbi->node_write);
set_page_writeback(page);
write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
- mutex_unlock(&sbi->node_write);
+ up_read(&sbi->node_write);
unlock_page(page);
return 0;
@@ -1552,7 +1549,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
clear_node_page_dirty(page);
}
-static void recover_inline_xattr(struct inode *inode, struct page *page)
+void recover_inline_xattr(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
void *src_addr, *dst_addr;
@@ -1591,8 +1588,6 @@ bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
nid_t new_xnid = nid_of_node(page);
struct node_info ni;
- recover_inline_xattr(inode, page);
-
if (!f2fs_has_xattr_block(ofs_of_node(page)))
return false;
@@ -1744,7 +1739,90 @@ skip:
return err;
}
-static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
+static struct nat_entry_set *grab_nat_entry_set(void)
+{
+ struct nat_entry_set *nes =
+ f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
+
+ nes->entry_cnt = 0;
+ INIT_LIST_HEAD(&nes->set_list);
+ INIT_LIST_HEAD(&nes->entry_list);
+ return nes;
+}
+
+static void release_nat_entry_set(struct nat_entry_set *nes,
+ struct f2fs_nm_info *nm_i)
+{
+ f2fs_bug_on(!list_empty(&nes->entry_list));
+
+ nm_i->dirty_nat_cnt -= nes->entry_cnt;
+ list_del(&nes->set_list);
+ kmem_cache_free(nat_entry_set_slab, nes);
+}
+
+static void adjust_nat_entry_set(struct nat_entry_set *nes,
+ struct list_head *head)
+{
+ struct nat_entry_set *next = nes;
+
+ if (list_is_last(&nes->set_list, head))
+ return;
+
+ list_for_each_entry_continue(next, head, set_list)
+ if (nes->entry_cnt <= next->entry_cnt)
+ break;
+
+ list_move_tail(&nes->set_list, &next->set_list);
+}
+
+static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
+{
+ struct nat_entry_set *nes;
+ nid_t start_nid = START_NID(ne->ni.nid);
+
+ list_for_each_entry(nes, head, set_list) {
+ if (nes->start_nid == start_nid) {
+ list_move_tail(&ne->list, &nes->entry_list);
+ nes->entry_cnt++;
+ adjust_nat_entry_set(nes, head);
+ return;
+ }
+ }
+
+ nes = grab_nat_entry_set();
+
+ nes->start_nid = start_nid;
+ list_move_tail(&ne->list, &nes->entry_list);
+ nes->entry_cnt++;
+ list_add(&nes->set_list, head);
+}
+
+static void merge_nats_in_set(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct list_head *dirty_list = &nm_i->dirty_nat_entries;
+ struct list_head *set_list = &nm_i->nat_entry_set;
+ struct nat_entry *ne, *tmp;
+
+ write_lock(&nm_i->nat_tree_lock);
+ list_for_each_entry_safe(ne, tmp, dirty_list, list) {
+ if (nat_get_blkaddr(ne) == NEW_ADDR)
+ continue;
+ add_nat_entry(ne, set_list);
+ nm_i->dirty_nat_cnt++;
+ }
+ write_unlock(&nm_i->nat_tree_lock);
+}
+
+static bool __has_cursum_space(struct f2fs_summary_block *sum, int size)
+{
+ if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES)
+ return true;
+ else
+ return false;
+}
+
+static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1752,12 +1830,6 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
int i;
mutex_lock(&curseg->curseg_mutex);
-
- if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
- mutex_unlock(&curseg->curseg_mutex);
- return false;
- }
-
for (i = 0; i < nats_in_cursum(sum); i++) {
struct nat_entry *ne;
struct f2fs_nat_entry raw_ne;
@@ -1767,23 +1839,21 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
retry:
write_lock(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid);
- if (ne) {
- __set_nat_cache_dirty(nm_i, ne);
- write_unlock(&nm_i->nat_tree_lock);
- continue;
- }
+ if (ne)
+ goto found;
+
ne = grab_nat_entry(nm_i, nid);
if (!ne) {
write_unlock(&nm_i->nat_tree_lock);
goto retry;
}
node_info_from_raw_nat(&ne->ni, &raw_ne);
+found:
__set_nat_cache_dirty(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
}
update_nats_in_cursum(sum, -i);
mutex_unlock(&curseg->curseg_mutex);
- return true;
}
/*
@@ -1794,80 +1864,91 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
- struct nat_entry *ne, *cur;
- struct page *page = NULL;
- struct f2fs_nat_block *nat_blk = NULL;
- nid_t start_nid = 0, end_nid = 0;
- bool flushed;
+ struct nat_entry_set *nes, *tmp;
+ struct list_head *head = &nm_i->nat_entry_set;
+ bool to_journal = true;
- flushed = flush_nats_in_journal(sbi);
-
- if (!flushed)
- mutex_lock(&curseg->curseg_mutex);
-
- /* 1) flush dirty nat caches */
- list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
- nid_t nid;
- struct f2fs_nat_entry raw_ne;
- int offset = -1;
-
- if (nat_get_blkaddr(ne) == NEW_ADDR)
- continue;
+ /* merge nat entries of dirty list to nat entry set temporarily */
+ merge_nats_in_set(sbi);
- nid = nat_get_nid(ne);
+ /*
+ * if there is not enough space in the journal to store the dirty
+ * nat entries, remove all entries from the journal and merge them
+ * into the nat entry set.
+ */
+ if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) {
+ remove_nats_in_journal(sbi);
- if (flushed)
- goto to_nat_page;
+ /*
+ * merge nat entries of dirty list to nat entry set temporarily
+ */
+ merge_nats_in_set(sbi);
+ }
- /* if there is room for nat enries in curseg->sumpage */
- offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
- if (offset >= 0) {
- raw_ne = nat_in_journal(sum, offset);
- goto flush_now;
- }
-to_nat_page:
- if (!page || (start_nid > nid || nid > end_nid)) {
- if (page) {
- f2fs_put_page(page, 1);
- page = NULL;
- }
- start_nid = START_NID(nid);
- end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
+ if (!nm_i->dirty_nat_cnt)
+ return;
- /*
- * get nat block with dirty flag, increased reference
- * count, mapped and lock
- */
+ /*
+ * there are two steps to flush nat entries:
+ * #1, flush nat entries to journal in current hot data summary block.
+ * #2, flush nat entries to nat page.
+ */
+ list_for_each_entry_safe(nes, tmp, head, set_list) {
+ struct f2fs_nat_block *nat_blk;
+ struct nat_entry *ne, *cur;
+ struct page *page;
+ nid_t start_nid = nes->start_nid;
+
+ if (to_journal && !__has_cursum_space(sum, nes->entry_cnt))
+ to_journal = false;
+
+ if (to_journal) {
+ mutex_lock(&curseg->curseg_mutex);
+ } else {
page = get_next_nat_page(sbi, start_nid);
nat_blk = page_address(page);
+ f2fs_bug_on(!nat_blk);
}
- f2fs_bug_on(!nat_blk);
- raw_ne = nat_blk->entries[nid - start_nid];
-flush_now:
- raw_nat_from_node_info(&raw_ne, &ne->ni);
-
- if (offset < 0) {
- nat_blk->entries[nid - start_nid] = raw_ne;
- } else {
- nat_in_journal(sum, offset) = raw_ne;
- nid_in_journal(sum, offset) = cpu_to_le32(nid);
- }
+ /* flush dirty nats in nat entry set */
+ list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
+ struct f2fs_nat_entry *raw_ne;
+ nid_t nid = nat_get_nid(ne);
+ int offset;
+
+ if (to_journal) {
+ offset = lookup_journal_in_cursum(sum,
+ NAT_JOURNAL, nid, 1);
+ f2fs_bug_on(offset < 0);
+ raw_ne = &nat_in_journal(sum, offset);
+ nid_in_journal(sum, offset) = cpu_to_le32(nid);
+ } else {
+ raw_ne = &nat_blk->entries[nid - start_nid];
+ }
+ raw_nat_from_node_info(raw_ne, &ne->ni);
- if (nat_get_blkaddr(ne) == NULL_ADDR &&
+ if (nat_get_blkaddr(ne) == NULL_ADDR &&
add_free_nid(sbi, nid, false) <= 0) {
- write_lock(&nm_i->nat_tree_lock);
- __del_from_nat_cache(nm_i, ne);
- write_unlock(&nm_i->nat_tree_lock);
- } else {
- write_lock(&nm_i->nat_tree_lock);
- __clear_nat_cache_dirty(nm_i, ne);
- write_unlock(&nm_i->nat_tree_lock);
+ write_lock(&nm_i->nat_tree_lock);
+ __del_from_nat_cache(nm_i, ne);
+ write_unlock(&nm_i->nat_tree_lock);
+ } else {
+ write_lock(&nm_i->nat_tree_lock);
+ __clear_nat_cache_dirty(nm_i, ne);
+ write_unlock(&nm_i->nat_tree_lock);
+ }
}
+
+ if (to_journal)
+ mutex_unlock(&curseg->curseg_mutex);
+ else
+ f2fs_put_page(page, 1);
+
+ release_nat_entry_set(nes, nm_i);
}
- if (!flushed)
- mutex_unlock(&curseg->curseg_mutex);
- f2fs_put_page(page, 1);
+
+ f2fs_bug_on(!list_empty(head));
+ f2fs_bug_on(nm_i->dirty_nat_cnt);
}
static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1896,6 +1977,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->nat_entries);
INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
+ INIT_LIST_HEAD(&nm_i->nat_entry_set);
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->free_nid_list_lock);
@@ -1976,19 +2058,30 @@ int __init create_node_manager_caches(void)
nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
sizeof(struct nat_entry));
if (!nat_entry_slab)
- return -ENOMEM;
+ goto fail;
free_nid_slab = f2fs_kmem_cache_create("free_nid",
sizeof(struct free_nid));
- if (!free_nid_slab) {
- kmem_cache_destroy(nat_entry_slab);
- return -ENOMEM;
- }
+ if (!free_nid_slab)
+ goto destory_nat_entry;
+
+ nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
+ sizeof(struct nat_entry_set));
+ if (!nat_entry_set_slab)
+ goto destory_free_nid;
return 0;
+
+destory_free_nid:
+ kmem_cache_destroy(free_nid_slab);
+destory_nat_entry:
+ kmem_cache_destroy(nat_entry_slab);
+fail:
+ return -ENOMEM;
}
void destroy_node_manager_caches(void)
{
+ kmem_cache_destroy(nat_entry_set_slab);
kmem_cache_destroy(free_nid_slab);
kmem_cache_destroy(nat_entry_slab);
}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 7281112cd1c8..8a116a407599 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -89,6 +89,13 @@ enum mem_type {
DIRTY_DENTS /* indicates dirty dentry pages */
};
+struct nat_entry_set {
+ struct list_head set_list; /* link with all nat sets */
+ struct list_head entry_list; /* link with dirty nat entries */
+ nid_t start_nid; /* start nid of nats in set */
+ unsigned int entry_cnt; /* the # of nat entries in set */
+};
+
/*
* For free nid mangement
*/
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index a112368a4a86..fe1c6d921ba2 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -300,6 +300,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
struct node_info ni;
int err = 0, recovered = 0;
+ recover_inline_xattr(inode, page);
+
if (recover_inline_data(inode, page))
goto out;
@@ -434,7 +436,9 @@ next:
int recover_fsync_data(struct f2fs_sb_info *sbi)
{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct list_head inode_list;
+ block_t blkaddr;
int err;
bool need_writecp = false;
@@ -447,6 +451,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
/* step #1: find fsynced inode numbers */
sbi->por_doing = true;
+
+ blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+
err = find_fsync_dnodes(sbi, &inode_list);
if (err)
goto out;
@@ -462,8 +469,21 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
out:
destroy_fsync_dnodes(&inode_list);
kmem_cache_destroy(fsync_entry_slab);
+
+ if (err) {
+ truncate_inode_pages_final(NODE_MAPPING(sbi));
+ truncate_inode_pages_final(META_MAPPING(sbi));
+ }
+
sbi->por_doing = false;
- if (!err && need_writecp)
+ if (err) {
+ discard_next_dnode(sbi, blkaddr);
+
+ /* Flush all the NAT/SIT pages */
+ while (get_pages(sbi, F2FS_DIRTY_META))
+ sync_meta_pages(sbi, META, LONG_MAX);
+ } else if (need_writecp) {
write_checkpoint(sbi, false);
+ }
return err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index d04613df710a..0dfeebae2a50 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -239,6 +239,12 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
struct flush_cmd cmd;
+ trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
+ test_opt(sbi, FLUSH_MERGE));
+
+ if (test_opt(sbi, NOBARRIER))
+ return 0;
+
if (!test_opt(sbi, FLUSH_MERGE))
return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
@@ -272,13 +278,13 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
return -ENOMEM;
spin_lock_init(&fcc->issue_lock);
init_waitqueue_head(&fcc->flush_wait_queue);
- sbi->sm_info->cmd_control_info = fcc;
+ SM_I(sbi)->cmd_control_info = fcc;
fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
err = PTR_ERR(fcc->f2fs_issue_flush);
kfree(fcc);
- sbi->sm_info->cmd_control_info = NULL;
+ SM_I(sbi)->cmd_control_info = NULL;
return err;
}
@@ -287,13 +293,12 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
{
- struct flush_cmd_control *fcc =
- sbi->sm_info->cmd_control_info;
+ struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
if (fcc && fcc->f2fs_issue_flush)
kthread_stop(fcc->f2fs_issue_flush);
kfree(fcc);
- sbi->sm_info->cmd_control_info = NULL;
+ SM_I(sbi)->cmd_control_info = NULL;
}
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
@@ -377,11 +382,8 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
}
-void discard_next_dnode(struct f2fs_sb_info *sbi)
+void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
{
- struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
- block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
-
if (f2fs_issue_discard(sbi, blkaddr, 1)) {
struct page *page = grab_meta_page(sbi, blkaddr);
/* zero-filled page */
@@ -437,17 +439,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi,
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned int segno = -1;
+ unsigned int segno;
unsigned int total_segs = TOTAL_SEGS(sbi);
mutex_lock(&dirty_i->seglist_lock);
- while (1) {
- segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
- segno + 1);
- if (segno >= total_segs)
- break;
+ for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs)
__set_test_and_free(sbi, segno);
- }
mutex_unlock(&dirty_i->seglist_lock);
}
@@ -974,14 +971,12 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
- unsigned int old_cursegno;
curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
- old_cursegno = curseg->segno;
/*
* __add_sum_entry should be resided under the curseg_mutex
@@ -1002,7 +997,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
* since SSR needs latest valid block information.
*/
refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
- locate_dirty_segment(sbi, old_cursegno);
mutex_unlock(&sit_i->sentry_lock);
@@ -1532,7 +1526,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
struct page *page = NULL;
struct f2fs_sit_block *raw_sit = NULL;
unsigned int start = 0, end = 0;
- unsigned int segno = -1;
+ unsigned int segno;
bool flushed;
mutex_lock(&curseg->curseg_mutex);
@@ -1544,7 +1538,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
*/
flushed = flush_sits_in_journal(sbi);
- while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) {
+ for_each_set_bit(segno, bitmap, nsegs) {
struct seg_entry *se = get_seg_entry(sbi, segno);
int sit_offset, offset;
@@ -1703,7 +1697,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
struct curseg_info *array;
int i;
- array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
+ array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
if (!array)
return -ENOMEM;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 7091204680f4..55973f7b0330 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -347,8 +347,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
if (test_and_clear_bit(segno, free_i->free_segmap)) {
free_i->free_segments++;
- next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi),
- start_segno);
+ next = find_next_bit(free_i->free_segmap,
+ start_segno + sbi->segs_per_sec, start_segno);
if (next >= start_segno + sbi->segs_per_sec) {
if (test_and_clear_bit(secno, free_i->free_secmap))
free_i->free_sections++;
@@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
if (S_ISDIR(inode->i_mode))
return false;
+ /* this is only set during fdatasync */
+ if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
+ return true;
+
switch (SM_I(sbi)->ipu_policy) {
case F2FS_IPU_FORCE:
return true;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8f96d9372ade..657582fc7601 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -52,6 +52,7 @@ enum {
Opt_inline_xattr,
Opt_inline_data,
Opt_flush_merge,
+ Opt_nobarrier,
Opt_err,
};
@@ -69,6 +70,7 @@ static match_table_t f2fs_tokens = {
{Opt_inline_xattr, "inline_xattr"},
{Opt_inline_data, "inline_data"},
{Opt_flush_merge, "flush_merge"},
+ {Opt_nobarrier, "nobarrier"},
{Opt_err, NULL},
};
@@ -339,6 +341,9 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_flush_merge:
set_opt(sbi, FLUSH_MERGE);
break;
+ case Opt_nobarrier:
+ set_opt(sbi, NOBARRIER);
+ break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
@@ -544,6 +549,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",inline_data");
if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
seq_puts(seq, ",flush_merge");
+ if (test_opt(sbi, NOBARRIER))
+ seq_puts(seq, ",nobarrier");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
@@ -615,7 +622,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* Previous and new state of filesystem is RO,
* so skip checking GC and FLUSH_MERGE conditions.
*/
- if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
+ if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
goto skip;
/*
@@ -642,8 +649,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
*/
if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
destroy_flush_cmd_control(sbi);
- } else if (test_opt(sbi, FLUSH_MERGE) &&
- !sbi->sm_info->cmd_control_info) {
+ } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) {
err = create_flush_cmd_control(sbi);
if (err)
goto restore_gc;
@@ -947,7 +953,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
- mutex_init(&sbi->node_write);
+ init_rwsem(&sbi->node_write);
sbi->por_doing = false;
spin_lock_init(&sbi->stat_lock);
@@ -997,7 +1003,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
INIT_LIST_HEAD(&sbi->dir_inode_list);
spin_lock_init(&sbi->dir_inode_lock);
- init_orphan_info(sbi);
+ init_ino_entry_info(sbi);
/* setup f2fs internal modules */
err = build_segment_manager(sbi);
@@ -1034,8 +1040,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_node_inode;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
+ iput(root);
err = -EINVAL;
- goto free_root_inode;
+ goto free_node_inode;
}
sb->s_root = d_make_root(root); /* allocate root dentry */
@@ -1082,7 +1089,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
* If filesystem is not mounted as read-only then
* do start the gc_thread.
*/
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (!f2fs_readonly(sb)) {
/* After POR, we can run background GC thread.*/
err = start_gc_thread(sbi);
if (err)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index be568b7311d6..ef9bef118342 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -342,7 +342,8 @@ static void __inode_wait_for_writeback(struct inode *inode)
wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
while (inode->i_state & I_SYNC) {
spin_unlock(&inode->i_lock);
- __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
+ __wait_on_bit(wqh, &wq, bit_wait,
+ TASK_UNINTERRUPTIBLE);
spin_lock(&inode->i_lock);
}
}
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index aec01be91b0a..89acec742e0b 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -160,7 +160,7 @@ void __fscache_enable_cookie(struct fscache_cookie *cookie,
_enter("%p", cookie);
wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
- fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
goto out_unlock;
@@ -255,7 +255,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
if (!fscache_defer_lookup) {
_debug("non-deferred lookup %p", &cookie->flags);
wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
- fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
_debug("complete");
if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags))
goto unavailable;
@@ -463,7 +463,6 @@ void __fscache_wait_on_invalidate(struct fscache_cookie *cookie)
_enter("%p", cookie);
wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING,
- fscache_wait_bit_interruptible,
TASK_UNINTERRUPTIBLE);
_leave("");
@@ -525,7 +524,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
}
wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
- fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
goto out_unlock_enable;
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index bc6c08fcfddd..7872a62ef30c 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -97,8 +97,6 @@ static inline bool fscache_object_congested(void)
return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
}
-extern int fscache_wait_bit(void *);
-extern int fscache_wait_bit_interruptible(void *);
extern int fscache_wait_atomic_t(atomic_t *);
/*
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 63f868e869b9..a31b83c5cbd9 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -197,24 +197,6 @@ static void __exit fscache_exit(void)
module_exit(fscache_exit);
/*
- * wait_on_bit() sleep function for uninterruptible waiting
- */
-int fscache_wait_bit(void *flags)
-{
- schedule();
- return 0;
-}
-
-/*
- * wait_on_bit() sleep function for interruptible waiting
- */
-int fscache_wait_bit_interruptible(void *flags)
-{
- schedule();
- return signal_pending(current);
-}
-
-/*
* wait_on_atomic_t() sleep function for uninterruptible waiting
*/
int fscache_wait_atomic_t(atomic_t *p)
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index ed70714503fa..85332b9d19d1 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -298,7 +298,6 @@ int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
jif = jiffies;
if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
- fscache_wait_bit_interruptible,
TASK_INTERRUPTIBLE) != 0) {
fscache_stat(&fscache_n_retrievals_intr);
_leave(" = -ERESTARTSYS");
@@ -342,7 +341,6 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,
if (stat_op_waits)
fscache_stat(stat_op_waits);
if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
- fscache_wait_bit_interruptible,
TASK_INTERRUPTIBLE) != 0) {
ret = fscache_cancel_op(op, do_cancel);
if (ret == 0)
@@ -351,7 +349,7 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,
/* it's been removed from the pending queue by another party,
* so we should get to run shortly */
wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
- fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
}
_debug("<<< GO");
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ee4e04fe60fc..7f513b1ceb2c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -856,27 +856,6 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
}
/**
- * gfs2_glock_holder_wait
- * @word: unused
- *
- * This function and gfs2_glock_demote_wait both show up in the WCHAN
- * field. Thus I've separated these otherwise identical functions in
- * order to be more informative to the user.
- */
-
-static int gfs2_glock_holder_wait(void *word)
-{
- schedule();
- return 0;
-}
-
-static int gfs2_glock_demote_wait(void *word)
-{
- schedule();
- return 0;
-}
-
-/**
* gfs2_glock_wait - wait on a glock acquisition
* @gh: the glock holder
*
@@ -888,7 +867,7 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
unsigned long time1 = jiffies;
might_sleep();
- wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
+ wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
/* Lengthen the minimum hold time. */
gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
@@ -1128,7 +1107,7 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh)
struct gfs2_glock *gl = gh->gh_gl;
gfs2_glock_dq(gh);
might_sleep();
- wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
+ wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
}
/**
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 4fafea1c9ecf..641383a9c1bb 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -936,12 +936,6 @@ fail:
return error;
}
-static int dlm_recovery_wait(void *word)
-{
- schedule();
- return 0;
-}
-
static int control_first_done(struct gfs2_sbd *sdp)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -976,7 +970,7 @@ restart:
fs_info(sdp, "control_first_done wait gen %u\n", start_gen);
wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
- dlm_recovery_wait, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
goto restart;
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index bc564c0d6d16..d3eae244076e 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1024,20 +1024,13 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp)
lm->lm_unmount(sdp);
}
-static int gfs2_journalid_wait(void *word)
-{
- if (signal_pending(current))
- return -EINTR;
- schedule();
- return 0;
-}
-
static int wait_on_journal(struct gfs2_sbd *sdp)
{
if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
return 0;
- return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, gfs2_journalid_wait, TASK_INTERRUPTIBLE);
+ return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, TASK_INTERRUPTIBLE)
+ ? -EINTR : 0;
}
void gfs2_online_uevent(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 94555d4c5698..573bd3b758fa 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -591,12 +591,6 @@ done:
wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
}
-static int gfs2_recovery_wait(void *word)
-{
- schedule();
- return 0;
-}
-
int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
{
int rv;
@@ -609,7 +603,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
BUG_ON(!rv);
if (wait)
- wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait,
+ wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
TASK_UNINTERRUPTIBLE);
return wait ? jd->jd_recover_error : 0;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 1319b5c4ec68..2607ff13d486 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -864,12 +864,6 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
return error;
}
-static int gfs2_umount_recovery_wait(void *word)
-{
- schedule();
- return 0;
-}
-
/**
* gfs2_put_super - Unmount the filesystem
* @sb: The VFS superblock
@@ -894,7 +888,7 @@ restart:
continue;
spin_unlock(&sdp->sd_jindex_spin);
wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
- gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
goto restart;
}
spin_unlock(&sdp->sd_jindex_spin);
diff --git a/fs/inode.c b/fs/inode.c
index 6eecb7ff0b9a..5938f3928944 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1695,13 +1695,6 @@ int inode_needs_sync(struct inode *inode)
}
EXPORT_SYMBOL(inode_needs_sync);
-int inode_wait(void *word)
-{
- schedule();
- return 0;
-}
-EXPORT_SYMBOL(inode_wait);
-
/*
* If we try to find an inode in the inode hash while it is being
* deleted, we have to wait until the filesystem completes its
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6f0f590cc5a3..5f09370c90a8 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -763,12 +763,6 @@ static void warn_dirty_buffer(struct buffer_head *bh)
bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
}
-static int sleep_on_shadow_bh(void *word)
-{
- io_schedule();
- return 0;
-}
-
/*
* If the buffer is already part of the current transaction, then there
* is nothing we need to do. If it is already part of a prior
@@ -906,8 +900,8 @@ repeat:
if (buffer_shadow(bh)) {
JBUFFER_TRACE(jh, "on shadow: sleep");
jbd_unlock_bh_state(bh);
- wait_on_bit(&bh->b_state, BH_Shadow,
- sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE);
+ wait_on_bit_io(&bh->b_state, BH_Shadow,
+ TASK_UNINTERRUPTIBLE);
goto repeat;
}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index d895b4b7b661..4429d6d9217f 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -896,7 +896,7 @@ const struct file_operations kernfs_file_fops = {
* @ops: kernfs operations for the file
* @priv: private data for the file
* @ns: optional namespace tag of the file
- * @static_name: don't copy file name
+ * @name_is_static: don't copy file name
* @key: lockdep key for the file's active_ref, %NULL to disable lockdep
*
* Returns the created node on success, ERR_PTR() value on error.
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 1812f026960c..daa8e7514eae 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -306,11 +306,9 @@ static struct nsm_handle *nsm_lookup_priv(const struct nsm_private *priv)
static void nsm_init_private(struct nsm_handle *nsm)
{
u64 *p = (u64 *)&nsm->sm_priv.data;
- struct timespec ts;
s64 ns;
- ktime_get_ts(&ts);
- ns = timespec_to_ns(&ts);
+ ns = ktime_get_ns();
put_unaligned(ns, p);
put_unaligned((unsigned long)nsm, p + 1);
}
diff --git a/fs/locks.c b/fs/locks.c
index 717fbc404e6b..a6f54802d277 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -325,7 +325,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock,
return -ENOMEM;
fl->fl_file = filp;
- fl->fl_owner = (fl_owner_t)filp;
+ fl->fl_owner = filp;
fl->fl_pid = current->tgid;
fl->fl_flags = FL_FLOCK;
fl->fl_type = type;
@@ -431,7 +431,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl)
if (assign_type(fl, type) != 0)
return -EINVAL;
- fl->fl_owner = (fl_owner_t)current->files;
+ fl->fl_owner = current->files;
fl->fl_pid = current->tgid;
fl->fl_file = filp;
@@ -1155,7 +1155,6 @@ EXPORT_SYMBOL(posix_lock_file_wait);
int locks_mandatory_locked(struct file *file)
{
struct inode *inode = file_inode(file);
- fl_owner_t owner = current->files;
struct file_lock *fl;
/*
@@ -1165,7 +1164,8 @@ int locks_mandatory_locked(struct file *file)
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!IS_POSIX(fl))
continue;
- if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file)
+ if (fl->fl_owner != current->files &&
+ fl->fl_owner != file)
break;
}
spin_unlock(&inode->i_lock);
@@ -1205,7 +1205,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
for (;;) {
if (filp) {
- fl.fl_owner = (fl_owner_t)filp;
+ fl.fl_owner = filp;
fl.fl_flags &= ~FL_SLEEP;
error = __posix_lock_file(inode, &fl, NULL);
if (!error)
@@ -1948,7 +1948,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
cmd = F_GETLK;
file_lock.fl_flags |= FL_OFDLCK;
- file_lock.fl_owner = (fl_owner_t)filp;
+ file_lock.fl_owner = filp;
}
error = vfs_test_lock(filp, &file_lock);
@@ -2103,7 +2103,7 @@ again:
cmd = F_SETLK;
file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = (fl_owner_t)filp;
+ file_lock->fl_owner = filp;
break;
case F_OFD_SETLKW:
error = -EINVAL;
@@ -2112,7 +2112,7 @@ again:
cmd = F_SETLKW;
file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = (fl_owner_t)filp;
+ file_lock->fl_owner = filp;
/* Fallthrough */
case F_SETLKW:
file_lock->fl_flags |= FL_SLEEP;
@@ -2170,7 +2170,7 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
cmd = F_GETLK64;
file_lock.fl_flags |= FL_OFDLCK;
- file_lock.fl_owner = (fl_owner_t)filp;
+ file_lock.fl_owner = filp;
}
error = vfs_test_lock(filp, &file_lock);
@@ -2242,7 +2242,7 @@ again:
cmd = F_SETLK64;
file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = (fl_owner_t)filp;
+ file_lock->fl_owner = filp;
break;
case F_OFD_SETLKW:
error = -EINVAL;
@@ -2251,7 +2251,7 @@ again:
cmd = F_SETLKW64;
file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = (fl_owner_t)filp;
+ file_lock->fl_owner = filp;
/* Fallthrough */
case F_SETLKW64:
file_lock->fl_flags |= FL_SLEEP;
@@ -2324,11 +2324,11 @@ void locks_remove_file(struct file *filp)
if (!inode->i_flock)
return;
- locks_remove_posix(filp, (fl_owner_t)filp);
+ locks_remove_posix(filp, filp);
if (filp->f_op->flock) {
struct file_lock fl = {
- .fl_owner = (fl_owner_t)filp,
+ .fl_owner = filp,
.fl_pid = current->tgid,
.fl_file = filp,
.fl_flags = FL_FLOCK,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4042ff58fe3f..524dd80d1898 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -361,8 +361,8 @@ start:
* Prevent starvation issues if someone is doing a consistency
* sync-to-disk
*/
- ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
if (ret)
return ret;
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 44bf0140a4c7..e2a0361e24c6 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -783,8 +783,8 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
{
might_sleep();
- wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
}
static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 567983d2c0eb..7dd55b745c4d 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -174,7 +174,9 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
static struct key_type key_type_id_resolver = {
.name = "id_resolver",
- .instantiate = user_instantiate,
+ .preparse = user_preparse,
+ .free_preparse = user_free_preparse,
+ .instantiate = generic_key_instantiate,
.match = user_match,
.revoke = user_revoke,
.destroy = user_destroy,
@@ -282,6 +284,8 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
desc, "", 0, idmap);
mutex_unlock(&idmap->idmap_mutex);
}
+ if (!IS_ERR(rkey))
+ set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags);
kfree(desc);
return rkey;
@@ -394,7 +398,9 @@ static const struct rpc_pipe_ops idmap_upcall_ops = {
static struct key_type key_type_id_resolver_legacy = {
.name = "id_legacy",
- .instantiate = user_instantiate,
+ .preparse = user_preparse,
+ .free_preparse = user_free_preparse,
+ .instantiate = generic_key_instantiate,
.match = user_match,
.revoke = user_revoke,
.destroy = user_destroy,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9927913c97c2..abd37a380535 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -75,7 +75,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
* nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
* @word: long word containing the bit lock
*/
-int nfs_wait_bit_killable(void *word)
+int nfs_wait_bit_killable(struct wait_bit_key *key)
{
if (fatal_signal_pending(current))
return -ERESTARTSYS;
@@ -1074,8 +1074,8 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
* the bit lock here if it looks like we're going to be doing that.
*/
for (;;) {
- ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
if (ret)
goto out;
spin_lock(&inode->i_lock);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f415cbf9f6c3..617f36611d4a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -348,7 +348,7 @@ extern int nfs_drop_inode(struct inode *);
extern void nfs_clear_inode(struct inode *);
extern void nfs_evict_inode(struct inode *);
void nfs_zap_acl_cache(struct inode *inode);
-extern int nfs_wait_bit_killable(void *word);
+extern int nfs_wait_bit_killable(struct wait_bit_key *key);
/* super.c */
extern const struct super_operations nfs_sops;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 848f6853c59e..42f121182167 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1251,8 +1251,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
might_sleep();
atomic_inc(&clp->cl_count);
- res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
if (res)
goto out;
if (clp->cl_cons_state < 0)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 17fab89f6358..0be5050638f7 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -115,7 +115,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)
set_bit(NFS_IO_INPROGRESS, &c->flags);
if (atomic_read(&c->io_count) == 0)
break;
- ret = nfs_wait_bit_killable(&c->flags);
+ ret = nfs_wait_bit_killable(&q.key);
} while (atomic_read(&c->io_count) != 0);
finish_wait(wq, &q.wait);
return ret;
@@ -136,12 +136,6 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
return __nfs_iocounter_wait(c);
}
-static int nfs_wait_bit_uninterruptible(void *word)
-{
- io_schedule();
- return 0;
-}
-
/*
* nfs_page_group_lock - lock the head of the page group
* @req - request in group that is to be locked
@@ -156,7 +150,6 @@ nfs_page_group_lock(struct nfs_page *req)
WARN_ON_ONCE(head != head->wb_head);
wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
- nfs_wait_bit_uninterruptible,
TASK_UNINTERRUPTIBLE);
}
@@ -435,9 +428,8 @@ void nfs_release_request(struct nfs_page *req)
int
nfs_wait_on_request(struct nfs_page *req)
{
- return wait_on_bit(&req->wb_flags, PG_BUSY,
- nfs_wait_bit_uninterruptible,
- TASK_UNINTERRUPTIBLE);
+ return wait_on_bit_io(&req->wb_flags, PG_BUSY,
+ TASK_UNINTERRUPTIBLE);
}
/*
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6fdcd233d6f7..a8914b335617 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1885,7 +1885,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
if (!sync)
goto out;
- status = wait_on_bit_lock(&nfsi->flags,
+ status = wait_on_bit_lock_action(&nfsi->flags,
NFS_INO_LAYOUTCOMMITTING,
nfs_wait_bit_killable,
TASK_KILLABLE);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5e2f10304548..962c9ee758be 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -623,7 +623,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
int err;
/* Stop dirtying of new pages while we sync */
- err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
+ err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING,
nfs_wait_bit_killable, TASK_KILLABLE);
if (err)
goto out_err;
@@ -1703,7 +1703,7 @@ int nfs_commit_inode(struct inode *inode, int how)
return error;
if (!may_wait)
goto out_mark_dirty;
- error = wait_on_bit(&NFS_I(inode)->flags,
+ error = wait_on_bit_action(&NFS_I(inode)->flags,
NFS_INO_COMMIT,
nfs_wait_bit_killable,
TASK_KILLABLE);
diff --git a/fs/open.c b/fs/open.c
index 36662d036237..d6fd3acde134 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -263,11 +263,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
return -EPERM;
/*
- * We can not allow to do any fallocate operation on an active
- * swapfile
+ * We cannot allow any fallocate operation on an active swapfile
*/
if (IS_SWAPFILE(inode))
- ret = -ETXTBSY;
+ return -ETXTBSY;
/*
* Revalidate the write permissions, in case security policy has
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 64db2bceac59..cd3653e4f35c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -297,15 +297,11 @@ static void render_cap_t(struct seq_file *m, const char *header,
seq_puts(m, header);
CAP_FOR_EACH_U32(__capi) {
seq_printf(m, "%08x",
- a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
+ a->cap[CAP_LAST_U32 - __capi]);
}
seq_putc(m, '\n');
}
-/* Remove non-existent capabilities */
-#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \
- CAP_TO_MASK(CAP_LAST_CAP + 1) - 1)
-
static inline void task_cap(struct seq_file *m, struct task_struct *p)
{
const struct cred *cred;
@@ -319,11 +315,6 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
cap_bset = cred->cap_bset;
rcu_read_unlock();
- NORM_CAPS(cap_inheritable);
- NORM_CAPS(cap_permitted);
- NORM_CAPS(cap_effective);
- NORM_CAPS(cap_bset);
-
render_cap_t(m, "CapInh:\t", &cap_inheritable);
render_cap_t(m, "CapPrm:\t", &cap_permitted);
render_cap_t(m, "CapEff:\t", &cap_effective);
@@ -473,13 +464,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
priority = task_prio(task);
nice = task_nice(task);
- /* Temporary variable needed for gcc-2.96 */
- /* convert timespec -> nsec*/
- start_time =
- (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC
- + task->real_start_time.tv_nsec;
/* convert nsec -> ticks */
- start_time = nsec_to_clock_t(start_time);
+ start_time = nsec_to_clock_t(task->real_start_time);
seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
seq_put_decimal_ll(m, ' ', ppid);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 0013142c0475..80c350216ea8 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -35,8 +35,9 @@ struct timerfd_ctx {
ktime_t moffs;
wait_queue_head_t wqh;
u64 ticks;
- int expired;
int clockid;
+ short unsigned expired;
+ short unsigned settime_flags; /* to show in fdinfo */
struct rcu_head rcu;
struct list_head clist;
bool might_cancel;
@@ -92,7 +93,7 @@ static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm,
*/
void timerfd_clock_was_set(void)
{
- ktime_t moffs = ktime_get_monotonic_offset();
+ ktime_t moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
struct timerfd_ctx *ctx;
unsigned long flags;
@@ -125,7 +126,7 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx)
{
if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
return false;
- ctx->moffs = ktime_get_monotonic_offset();
+ ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
return true;
}
@@ -196,6 +197,8 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
if (timerfd_canceled(ctx))
return -ECANCELED;
}
+
+ ctx->settime_flags = flags & TFD_SETTIME_FLAGS;
return 0;
}
@@ -284,11 +287,77 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
return res;
}
+#ifdef CONFIG_PROC_FS
+/*
+ * Emit a textual description of a timerfd into the seq_file @m.
+ *
+ * The context is taken from file->private_data. The remaining time
+ * (timerfd_get_remaining()) and the interval (ctx->tintv) are sampled
+ * together under ctx->wqh.lock and converted to timespec form; the
+ * result of seq_printf() is returned to the caller.
+ *
+ * Output fields, in order: clockid, ticks, settime flags (octal),
+ * it_value (sec, nsec) and it_interval (sec, nsec).
+ */
+static int timerfd_show(struct seq_file *m, struct file *file)
+{
+ struct timerfd_ctx *ctx = file->private_data;
+ struct itimerspec t;
+
+ /* Sample value and interval under the same lock hold. */
+ spin_lock_irq(&ctx->wqh.lock);
+ t.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
+ t.it_interval = ktime_to_timespec(ctx->tintv);
+ spin_unlock_irq(&ctx->wqh.lock);
+
+ /*
+  * NOTE(review): clockid, ticks and settime_flags are read here after
+  * the lock has been dropped, so they are not guaranteed to belong to
+  * the same snapshot as it_value/it_interval - confirm this is intended.
+  */
+ return seq_printf(m,
+ "clockid: %d\n"
+ "ticks: %llu\n"
+ "settime flags: 0%o\n"
+ "it_value: (%llu, %llu)\n"
+ "it_interval: (%llu, %llu)\n",
+ ctx->clockid, (unsigned long long)ctx->ticks,
+ ctx->settime_flags,
+ (unsigned long long)t.it_value.tv_sec,
+ (unsigned long long)t.it_value.tv_nsec,
+ (unsigned long long)t.it_interval.tv_sec,
+ (unsigned long long)t.it_interval.tv_nsec);
+}
+#else
+#define timerfd_show NULL
+#endif
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+/*
+ * ioctl handler for timerfd files.
+ *
+ * TFD_IOC_SET_TICKS: @arg is a user pointer to a u64 holding the new
+ * tick count. The value is stored in ctx->ticks and waiters on ctx->wqh
+ * are woken, unless timerfd_canceled() reports the timer as cancelled.
+ *
+ * Returns 0 on success, or:
+ *   -EFAULT     the tick count could not be copied from user space
+ *   -EINVAL     the supplied tick count is zero
+ *   -ECANCELED  timerfd_canceled() returned true for this context
+ *   -ENOTTY     @cmd is not a command handled here
+ */
+static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct timerfd_ctx *ctx = file->private_data;
+ int ret = 0;
+
+ switch (cmd) {
+ case TFD_IOC_SET_TICKS: {
+ u64 ticks;
+
+ /* Fetch and validate the value before taking the lock. */
+ if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks)))
+ return -EFAULT;
+ if (!ticks)
+ return -EINVAL;
+
+ spin_lock_irq(&ctx->wqh.lock);
+ if (!timerfd_canceled(ctx)) {
+ ctx->ticks = ticks;
+ /*
+  * NOTE(review): ticks is always non-zero at this point (zero
+  * was rejected with -EINVAL above), so this test cannot fail.
+  */
+ if (ticks)
+ wake_up_locked(&ctx->wqh);
+ } else
+ ret = -ECANCELED;
+ spin_unlock_irq(&ctx->wqh.lock);
+ break;
+ }
+ default:
+ ret = -ENOTTY;
+ break;
+ }
+
+ return ret;
+}
+#else
+#define timerfd_ioctl NULL
+#endif
+
static const struct file_operations timerfd_fops = {
.release = timerfd_release,
.poll = timerfd_poll,
.read = timerfd_read,
.llseek = noop_llseek,
+ .show_fdinfo = timerfd_show,
+ .unlocked_ioctl = timerfd_ioctl,
};
static int timerfd_fget(int fd, struct fd *p)
@@ -336,7 +405,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
else
hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
- ctx->moffs = ktime_get_monotonic_offset();
+ ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));