From b9c1c26739ec2d4b4fb70207a0a9ad6747e43f4c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 30 May 2019 11:56:23 -0400 Subject: ext4: gracefully handle ext4_break_layouts() failure during truncate ext4_break_layouts() may fail e.g. due to a signal being delivered. Thus we need to handle its failure gracefully and not by taking the filesystem down. Currently ext4_break_layouts() failure is rare but it may become more common once RDMA uses layout leases for handling long-term page pins for DAX mappings. To handle the failure we need to move ext4_break_layouts() earlier during setattr handling before we do hard to undo changes such as modifying inode size. To be able to do that we also have to move some other checks which are better done without holding i_mmap_sem earlier. Reported-and-tested-by: Ira Weiny Reviewed-by: Ira Weiny Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 66 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c7f77c643008..c16071547c9c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5571,7 +5571,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE) { handle_t *handle; loff_t oldsize = inode->i_size; - int shrink = (attr->ia_size <= inode->i_size); + int shrink = (attr->ia_size < inode->i_size); if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -5585,18 +5585,33 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size) inode_inc_iversion(inode); - if (ext4_should_order_data(inode) && - (attr->ia_size < inode->i_size)) { - error = ext4_begin_ordered_truncate(inode, + if (shrink) { + if (ext4_should_order_data(inode)) { + error = ext4_begin_ordered_truncate(inode, attr->ia_size); - if (error) - goto err_out; + if (error) + goto err_out; + } + /* + * Blocks are going to be removed from the inode. Wait + * for dio in flight. + */ + inode_dio_wait(inode); + } + + down_write(&EXT4_I(inode)->i_mmap_sem); + + rc = ext4_break_layouts(inode); + if (rc) { + up_write(&EXT4_I(inode)->i_mmap_sem); + return rc; } + if (attr->ia_size != inode->i_size) { handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); if (IS_ERR(handle)) { error = PTR_ERR(handle); - goto err_out; + goto out_mmap_sem; } if (ext4_handle_valid(handle) && shrink) { error = ext4_orphan_add(handle, inode); @@ -5624,42 +5639,31 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) i_size_write(inode, attr->ia_size); up_write(&EXT4_I(inode)->i_data_sem); ext4_journal_stop(handle); - if (error) { - if (orphan && inode->i_nlink) - ext4_orphan_del(NULL, inode); - goto err_out; + if (error) + goto out_mmap_sem; + if (!shrink) { + pagecache_isize_extended(inode, oldsize, + inode->i_size); + } else if (ext4_should_journal_data(inode)) { + ext4_wait_for_tail_page_commit(inode); } } - if (!shrink) { - pagecache_isize_extended(inode, oldsize, inode->i_size); - } else { - /* - * Blocks are going to be removed from the inode. Wait - * for dio in flight. - */ - inode_dio_wait(inode); - } - if (orphan && ext4_should_journal_data(inode)) - ext4_wait_for_tail_page_commit(inode); - down_write(&EXT4_I(inode)->i_mmap_sem); - - rc = ext4_break_layouts(inode); - if (rc) { - up_write(&EXT4_I(inode)->i_mmap_sem); - error = rc; - goto err_out; - } /* * Truncate pagecache after we've waited for commit * in data=journal mode to make pages freeable. */ truncate_pagecache(inode, inode->i_size); - if (shrink) { + /* + * Call ext4_truncate() even if i_size didn't change to + * truncate possible preallocated blocks. + */ + if (attr->ia_size <= oldsize) { rc = ext4_truncate(inode); if (rc) error = rc; } +out_mmap_sem: up_write(&EXT4_I(inode)->i_mmap_sem); } -- cgit v1.2.3 From 7821ce417ec730a80d46ad0a513a84692433a5e8 Mon Sep 17 00:00:00 2001 From: Gaowei Pu Date: Thu, 30 May 2019 15:08:34 -0400 Subject: jbd2: fix some print format mistakes There are some print format mistakes in debug messages. Fix them. Signed-off-by: Gaowei Pu Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/jbd2/journal.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 43df0c943229..38b426c5ed03 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -203,7 +203,7 @@ loop: if (journal->j_flags & JBD2_UNMOUNT) goto end_loop; - jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", + jbd_debug(1, "commit_sequence=%u, commit_request=%u\n", journal->j_commit_sequence, journal->j_commit_request); if (journal->j_commit_sequence != journal->j_commit_request) { @@ -324,7 +324,7 @@ static void journal_kill_thread(journal_t *journal) * IO is in progress. do_get_write_access() handles this. * * The function returns a pointer to the buffer_head to be used for IO. - * + * * * Return value: * <0: Error @@ -500,7 +500,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) */ journal->j_commit_request = target; - jbd_debug(1, "JBD2: requesting commit %d/%d\n", + jbd_debug(1, "JBD2: requesting commit %u/%u\n", journal->j_commit_request, journal->j_commit_sequence); journal->j_running_transaction->t_requested = jiffies; @@ -513,7 +513,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n", journal->j_commit_request, journal->j_commit_sequence, - target, journal->j_running_transaction ? + target, journal->j_running_transaction ? journal->j_running_transaction->t_tid : 0); return 0; } @@ -698,12 +698,12 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) #ifdef CONFIG_JBD2_DEBUG if (!tid_geq(journal->j_commit_request, tid)) { printk(KERN_ERR - "%s: error: j_commit_request=%d, tid=%d\n", + "%s: error: j_commit_request=%u, tid=%u\n", __func__, journal->j_commit_request, tid); } #endif while (tid_gt(tid, journal->j_commit_sequence)) { - jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", + jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n", tid, journal->j_commit_sequence); read_unlock(&journal->j_state_lock); wake_up(&journal->j_wait_commit); @@ -944,7 +944,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) trace_jbd2_update_log_tail(journal, tid, block, freed); jbd_debug(1, - "Cleaning journal tail from %d to %d (offset %lu), " + "Cleaning journal tail from %u to %u (offset %lu), " "freeing %lu\n", journal->j_tail_sequence, tid, block, freed); @@ -1318,7 +1318,7 @@ static int journal_reset(journal_t *journal) */ if (sb->s_start == 0) { jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " - "(start %ld, seq %d, errno %d)\n", + "(start %ld, seq %u, errno %d)\n", journal->j_tail, journal->j_tail_sequence, journal->j_errno); journal->j_flags |= JBD2_FLUSHED; @@ -1453,7 +1453,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op) return; } - jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", + jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n", journal->j_tail_sequence); sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); -- cgit v1.2.3 From a49773064bc2284bf7fe57c3492897135f65a37c Mon Sep 17 00:00:00 2001 From: Liu Song Date: Thu, 30 May 2019 15:15:57 -0400 Subject: jbd2: fix typo in comment of journal_submit_inode_data_buffers delayed/dealyed Signed-off-by: Liu Song Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/jbd2/commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index efd0ce9489ae..c8c1d6cc6e5d 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -184,7 +184,7 @@ static int journal_wait_on_commit_record(journal_t *journal, /* * write the filemap data using writepage() address_space_operations. * We don't do block allocation here even for delalloc. We don't - * use writepages() because with dealyed allocation we may be doing + * use writepages() because with delayed allocation we may be doing * block allocation in writepages(). */ static int journal_submit_inode_data_buffers(struct address_space *mapping) -- cgit v1.2.3 From 2e53840362771c73eb0a5ff71611507e64e8eecd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 9 Jun 2019 21:41:41 -0400 Subject: ext4: don't allow any modifications to an immutable file Don't allow any modifications to a file that's marked immutable, which means that we have to flush all the writable pages to make the readonly and we have to check the setattr/setflags parameters more closely. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/ioctl.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e486e49b31ed..7af835ac8d23 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -269,6 +269,29 @@ static int uuid_is_zero(__u8 u[16]) } #endif +/* + * If immutable is set and we are not clearing it, we're not allowed to change + * anything else in the inode. Don't error out if we're only trying to set + * immutable on an immutable file. + */ +static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid, + unsigned int flags) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned int oldflags = ei->i_flags; + + if (!(oldflags & EXT4_IMMUTABLE_FL) || !(flags & EXT4_IMMUTABLE_FL)) + return 0; + + if ((oldflags & ~EXT4_IMMUTABLE_FL) != (flags & ~EXT4_IMMUTABLE_FL)) + return -EPERM; + if (ext4_has_feature_project(inode->i_sb) && + __kprojid_val(ei->i_projid) != new_projid) + return -EPERM; + + return 0; +} + static int ext4_ioctl_setflags(struct inode *inode, unsigned int flags) { @@ -340,6 +363,20 @@ static int ext4_ioctl_setflags(struct inode *inode, } } + /* + * Wait for all pending directio and then flush all the dirty pages + * for this file. The flush marks all the pages readonly, so any + * subsequent attempt to write to the file (particularly mmap pages) + * will come through the filesystem and fail. + */ + if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) && + (flags & EXT4_IMMUTABLE_FL)) { + inode_dio_wait(inode); + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto flags_out; + } + handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); @@ -769,7 +806,11 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return err; inode_lock(inode); - err = ext4_ioctl_setflags(inode, flags); + err = ext4_ioctl_check_immutable(inode, + from_kprojid(&init_user_ns, ei->i_projid), + flags); + if (!err) + err = ext4_ioctl_setflags(inode, flags); inode_unlock(inode); mnt_drop_write_file(filp); return err; @@ -1139,6 +1180,9 @@ resizefs_out: goto out; flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) | (flags & EXT4_FL_XFLAG_VISIBLE); + err = ext4_ioctl_check_immutable(inode, fa.fsx_projid, flags); + if (err) + goto out; err = ext4_ioctl_setflags(inode, flags); if (err) goto out; -- cgit v1.2.3 From 02b016ca7f99229ae6227e7b2fc950c4e140d74a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 9 Jun 2019 22:04:33 -0400 Subject: ext4: enforce the immutable flag on open files According to the chattr man page, "a file with the 'i' attribute cannot be modified..." Historically, this was only enforced when the file was opened, per the rest of the description, "... and the file can not be opened in write mode". There is general agreement that we should standardize all file systems to prevent modifications even for files that were opened at the time the immutable flag is set. Eventually, a change to enforce this at the VFS layer should be landing in mainline. Until then, enforce this at the ext4 level to prevent xfstests generic/553 from failing. Signed-off-by: Theodore Ts'o Cc: "Darrick J. Wong" Cc: stable@kernel.org --- fs/ext4/file.c | 4 ++++ fs/ext4/inode.c | 11 +++++++++++ 2 files changed, 15 insertions(+) (limited to 'fs') diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 2c5baa5e8291..f4a24a46245e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -165,6 +165,10 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from) ret = generic_write_checks(iocb, from); if (ret <= 0) return ret; + + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + /* * If we have encountered a bitmap-format file, the size limit * is smaller than s_maxbytes, which is for extent-mapped files. diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c16071547c9c..ed1d8f9ce5f9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5520,6 +5520,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + if (unlikely(IS_APPEND(inode) && + (ia_valid & (ATTR_MODE | ATTR_UID | + ATTR_GID | ATTR_TIMES_SET)))) + return -EPERM; + error = setattr_prepare(dentry, attr); if (error) return error; @@ -6194,6 +6202,9 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf) get_block_t *get_block; int retries = 0; + if (unlikely(IS_IMMUTABLE(inode))) + return VM_FAULT_SIGBUS; + sb_start_pagefault(inode->i_sb); file_update_time(vma->vm_file); -- cgit v1.2.3 From 7ddf79a103958ff7e529a3bc0c5b3d1e9cbc22c7 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Mon, 10 Jun 2019 00:13:32 -0400 Subject: ext4: only set project inherit bit for directory It doesn't make any sense to have project inherit bits for regular files, even though this won't cause any problem, but it is better fix this. Signed-off-by: Wang Shilong Signed-off-by: Theodore Ts'o Reviewed-by: Darrick J. Wong Reviewed-by: Andreas Dilger --- fs/ext4/ext4.h | 3 ++- fs/ext4/ioctl.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1cb67859e051..ceb74093e138 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -421,7 +421,8 @@ struct flex_groups { EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL) /* Flags that are appropriate for regular files (all but dir-specific ones). */ -#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL)) +#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\ + EXT4_PROJINHERIT_FL)) /* Flags that are appropriate for non-directories/regular files. */ #define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7af835ac8d23..74648d42c69b 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -779,6 +779,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return ext4_ioc_getfsmap(sb, (void __user *)arg); case EXT4_IOC_GETFLAGS: flags = ei->i_flags & EXT4_FL_USER_VISIBLE; + if (S_ISREG(inode->i_mode)) + flags &= ~EXT4_PROJINHERIT_FL; return put_user(flags, (int __user *) arg); case EXT4_IOC_SETFLAGS: { int err; -- cgit v1.2.3 From c60990b361cc0a08e7b442a8191d89e9304c2d62 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 19 Jun 2019 16:30:03 -0400 Subject: ext4: clean up kerneldoc warnigns when building with W=1 Signed-off-by: Theodore Ts'o --- fs/ext4/balloc.c | 4 ++-- fs/ext4/dir.c | 3 +++ fs/ext4/extents.c | 4 ++-- fs/ext4/indirect.c | 22 ++++++++-------------- fs/ext4/mballoc.c | 5 +++-- fs/ext4/move_extent.c | 12 ++++++------ 6 files changed, 24 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index e5d6ee61ff48..0b202e00d93f 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -603,9 +603,9 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi, } /** - * ext4_should_retry_alloc() + * ext4_should_retry_alloc() - check if a block allocation should be retried * @sb: super block - * @retries number of attemps has been made + * @retries: number of attemps has been made * * ext4_should_retry_alloc() is called when ENOSPC is returned, and if * it is profitable to retry the operation, this function will wait diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index c7843b149a1e..1f7784bee42a 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -33,6 +33,9 @@ static int ext4_dx_readdir(struct file *, struct dir_context *); /** + * is_dx_dir() - check if a directory is using htree indexing + * @inode: directory inode + * * Check if the given dir-inode refers to an htree-indexed directory * (or a directory which could potentially get converted to use htree * indexing). diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d40ed940001e..92266a2da7d6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5676,8 +5676,8 @@ out_mutex: } /** - * ext4_swap_extents - Swap extents between two inodes - * + * ext4_swap_extents() - Swap extents between two inodes + * @handle: handle for this transaction * @inode1: First inode * @inode2: Second inode * @lblk1: Start block for first inode diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 2024d3fa5504..36699a131168 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -294,14 +294,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, } /** - * ext4_alloc_branch - allocate and set up a chain of blocks. - * @handle: handle for this transaction - * @inode: owner - * @indirect_blks: number of allocated indirect blocks - * @blks: number of allocated direct blocks - * @goal: preferred place for allocation - * @offsets: offsets (in the blocks) to store the pointers to next. - * @branch: place to store the chain in. + * ext4_alloc_branch() - allocate and set up a chain of blocks + * @handle: handle for this transaction + * @ar: structure describing the allocation request + * @indirect_blks: number of allocated indirect blocks + * @offsets: offsets (in the blocks) to store the pointers to next. + * @branch: place to store the chain in. * * This function allocates blocks, zeroes out all but the last one, * links them into chain and (if we are synchronous) writes them to disk. @@ -396,15 +394,11 @@ failed: } /** - * ext4_splice_branch - splice the allocated branch onto inode. + * ext4_splice_branch() - splice the allocated branch onto inode. * @handle: handle for this transaction - * @inode: owner - * @block: (logical) number of block we are adding - * @chain: chain of indirect blocks (with a missing link - see - * ext4_alloc_branch) + * @ar: structure describing the allocation request * @where: location of missing link * @num: number of indirect blocks we are adding - * @blks: number of direct blocks we are adding * * This function fills the missing link and does all housekeeping needed in * inode (->i_blocks, etc.). In case of success we end up with the full diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 99ba720dbb7a..a3e2767bdf2f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4696,8 +4696,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, * ext4_free_blocks() -- Free given blocks and update quota * @handle: handle for this transaction * @inode: inode - * @block: start physical block to free - * @count: number of blocks to count + * @bh: optional buffer of the block to be freed + * @block: starting physical block to be freed + * @count: number of blocks to be freed * @flags: flags used by ext4_free_blocks */ void ext4_free_blocks(handle_t *handle, struct inode *inode, diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 1083a9f3f16a..3ec9627c9713 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -13,11 +13,10 @@ #include "ext4_extents.h" /** - * get_ext_path - Find an extent path for designated logical block number. - * - * @inode: an inode which is searched + * get_ext_path() - Find an extent path for designated logical block number. + * @inode: inode to be searched * @lblock: logical block number to find an extent path - * @path: pointer to an extent path pointer (for output) + * @ppath: pointer to an extent path pointer (for output) * * ext4_find_extent wrapper. Return 0 on success, or a negative error value * on failure. @@ -42,8 +41,9 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock, } /** - * ext4_double_down_write_data_sem - Acquire two inodes' write lock - * of i_data_sem + * ext4_double_down_write_data_sem() - write lock two inodes's i_data_sem + * @first: inode to be locked + * @second: inode to be locked * * Acquire write lock of i_data_sem of the two inodes */ -- cgit v1.2.3 From b03755ad6f33b7b8cd7312a3596a2dbf496de6e7 Mon Sep 17 00:00:00 2001 From: zhangjs Date: Wed, 19 Jun 2019 23:41:29 -0400 Subject: ext4: make __ext4_get_inode_loc plug Add a blk_plug to prevent the inode table readahead from being submitted as small I/O requests. Signed-off-by: zhangjs Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/inode.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ed1d8f9ce5f9..11d7c2e403e9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4570,6 +4570,7 @@ static int __ext4_get_inode_loc(struct inode *inode, struct buffer_head *bh; struct super_block *sb = inode->i_sb; ext4_fsblk_t block; + struct blk_plug plug; int inodes_per_block, inode_offset; iloc->bh = NULL; @@ -4658,6 +4659,7 @@ make_io: * If we need to do any I/O, try to pre-readahead extra * blocks from the inode table. */ + blk_start_plug(&plug); if (EXT4_SB(sb)->s_inode_readahead_blks) { ext4_fsblk_t b, end, table; unsigned num; @@ -4688,6 +4690,7 @@ make_io: get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); + blk_finish_plug(&plug); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { EXT4_ERROR_INODE_BLOCK(inode, block, -- cgit v1.2.3 From 3ae72562ad917df36a1b1247d749240e3b4865db Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 19 Jun 2019 23:45:09 -0400 Subject: ext4: optimize case-insensitive lookups Temporarily cache a casefolded version of the file name under lookup in ext4_filename, to avoid repeatedly casefolding it. I got up to 30% speedup on lookups of large directories (>100k entries), depending on the length of the string under lookup. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Theodore Ts'o --- fs/ext4/dir.c | 2 +- fs/ext4/ext4.h | 39 ++++++++++++++++++++++++++++++++++++--- fs/ext4/namei.c | 43 ++++++++++++++++++++++++++++++++++++++----- fs/unicode/utf8-core.c | 28 ++++++++++++++++++++++++++++ include/linux/unicode.h | 3 +++ 5 files changed, 106 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 1f7784bee42a..770a1e6d4672 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -677,7 +677,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len, return memcmp(str, name->name, len); } - return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr); + return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr, false); } static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ceb74093e138..7215a2a2a0de 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2078,6 +2078,9 @@ struct ext4_filename { #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_str crypto_buf; #endif +#ifdef CONFIG_UNICODE + struct fscrypt_str cf_name; +#endif }; #define fname_name(p) ((p)->disk_name.name) @@ -2303,6 +2306,12 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, struct ext4_group_desc *gdp); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); +#ifdef CONFIG_UNICODE +extern void ext4_fname_setup_ci_filename(struct inode *dir, + const struct qstr *iname, + struct fscrypt_str *fname); +#endif + #ifdef CONFIG_FS_ENCRYPTION static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst, const struct fscrypt_name *src) @@ -2329,6 +2338,10 @@ static inline int ext4_fname_setup_filename(struct inode *dir, return err; ext4_fname_from_fscrypt_name(fname, &name); + +#ifdef CONFIG_UNICODE + ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name); +#endif return 0; } @@ -2344,6 +2357,10 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, return err; ext4_fname_from_fscrypt_name(fname, &name); + +#ifdef CONFIG_UNICODE + ext4_fname_setup_ci_filename(dir, &dentry->d_name, &fname->cf_name); +#endif return 0; } @@ -2357,6 +2374,11 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) fname->crypto_buf.name = NULL; fname->usr_fname = NULL; fname->disk_name.name = NULL; + +#ifdef CONFIG_UNICODE + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; +#endif } #else /* !CONFIG_FS_ENCRYPTION */ static inline int ext4_fname_setup_filename(struct inode *dir, @@ -2367,6 +2389,11 @@ static inline int ext4_fname_setup_filename(struct inode *dir, fname->usr_fname = iname; fname->disk_name.name = (unsigned char *) iname->name; fname->disk_name.len = iname->len; + +#ifdef CONFIG_UNICODE + ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name); +#endif + return 0; } @@ -2377,7 +2404,13 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, return ext4_fname_setup_filename(dir, &dentry->d_name, 1, fname); } -static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } +static inline void ext4_fname_free_filename(struct ext4_filename *fname) +{ +#ifdef CONFIG_UNICODE + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; +#endif +} #endif /* !CONFIG_FS_ENCRYPTION */ /* dir.c */ @@ -3120,8 +3153,8 @@ extern int ext4_handle_dirty_dirent_node(handle_t *handle, struct inode *inode, struct buffer_head *bh); extern int ext4_ci_compare(const struct inode *parent, - const struct qstr *name, - const struct qstr *entry); + const struct qstr *fname, + const struct qstr *entry, bool quick); #define S_SHIFT 12 static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index cd01c4a67ffb..4909ced4e672 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1259,19 +1259,24 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) #ifdef CONFIG_UNICODE /* * Test whether a case-insensitive directory entry matches the filename - * being searched for. + * being searched for. If quick is set, assume the name being looked up + * is already in the casefolded form. * * Returns: 0 if the directory entry matches, more than 0 if it * doesn't match or less than zero on error. */ int ext4_ci_compare(const struct inode *parent, const struct qstr *name, - const struct qstr *entry) + const struct qstr *entry, bool quick) { const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb); const struct unicode_map *um = sbi->s_encoding; int ret; - ret = utf8_strncasecmp(um, name, entry); + if (quick) + ret = utf8_strncasecmp_folded(um, name, entry); + else + ret = utf8_strncasecmp(um, name, entry); + if (ret < 0) { /* Handle invalid character sequence as either an error * or as an opaque byte sequence. @@ -1287,6 +1292,27 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name, return ret; } + +void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, + struct fscrypt_str *cf_name) +{ + if (!IS_CASEFOLDED(dir)) { + cf_name->name = NULL; + return; + } + + cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS); + if (!cf_name->name) + return; + + cf_name->len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding, + iname, cf_name->name, + EXT4_NAME_LEN); + if (cf_name->len <= 0) { + kfree(cf_name->name); + cf_name->name = NULL; + } +} #endif /* @@ -1313,8 +1339,15 @@ static inline bool ext4_match(const struct inode *parent, #endif #ifdef CONFIG_UNICODE - if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) - return (ext4_ci_compare(parent, fname->usr_fname, &entry) == 0); + if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) { + if (fname->cf_name.name) { + struct qstr cf = {.name = fname->cf_name.name, + .len = fname->cf_name.len}; + return !ext4_ci_compare(parent, &cf, &entry, true); + } + return !ext4_ci_compare(parent, fname->usr_fname, &entry, + false); + } #endif return fscrypt_match_name(&f, de->name, de->name_len); diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c index 6afab4fdce90..71ca4d047d65 100644 --- a/fs/unicode/utf8-core.c +++ b/fs/unicode/utf8-core.c @@ -73,6 +73,34 @@ int utf8_strncasecmp(const struct unicode_map *um, } EXPORT_SYMBOL(utf8_strncasecmp); +/* String cf is expected to be a valid UTF-8 casefolded + * string. + */ +int utf8_strncasecmp_folded(const struct unicode_map *um, + const struct qstr *cf, + const struct qstr *s1) +{ + const struct utf8data *data = utf8nfdicf(um->version); + struct utf8cursor cur1; + int c1, c2; + int i = 0; + + if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) + return -EINVAL; + + do { + c1 = utf8byte(&cur1); + c2 = cf->name[i++]; + if (c1 < 0) + return -EINVAL; + if (c1 != c2) + return 1; + } while (c1); + + return 0; +} +EXPORT_SYMBOL(utf8_strncasecmp_folded); + int utf8_casefold(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen) { diff --git a/include/linux/unicode.h b/include/linux/unicode.h index aec2c6d800aa..990aa97d8049 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -17,6 +17,9 @@ int utf8_strncmp(const struct unicode_map *um, int utf8_strncasecmp(const struct unicode_map *um, const struct qstr *s1, const struct qstr *s2); +int utf8_strncasecmp_folded(const struct unicode_map *um, + const struct qstr *cf, + const struct qstr *s1); int utf8_normalize(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen); -- cgit v1.2.3 From c708b1c6de7f996b073bb3296af17f37881a09de Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 20 Jun 2019 00:10:10 -0400 Subject: ext4: remove redundant assignment to node Pointer 'node' is assigned a value that is never read, node is later overwritten when it re-assigned a different value inside the while-loop. The assignment is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/extents_status.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 023a3eb3afa3..7521de2dcf3a 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -1317,7 +1317,6 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end, es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk); if (!es) goto out_wrap; - node = &es->rb_node; while (*nr_to_scan > 0) { if (es->es_lblk > end) { ei->i_es_shrink_lblk = end + 1; -- cgit v1.2.3 From 6ba0e7dc64a5adcda2fbe65adc466891795d639e Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Thu, 20 Jun 2019 17:24:56 -0400 Subject: jbd2: introduce jbd2_inode dirty range scoping Currently both journal_submit_inode_data_buffers() and journal_finish_inode_data_buffers() operate on the entire address space of each of the inodes associated with a given journal entry. The consequence of this is that if we have an inode where we are constantly appending dirty pages we can end up waiting for an indefinite amount of time in journal_finish_inode_data_buffers() while we wait for all the pages under writeback to be written out. The easiest way to cause this type of workload is do just dd from /dev/zero to a file until it fills the entire filesystem. This can cause journal_finish_inode_data_buffers() to wait for the duration of the entire dd operation. We can improve this situation by scoping each of the inode dirty ranges associated with a given transaction. We do this via the jbd2_inode structure so that the scoping is contained within jbd2 and so that it follows the lifetime and locking rules for that structure. This allows us to limit the writeback & wait in journal_submit_inode_data_buffers() and journal_finish_inode_data_buffers() respectively to the dirty range for a given struct jdb2_inode, keeping us from waiting forever if the inode in question is still being appended to. Signed-off-by: Ross Zwisler Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@vger.kernel.org --- fs/jbd2/commit.c | 23 +++++++++++++++++------ fs/jbd2/journal.c | 4 ++++ fs/jbd2/transaction.c | 49 ++++++++++++++++++++++++++++--------------------- include/linux/jbd2.h | 22 ++++++++++++++++++++++ 4 files changed, 71 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index c8c1d6cc6e5d..132fb92098c7 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -187,14 +187,15 @@ static int journal_wait_on_commit_record(journal_t *journal, * use writepages() because with delayed allocation we may be doing * block allocation in writepages(). */ -static int journal_submit_inode_data_buffers(struct address_space *mapping) +static int journal_submit_inode_data_buffers(struct address_space *mapping, + loff_t dirty_start, loff_t dirty_end) { int ret; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = mapping->nrpages * 2, - .range_start = 0, - .range_end = i_size_read(mapping->host), + .range_start = dirty_start, + .range_end = dirty_end, }; ret = generic_writepages(mapping, &wbc); @@ -218,6 +219,9 @@ static int journal_submit_data_buffers(journal_t *journal, spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { + loff_t dirty_start = jinode->i_dirty_start; + loff_t dirty_end = jinode->i_dirty_end; + if (!(jinode->i_flags & JI_WRITE_DATA)) continue; mapping = jinode->i_vfs_inode->i_mapping; @@ -230,7 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal, * only allocated blocks here. */ trace_jbd2_submit_inode_data(jinode->i_vfs_inode); - err = journal_submit_inode_data_buffers(mapping); + err = journal_submit_inode_data_buffers(mapping, dirty_start, + dirty_end); if (!ret) ret = err; spin_lock(&journal->j_list_lock); @@ -257,12 +262,16 @@ static int journal_finish_inode_data_buffers(journal_t *journal, /* For locking, see the comment in journal_submit_data_buffers() */ spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { + loff_t dirty_start = jinode->i_dirty_start; + loff_t dirty_end = jinode->i_dirty_end; + if (!(jinode->i_flags & JI_WAIT_DATA)) continue; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - err = filemap_fdatawait_keep_errors( - jinode->i_vfs_inode->i_mapping); + err = filemap_fdatawait_range_keep_errors( + jinode->i_vfs_inode->i_mapping, dirty_start, + dirty_end); if (!ret) ret = err; spin_lock(&journal->j_list_lock); @@ -282,6 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal, &jinode->i_transaction->t_inode_list); } else { jinode->i_transaction = NULL; + jinode->i_dirty_start = 0; + jinode->i_dirty_end = 0; } } spin_unlock(&journal->j_list_lock); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 38b426c5ed03..17f679aeba7c 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -94,6 +94,8 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); EXPORT_SYMBOL(jbd2_journal_force_commit); EXPORT_SYMBOL(jbd2_journal_inode_add_write); EXPORT_SYMBOL(jbd2_journal_inode_add_wait); +EXPORT_SYMBOL(jbd2_journal_inode_ranged_write); +EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait); EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); @@ -2574,6 +2576,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) jinode->i_next_transaction = NULL; jinode->i_vfs_inode = inode; jinode->i_flags = 0; + jinode->i_dirty_start = 0; + jinode->i_dirty_end = 0; INIT_LIST_HEAD(&jinode->i_list); } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 8ca4fddc705f..990e7b5062e7 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2565,7 +2565,7 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) * File inode in the inode list of the handle's transaction */ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, - unsigned long flags) + unsigned long flags, loff_t start_byte, loff_t end_byte) { transaction_t *transaction = handle->h_transaction; journal_t *journal; @@ -2577,26 +2577,17 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, transaction->t_tid); - /* - * First check whether inode isn't already on the transaction's - * lists without taking the lock. Note that this check is safe - * without the lock as we cannot race with somebody removing inode - * from the transaction. The reason is that we remove inode from the - * transaction only in journal_release_jbd_inode() and when we commit - * the transaction. We are guarded from the first case by holding - * a reference to the inode. We are safe against the second case - * because if jinode->i_transaction == transaction, commit code - * cannot touch the transaction because we hold reference to it, - * and if jinode->i_next_transaction == transaction, commit code - * will only file the inode where we want it. - */ - if ((jinode->i_transaction == transaction || - jinode->i_next_transaction == transaction) && - (jinode->i_flags & flags) == flags) - return 0; - spin_lock(&journal->j_list_lock); jinode->i_flags |= flags; + + if (jinode->i_dirty_end) { + jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte); + jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte); + } else { + jinode->i_dirty_start = start_byte; + jinode->i_dirty_end = end_byte; + } + /* Is inode already attached where we need it? */ if (jinode->i_transaction == transaction || jinode->i_next_transaction == transaction) @@ -2631,12 +2622,28 @@ done: int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode) { return jbd2_journal_file_inode(handle, jinode, - JI_WRITE_DATA | JI_WAIT_DATA); + JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX); } int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode) { - return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA); + return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0, + LLONG_MAX); +} + +int jbd2_journal_inode_ranged_write(handle_t *handle, + struct jbd2_inode *jinode, loff_t start_byte, loff_t length) +{ + return jbd2_journal_file_inode(handle, jinode, + JI_WRITE_DATA | JI_WAIT_DATA, start_byte, + start_byte + length - 1); +} + +int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode, + loff_t start_byte, loff_t length) +{ + return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, + start_byte, start_byte + length - 1); } /* diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 5c04181b7c6d..0e0393e7f41a 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -451,6 +451,22 @@ struct jbd2_inode { * @i_flags: Flags of inode [j_list_lock] */ unsigned long i_flags; + + /** + * @i_dirty_start: + * + * Offset in bytes where the dirty range for this inode starts. + * [j_list_lock] + */ + loff_t i_dirty_start; + + /** + * @i_dirty_end: + * + * Inclusive offset in bytes where the dirty range for this inode + * ends. [j_list_lock] + */ + loff_t i_dirty_end; }; struct jbd2_revoke_table_s; @@ -1397,6 +1413,12 @@ extern int jbd2_journal_force_commit(journal_t *); extern int jbd2_journal_force_commit_nested(journal_t *); extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode); extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode); +extern int jbd2_journal_inode_ranged_write(handle_t *handle, + struct jbd2_inode *inode, loff_t start_byte, + loff_t length); +extern int jbd2_journal_inode_ranged_wait(handle_t *handle, + struct jbd2_inode *inode, loff_t start_byte, + loff_t length); extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, struct jbd2_inode *inode, loff_t new_size); extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); -- cgit v1.2.3 From 73131fbb003b3691cfcf9656f234b00da497fcd6 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Thu, 20 Jun 2019 17:26:26 -0400 Subject: ext4: use jbd2_inode dirty range scoping Use the newly introduced jbd2_inode dirty range scoping to prevent us from waiting forever when trying to complete a journal transaction. Signed-off-by: Ross Zwisler Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@vger.kernel.org --- fs/ext4/ext4_jbd2.h | 12 ++++++------ fs/ext4/inode.c | 13 ++++++++++--- fs/ext4/move_extent.c | 3 ++- 3 files changed, 18 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 75a5309f2231..ef8fcf7d0d3b 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -361,20 +361,20 @@ static inline int ext4_journal_force_commit(journal_t *journal) } static inline int ext4_jbd2_inode_add_write(handle_t *handle, - struct inode *inode) + struct inode *inode, loff_t start_byte, loff_t length) { if (ext4_handle_valid(handle)) - return jbd2_journal_inode_add_write(handle, - EXT4_I(inode)->jinode); + return jbd2_journal_inode_ranged_write(handle, + EXT4_I(inode)->jinode, start_byte, length); return 0; } static inline int ext4_jbd2_inode_add_wait(handle_t *handle, - struct inode *inode) + struct inode *inode, loff_t start_byte, loff_t length) { if (ext4_handle_valid(handle)) - return jbd2_journal_inode_add_wait(handle, - EXT4_I(inode)->jinode); + return jbd2_journal_inode_ranged_wait(handle, + EXT4_I(inode)->jinode, start_byte, length); return 0; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 11d7c2e403e9..35561cde56e7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -731,10 +731,16 @@ out_sem: !(flags & EXT4_GET_BLOCKS_ZERO) && !ext4_is_quota_file(inode) && ext4_should_order_data(inode)) { + loff_t start_byte = + (loff_t)map->m_lblk << inode->i_blkbits; + loff_t length = (loff_t)map->m_len << inode->i_blkbits; + if (flags & EXT4_GET_BLOCKS_IO_SUBMIT) - ret = ext4_jbd2_inode_add_wait(handle, inode); + ret = ext4_jbd2_inode_add_wait(handle, inode, + start_byte, length); else - ret = ext4_jbd2_inode_add_write(handle, inode); + ret = ext4_jbd2_inode_add_write(handle, inode, + start_byte, length); if (ret) return ret; } @@ -4085,7 +4091,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, err = 0; mark_buffer_dirty(bh); if (ext4_should_order_data(inode)) - err = ext4_jbd2_inode_add_write(handle, inode); + err = ext4_jbd2_inode_add_write(handle, inode, from, + length); } unlock: diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 3ec9627c9713..30ce3dc69378 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -390,7 +390,8 @@ data_copy: /* Even in case of data=writeback it is reasonable to pin * inode to transaction, to prevent unexpected data loss */ - *err = ext4_jbd2_inode_add_write(handle, orig_inode); + *err = ext4_jbd2_inode_add_write(handle, orig_inode, + (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size); unlock_pages: unlock_page(pagep[0]); -- cgit v1.2.3 From 9382cde8cd8fb941fc333b644a5772d02e1ff924 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 20 Jun 2019 17:32:21 -0400 Subject: jbd2: drop declaration of journal_sync_buffer() The journal_sync_buffer() function was never carried over from jbd to jbd2. So get rid of the vestigal declaration of this (non-existent) function. Signed-off-by: Theodore Ts'o Reviewed-by: Darrick J. Wong --- fs/jbd2/journal.c | 3 --- include/linux/jbd2.h | 1 - 2 files changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 17f679aeba7c..953990eb70a9 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -66,9 +66,6 @@ EXPORT_SYMBOL(jbd2_journal_get_undo_access); EXPORT_SYMBOL(jbd2_journal_set_triggers); EXPORT_SYMBOL(jbd2_journal_dirty_metadata); EXPORT_SYMBOL(jbd2_journal_forget); -#if 0 -EXPORT_SYMBOL(journal_sync_buffer); -#endif EXPORT_SYMBOL(jbd2_journal_flush); EXPORT_SYMBOL(jbd2_journal_revoke); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0e0393e7f41a..df03825ad1a1 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1373,7 +1373,6 @@ void jbd2_journal_set_triggers(struct buffer_head *, struct jbd2_buffer_trigger_type *type); extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); -extern void journal_sync_buffer (struct buffer_head *); extern int jbd2_journal_invalidatepage(journal_t *, struct page *, unsigned int, unsigned int); extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); -- cgit v1.2.3 From 4e19d6b65fb4fc42e352ce9883649e049da14743 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 20 Jun 2019 21:19:02 -0400 Subject: ext4: allow directory holes The largedir feature was intended to allow ext4 directories to have unmapped directory blocks (e.g., directory holes). And so the released e2fsprogs no longer enforces this for largedir file systems; however, the corresponding change to the kernel-side code was not made. This commit fixes this oversight. Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/dir.c | 19 +++++++++---------- fs/ext4/namei.c | 45 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 46 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 770a1e6d4672..3a77b7affd09 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -112,7 +112,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct buffer_head *bh = NULL; - int dir_has_error = 0; struct fscrypt_str fstr = FSTR_INIT(NULL, 0); if (IS_ENCRYPTED(inode)) { @@ -148,8 +147,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) return err; } - offset = ctx->pos & (sb->s_blocksize - 1); - while (ctx->pos < inode->i_size) { struct ext4_map_blocks map; @@ -158,9 +155,18 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) goto errout; } cond_resched(); + offset = ctx->pos & (sb->s_blocksize - 1); map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); map.m_len = 1; err = ext4_map_blocks(NULL, inode, &map, 0); + if (err == 0) { + /* m_len should never be zero but let's avoid + * an infinite loop if it somehow is */ + if (map.m_len == 0) + map.m_len = 1; + ctx->pos += map.m_len * sb->s_blocksize; + continue; + } if (err > 0) { pgoff_t index = map.m_pblk >> (PAGE_SHIFT - inode->i_blkbits); @@ -179,13 +185,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) } if (!bh) { - if (!dir_has_error) { - EXT4_ERROR_FILE(file, 0, - "directory contains a " - "hole at offset %llu", - (unsigned long long) ctx->pos); - dir_has_error = 1; - } /* corrupt size? Maybe no more blocks to read */ if (ctx->pos > inode->i_blocks << 9) break; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4909ced4e672..0cda080f3fd5 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -82,8 +82,18 @@ static struct buffer_head *ext4_append(handle_t *handle, static int ext4_dx_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent); +/* + * Hints to ext4_read_dirblock regarding whether we expect a directory + * block being read to be an index block, or a block containing + * directory entries (and if the latter, whether it was found via a + * logical block in an htree index block). This is used to control + * what sort of sanity checkinig ext4_read_dirblock() will do on the + * directory block read from the storage device. EITHER will means + * the caller doesn't know what kind of directory block will be read, + * so no specific verification will be done. + */ typedef enum { - EITHER, INDEX, DIRENT + EITHER, INDEX, DIRENT, DIRENT_HTREE } dirblock_type_t; #define ext4_read_dirblock(inode, block, type) \ @@ -109,11 +119,14 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, return bh; } - if (!bh) { + if (!bh && (type == INDEX || type == DIRENT_HTREE)) { ext4_error_inode(inode, func, line, block, - "Directory hole found"); + "Directory hole found for htree %s block", + (type == INDEX) ? "index" : "leaf"); return ERR_PTR(-EFSCORRUPTED); } + if (!bh) + return NULL; dirent = (struct ext4_dir_entry *) bh->b_data; /* Determine whether or not we have an index block */ if (is_dx(inode)) { @@ -980,7 +993,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", (unsigned long)block)); - bh = ext4_read_dirblock(dir, block, DIRENT); + bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); if (IS_ERR(bh)) return PTR_ERR(bh); @@ -1619,7 +1632,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, return (struct buffer_head *) frame; do { block = dx_get_block(frame->at); - bh = ext4_read_dirblock(dir, block, DIRENT); + bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); if (IS_ERR(bh)) goto errout; @@ -2203,6 +2216,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, blocks = dir->i_size >> sb->s_blocksize_bits; for (block = 0; block < blocks; block++) { bh = ext4_read_dirblock(dir, block, DIRENT); + if (bh == NULL) { + bh = ext4_bread(handle, dir, block, + EXT4_GET_BLOCKS_CREATE); + goto add_to_new_block; + } if (IS_ERR(bh)) { retval = PTR_ERR(bh); bh = NULL; @@ -2223,6 +2241,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, brelse(bh); } bh = ext4_append(handle, dir, &block); +add_to_new_block: if (IS_ERR(bh)) { retval = PTR_ERR(bh); bh = NULL; @@ -2267,7 +2286,7 @@ again: return PTR_ERR(frame); entries = frame->entries; at = frame->at; - bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); + bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE); if (IS_ERR(bh)) { err = PTR_ERR(bh); bh = NULL; @@ -2815,7 +2834,10 @@ bool ext4_empty_dir(struct inode *inode) EXT4_ERROR_INODE(inode, "invalid size"); return true; } - bh = ext4_read_dirblock(inode, 0, EITHER); + /* The first directory block must not be a hole, + * so treat it as DIRENT_HTREE + */ + bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) return true; @@ -2837,6 +2859,10 @@ bool ext4_empty_dir(struct inode *inode) brelse(bh); lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); bh = ext4_read_dirblock(inode, lblock, EITHER); + if (bh == NULL) { + offset += sb->s_blocksize; + continue; + } if (IS_ERR(bh)) return true; de = (struct ext4_dir_entry_2 *) bh->b_data; @@ -3402,7 +3428,10 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, struct buffer_head *bh; if (!ext4_has_inline_data(inode)) { - bh = ext4_read_dirblock(inode, 0, EITHER); + /* The first directory block must not be a hole, so + * treat it as DIRENT_HTREE + */ + bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) { *retval = PTR_ERR(bh); return NULL; -- cgit v1.2.3 From f036adb39976467cf57f577490269594994f9eb4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 21 Jun 2019 15:49:26 -0400 Subject: ext4: rename "dirent_csum" functions to use "dirblock" Functions such as ext4_dirent_csum_verify() and ext4_dirent_csum_set() don't actually operate on a directory entry, but a directory block. And while they take a struct ext4_dir_entry *dirent as an argument, it had better be the first directory at the beginning of the direct block, or things will go very wrong. Rename the following functions so that things make more sense, and remove a lot of confusing casts along the way: ext4_dirent_csum_verify -> ext4_dirblock_csum_verify ext4_dirent_csum_set -> ext4_dirblock_csum_set ext4_dirent_csum -> ext4_dirblock_csum ext4_handle_dirty_dirent_node -> ext4_handle_dirty_dirblock Signed-off-by: Theodore Ts'o --- fs/ext4/dir.c | 3 +-- fs/ext4/ext4.h | 9 ++++---- fs/ext4/inline.c | 2 +- fs/ext4/namei.c | 62 ++++++++++++++++++++++++++------------------------------ 4 files changed, 35 insertions(+), 41 deletions(-) (limited to 'fs') diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 3a77b7affd09..86054f31fe4d 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -194,8 +194,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) /* Check the checksum */ if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(inode, - (struct ext4_dir_entry *)bh->b_data)) { + !ext4_dirblock_csum_verify(inode, bh)) { EXT4_ERROR_FILE(file, 0, "directory fails checksum " "at offset %llu", (unsigned long long)ctx->pos); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 7215a2a2a0de..5b86df7ec326 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2602,8 +2602,8 @@ extern int ext4_ext_migrate(struct inode *); extern int ext4_ind_migrate(struct inode *inode); /* namei.c */ -extern int ext4_dirent_csum_verify(struct inode *inode, - struct ext4_dir_entry *dirent); +extern int ext4_dirblock_csum_verify(struct inode *inode, + struct buffer_head *bh); extern int ext4_orphan_add(handle_t *, struct inode *); extern int ext4_orphan_del(handle_t *, struct inode *); extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, @@ -3149,9 +3149,8 @@ extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, unsigned int parent_ino, int dotdot_real_len); extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t, unsigned int blocksize); -extern int ext4_handle_dirty_dirent_node(handle_t *handle, - struct inode *inode, - struct buffer_head *bh); +extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode, + struct buffer_head *bh); extern int ext4_ci_compare(const struct inode *parent, const struct qstr *fname, const struct qstr *entry, bool quick); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index f73bc3925282..f19dd5a08d0d 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1164,7 +1164,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, initialize_dirent_tail(t, inode->i_sb->s_blocksize); } set_buffer_uptodate(dir_block); - err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); + err = ext4_handle_dirty_dirblock(handle, inode, dir_block); if (err) return err; set_buffer_verified(dir_block); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 0cda080f3fd5..4f0bcbbcfe96 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -163,7 +163,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, } } if (!is_dx_block) { - if (ext4_dirent_csum_verify(inode, dirent)) + if (ext4_dirblock_csum_verify(inode, bh)) set_buffer_verified(bh); else { ext4_error_inode(inode, func, line, block, @@ -304,17 +304,17 @@ void initialize_dirent_tail(struct ext4_dir_entry_tail *t, /* Walk through a dirent block to find a checksum "dirent" at the tail */ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, - struct ext4_dir_entry *de) + struct buffer_head *bh) { struct ext4_dir_entry_tail *t; #ifdef PARANOID struct ext4_dir_entry *d, *top; - d = de; - top = (struct ext4_dir_entry *)(((void *)de) + + d = (struct ext4_dir_entry *)bh->b_data; + top = (struct ext4_dir_entry *)(bh->b_data + (EXT4_BLOCK_SIZE(inode->i_sb) - - sizeof(struct ext4_dir_entry_tail))); + sizeof(struct ext4_dir_entry_tail))); while (d < top && d->rec_len) d = (struct ext4_dir_entry *)(((void *)d) + le16_to_cpu(d->rec_len)); @@ -324,7 +324,7 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, t = (struct ext4_dir_entry_tail *)d; #else - t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb)); + t = EXT4_DIRENT_TAIL(bh->b_data, EXT4_BLOCK_SIZE(inode->i_sb)); #endif if (t->det_reserved_zero1 || @@ -336,8 +336,7 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, return t; } -static __le32 ext4_dirent_csum(struct inode *inode, - struct ext4_dir_entry *dirent, int size) +static __le32 ext4_dirblock_csum(struct inode *inode, void *dirent, int size) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); @@ -357,49 +356,49 @@ static void __warn_no_space_for_csum(struct inode *inode, const char *func, "No space for directory leaf checksum. Please run e2fsck -D."); } -int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) +int ext4_dirblock_csum_verify(struct inode *inode, struct buffer_head *bh) { struct ext4_dir_entry_tail *t; if (!ext4_has_metadata_csum(inode->i_sb)) return 1; - t = get_dirent_tail(inode, dirent); + t = get_dirent_tail(inode, bh); if (!t) { warn_no_space_for_csum(inode); return 0; } - if (t->det_checksum != ext4_dirent_csum(inode, dirent, - (void *)t - (void *)dirent)) + if (t->det_checksum != ext4_dirblock_csum(inode, bh->b_data, + (char *)t - bh->b_data)) return 0; return 1; } -static void ext4_dirent_csum_set(struct inode *inode, - struct ext4_dir_entry *dirent) +static void ext4_dirblock_csum_set(struct inode *inode, + struct buffer_head *bh) { struct ext4_dir_entry_tail *t; if (!ext4_has_metadata_csum(inode->i_sb)) return; - t = get_dirent_tail(inode, dirent); + t = get_dirent_tail(inode, bh); if (!t) { warn_no_space_for_csum(inode); return; } - t->det_checksum = ext4_dirent_csum(inode, dirent, - (void *)t - (void *)dirent); + t->det_checksum = ext4_dirblock_csum(inode, bh->b_data, + (char *)t - bh->b_data); } -int ext4_handle_dirty_dirent_node(handle_t *handle, - struct inode *inode, - struct buffer_head *bh) +int ext4_handle_dirty_dirblock(handle_t *handle, + struct inode *inode, + struct buffer_head *bh) { - ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data); + ext4_dirblock_csum_set(inode, bh); return ext4_handle_dirty_metadata(handle, inode, bh); } @@ -1530,8 +1529,7 @@ restart: if (!buffer_verified(bh) && !is_dx_internal_node(dir, block, (struct ext4_dir_entry *)bh->b_data) && - !ext4_dirent_csum_verify(dir, - (struct ext4_dir_entry *)bh->b_data)) { + !ext4_dirblock_csum_verify(dir, bh)) { EXT4_ERROR_INODE(dir, "checksumming directory " "block %lu", (unsigned long)block); brelse(bh); @@ -1894,7 +1892,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, de = de2; } dx_insert_block(frame, hash2 + continued, newblock); - err = ext4_handle_dirty_dirent_node(handle, dir, bh2); + err = ext4_handle_dirty_dirblock(handle, dir, bh2); if (err) goto journal_error; err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); @@ -2022,7 +2020,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, inode_inc_iversion(dir); ext4_mark_inode_dirty(handle, dir); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, dir, bh); + err = ext4_handle_dirty_dirblock(handle, dir, bh); if (err) ext4_std_error(dir->i_sb, err); return 0; @@ -2126,7 +2124,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh); if (retval) goto out_frames; - retval = ext4_handle_dirty_dirent_node(handle, dir, bh2); + retval = ext4_handle_dirty_dirblock(handle, dir, bh2); if (retval) goto out_frames; @@ -2512,7 +2510,7 @@ static int ext4_delete_entry(handle_t *handle, goto out; BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, dir, bh); + err = ext4_handle_dirty_dirblock(handle, dir, bh); if (unlikely(err)) goto out; @@ -2744,7 +2742,7 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, } BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); + err = ext4_handle_dirty_dirblock(handle, inode, dir_block); if (err) goto out; set_buffer_verified(dir_block); @@ -3492,9 +3490,8 @@ static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent, ent->inode, ent->dir_bh); } else { - retval = ext4_handle_dirty_dirent_node(handle, - ent->inode, - ent->dir_bh); + retval = ext4_handle_dirty_dirblock(handle, ent->inode, + ent->dir_bh); } } else { retval = ext4_mark_inode_dirty(handle, ent->inode); @@ -3524,8 +3521,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, ext4_mark_inode_dirty(handle, ent->dir); BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata"); if (!ent->inlined) { - retval = ext4_handle_dirty_dirent_node(handle, - ent->dir, ent->bh); + retval = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh); if (unlikely(retval)) { ext4_std_error(ent->dir->i_sb, retval); return retval; -- cgit v1.2.3 From ddce3b94715ca5a19a107cd7c1d89fea177d2454 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 21 Jun 2019 16:31:47 -0400 Subject: ext4: refactor initialize_dirent_tail() Move the calculation of the location of the dirent tail into initialize_dirent_tail(). Also prefix the function with ext4_ to fix kernel namepsace polution. Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 4 ++-- fs/ext4/inline.c | 9 +++------ fs/ext4/namei.c | 54 +++++++++++++++++++++--------------------------------- 3 files changed, 26 insertions(+), 41 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5b86df7ec326..83128bdd7abb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3147,8 +3147,8 @@ extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, struct ext4_dir_entry_2 *de, int blocksize, int csum_size, unsigned int parent_ino, int dotdot_real_len); -extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t, - unsigned int blocksize); +extern void ext4_initialize_dirent_tail(struct buffer_head *bh, + unsigned int blocksize); extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode, struct buffer_head *bh); extern int ext4_ci_compare(const struct inode *parent, diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index f19dd5a08d0d..796137bb7dfa 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1132,7 +1132,6 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, { int err, csum_size = 0, header_size = 0; struct ext4_dir_entry_2 *de; - struct ext4_dir_entry_tail *t; void *target = dir_block->b_data; /* @@ -1158,11 +1157,9 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size, inode->i_sb->s_blocksize - csum_size); - if (csum_size) { - t = EXT4_DIRENT_TAIL(dir_block->b_data, - inode->i_sb->s_blocksize); - initialize_dirent_tail(t, inode->i_sb->s_blocksize); - } + if (csum_size) + ext4_initialize_dirent_tail(dir_block, + inode->i_sb->s_blocksize); set_buffer_uptodate(dir_block); err = ext4_handle_dirty_dirblock(handle, inode, dir_block); if (err) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4f0bcbbcfe96..183ad614ae3d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -293,9 +293,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode); /* checksumming functions */ -void initialize_dirent_tail(struct ext4_dir_entry_tail *t, - unsigned int blocksize) +void ext4_initialize_dirent_tail(struct buffer_head *bh, + unsigned int blocksize) { + struct ext4_dir_entry_tail *t = EXT4_DIRENT_TAIL(bh->b_data, blocksize); + memset(t, 0, sizeof(struct ext4_dir_entry_tail)); t->det_rec_len = ext4_rec_len_to_disk( sizeof(struct ext4_dir_entry_tail), blocksize); @@ -370,7 +372,7 @@ int ext4_dirblock_csum_verify(struct inode *inode, struct buffer_head *bh) } if (t->det_checksum != ext4_dirblock_csum(inode, bh->b_data, - (char *)t - bh->b_data)) + (char *)t - bh->b_data)) return 0; return 1; @@ -391,7 +393,7 @@ static void ext4_dirblock_csum_set(struct inode *inode, } t->det_checksum = ext4_dirblock_csum(inode, bh->b_data, - (char *)t - bh->b_data); + (char *)t - bh->b_data); } int ext4_handle_dirty_dirblock(handle_t *handle, @@ -1813,7 +1815,6 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, char *data1 = (*bh)->b_data, *data2; unsigned split, move, size; struct ext4_dir_entry_2 *de = NULL, *de2; - struct ext4_dir_entry_tail *t; int csum_size = 0; int err = 0, i; @@ -1874,11 +1875,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, (char *) de2, blocksize); if (csum_size) { - t = EXT4_DIRENT_TAIL(data2, blocksize); - initialize_dirent_tail(t, blocksize); - - t = EXT4_DIRENT_TAIL(data1, blocksize); - initialize_dirent_tail(t, blocksize); + ext4_initialize_dirent_tail(*bh, blocksize); + ext4_initialize_dirent_tail(bh2, blocksize); } dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1, @@ -2039,8 +2037,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; struct dx_entry *entries; struct ext4_dir_entry_2 *de, *de2; - struct ext4_dir_entry_tail *t; - char *data1, *top; + char *data2, *top; unsigned len; int retval; unsigned blocksize; @@ -2080,21 +2077,18 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, return PTR_ERR(bh2); } ext4_set_inode_flag(dir, EXT4_INODE_INDEX); - data1 = bh2->b_data; + data2 = bh2->b_data; - memcpy (data1, de, len); - de = (struct ext4_dir_entry_2 *) data1; - top = data1 + len; + memcpy(data2, de, len); + de = (struct ext4_dir_entry_2 *) data2; + top = data2 + len; while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) de = de2; - de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - - (char *) de, - blocksize); + de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - + (char *) de, blocksize); - if (csum_size) { - t = EXT4_DIRENT_TAIL(data1, blocksize); - initialize_dirent_tail(t, blocksize); - } + if (csum_size) + ext4_initialize_dirent_tail(bh2, blocksize); /* Initialize the root; the dot dirents already exist */ de = (struct ext4_dir_entry_2 *) (&root->dotdot); @@ -2164,7 +2158,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, struct inode *dir = d_inode(dentry->d_parent); struct buffer_head *bh = NULL; struct ext4_dir_entry_2 *de; - struct ext4_dir_entry_tail *t; struct super_block *sb; struct ext4_sb_info *sbi; struct ext4_filename fname; @@ -2249,10 +2242,8 @@ add_to_new_block: de->inode = 0; de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize); - if (csum_size) { - t = EXT4_DIRENT_TAIL(bh->b_data, blocksize); - initialize_dirent_tail(t, blocksize); - } + if (csum_size) + ext4_initialize_dirent_tail(bh, blocksize); retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh); out: @@ -2712,7 +2703,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, { struct buffer_head *dir_block = NULL; struct ext4_dir_entry_2 *de; - struct ext4_dir_entry_tail *t; ext4_lblk_t block = 0; unsigned int blocksize = dir->i_sb->s_blocksize; int csum_size = 0; @@ -2736,10 +2726,8 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, de = (struct ext4_dir_entry_2 *)dir_block->b_data; ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0); set_nlink(inode, 2); - if (csum_size) { - t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize); - initialize_dirent_tail(t, blocksize); - } + if (csum_size) + ext4_initialize_dirent_tail(dir_block, blocksize); BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_dirblock(handle, inode, dir_block); -- cgit v1.2.3 From 7633b08b2750513cef662fbcbe66065b9940fc6a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 21 Jun 2019 21:57:00 -0400 Subject: ext4: rename htree_inline_dir_to_tree() to ext4_inlinedir_to_tree() Clean up namespace pollution by the inline_data code. Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 10 +++++----- fs/ext4/inline.c | 10 +++++----- fs/ext4/namei.c | 8 ++++---- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 83128bdd7abb..bf660aa7a9e0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3104,11 +3104,11 @@ extern int ext4_try_create_inline_dir(handle_t *handle, extern int ext4_read_inline_dir(struct file *filp, struct dir_context *ctx, int *has_inline_data); -extern int htree_inlinedir_to_tree(struct file *dir_file, - struct inode *dir, ext4_lblk_t block, - struct dx_hash_info *hinfo, - __u32 start_hash, __u32 start_minor_hash, - int *has_inline_data); +extern int ext4_inlinedir_to_tree(struct file *dir_file, + struct inode *dir, ext4_lblk_t block, + struct dx_hash_info *hinfo, + __u32 start_hash, __u32 start_minor_hash, + int *has_inline_data); extern struct buffer_head *ext4_find_inline_entry(struct inode *dir, struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir, diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 796137bb7dfa..88cdf3c90bd1 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1324,11 +1324,11 @@ out: * inlined dir. It returns the number directory entries loaded * into the tree. If there is an error it is returned in err. */ -int htree_inlinedir_to_tree(struct file *dir_file, - struct inode *dir, ext4_lblk_t block, - struct dx_hash_info *hinfo, - __u32 start_hash, __u32 start_minor_hash, - int *has_inline_data) +int ext4_inlinedir_to_tree(struct file *dir_file, + struct inode *dir, ext4_lblk_t block, + struct dx_hash_info *hinfo, + __u32 start_hash, __u32 start_minor_hash, + int *has_inline_data) { int err = 0, count = 0; unsigned int parent_ino; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 183ad614ae3d..c9568fee9e11 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1104,10 +1104,10 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; if (ext4_has_inline_data(dir)) { int has_inline_data = 1; - count = htree_inlinedir_to_tree(dir_file, dir, 0, - &hinfo, start_hash, - start_minor_hash, - &has_inline_data); + count = ext4_inlinedir_to_tree(dir_file, dir, 0, + &hinfo, start_hash, + start_minor_hash, + &has_inline_data); if (has_inline_data) { *next_hash = ~0; return count; -- cgit v1.2.3 From 78e9605d4fdde6d58b2e6db5b6b52dde7f92333e Mon Sep 17 00:00:00 2001 From: Kimberly Brown Date: Tue, 2 Jul 2019 17:38:55 -0400 Subject: ext4: replace ktype default_attrs with default_groups The kobj_type default_attrs field is being replaced by the default_groups field. Replace the default_attrs field in ext4_sb_ktype and ext4_feat_ktype with default_groups. Use the ATTRIBUTE_GROUPS macro to create ext4_groups and ext4_feat_groups. Signed-off-by: Kimberly Brown Signed-off-by: Theodore Ts'o --- fs/ext4/sysfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 04b4f53f0659..b3cd7655a6ff 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -230,6 +230,7 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(journal_task), NULL, }; +ATTRIBUTE_GROUPS(ext4); /* Features this copy of ext4 supports */ EXT4_ATTR_FEATURE(lazy_itable_init); @@ -256,6 +257,7 @@ static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(metadata_csum_seed), NULL, }; +ATTRIBUTE_GROUPS(ext4_feat); static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi) { @@ -374,13 +376,13 @@ static const struct sysfs_ops ext4_attr_ops = { }; static struct kobj_type ext4_sb_ktype = { - .default_attrs = ext4_attrs, + .default_groups = ext4_groups, .sysfs_ops = &ext4_attr_ops, .release = ext4_sb_release, }; static struct kobj_type ext4_feat_ktype = { - .default_attrs = ext4_feat_attrs, + .default_groups = ext4_feat_groups, .sysfs_ops = &ext4_attr_ops, .release = (void (*)(struct kobject *))kfree, }; -- cgit v1.2.3 From 96fcaf86c3cb9340015fb475d79ef0a6fcf858ed Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 2 Jul 2019 17:53:22 -0400 Subject: ext4: fix coverity warning on error path of filename setup Fix the following coverity warning reported by Dan Carpenter: fs/ext4/namei.c:1311 ext4_fname_setup_ci_filename() warn: 'cf_name->len' unsigned <= 0 Fixes: 3ae72562ad91 ("ext4: optimize case-insensitive lookups") Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Theodore Ts'o Reported-by: Dan Carpenter --- fs/ext4/namei.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index c9568fee9e11..129029534075 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1310,6 +1310,8 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name, void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, struct fscrypt_str *cf_name) { + int len; + if (!IS_CASEFOLDED(dir)) { cf_name->name = NULL; return; @@ -1319,13 +1321,16 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, if (!cf_name->name) return; - cf_name->len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding, - iname, cf_name->name, - EXT4_NAME_LEN); - if (cf_name->len <= 0) { + len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding, + iname, cf_name->name, + EXT4_NAME_LEN); + if (len <= 0) { kfree(cf_name->name); cf_name->name = NULL; + return; } + cf_name->len = (unsigned) len; + } #endif -- cgit v1.2.3