summaryrefslogtreecommitdiff
path: root/fs/ext4/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/file.c')
-rw-r--r--fs/ext4/file.c110
1 files changed, 61 insertions, 49 deletions
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d101b3b0c7da..c457c8517f0f 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -147,6 +147,17 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return generic_file_read_iter(iocb, to);
}
+static ssize_t ext4_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ struct inode *inode = file_inode(in);
+
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+ return filemap_splice_read(in, ppos, pipe, len, flags);
+}
+
/*
* Called when an inode is released. Note that this is different
* from ext4_file_open: open gets called at every open, but release
@@ -285,18 +296,13 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
if (ret <= 0)
goto out;
- current->backing_dev_info = inode_to_bdi(inode);
ret = generic_perform_write(iocb, from);
- current->backing_dev_info = NULL;
out:
inode_unlock(inode);
- if (likely(ret > 0)) {
- iocb->ki_pos += ret;
- ret = generic_write_sync(iocb, ret);
- }
-
- return ret;
+ if (unlikely(ret <= 0))
+ return ret;
+ return generic_write_sync(iocb, ret);
}
static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
@@ -444,13 +450,14 @@ static const struct iomap_dio_ops ext4_dio_write_ops = {
*/
static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
bool *ilock_shared, bool *extend,
- bool *unwritten)
+ bool *unwritten, int *dio_flags)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
loff_t offset;
size_t count;
ssize_t ret;
+ bool overwrite, unaligned_io;
restart:
ret = ext4_generic_write_checks(iocb, from);
@@ -459,16 +466,20 @@ restart:
offset = iocb->ki_pos;
count = ret;
- if (ext4_extending_io(inode, offset, count))
- *extend = true;
+
+ unaligned_io = ext4_unaligned_io(inode, from, offset);
+ *extend = ext4_extending_io(inode, offset, count);
+ overwrite = ext4_overwrite_io(inode, offset, count, unwritten);
+
/*
- * Determine whether the IO operation will overwrite allocated
- * and initialized blocks.
- * We need exclusive i_rwsem for changing security info
- * in file_modified().
+ * Determine whether we need to upgrade to an exclusive lock. This is
+ * required to change security info in file_modified(), for extending
+ * I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten
+ * extents (as partial block zeroing may be required).
*/
- if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
- !ext4_overwrite_io(inode, offset, count, unwritten))) {
+ if (*ilock_shared &&
+ ((!IS_NOSEC(inode) || *extend || !overwrite ||
+ (unaligned_io && *unwritten)))) {
if (iocb->ki_flags & IOCB_NOWAIT) {
ret = -EAGAIN;
goto out;
@@ -479,6 +490,32 @@ restart:
goto restart;
}
+ /*
+ * Now that locking is settled, determine dio flags and exclusivity
+ * requirements. Unaligned writes are allowed under shared lock so long
+ * as they are pure overwrites. Set the iomap overwrite only flag as an
+ * added precaution in this case. Even though this is unnecessary, we
+ * can detect and warn on unexpected -EAGAIN if an unsafe unaligned
+ * write is ever submitted.
+ *
+ * Otherwise, concurrent unaligned writes risk data corruption due to
+ * partial block zeroing in the dio layer, and so the I/O must occur
+ * exclusively. The inode lock is already held exclusive if the write is
+ * non-overwrite or extending, so drain all outstanding dio and set the
+ * force wait dio flag.
+ */
+ if (*ilock_shared && unaligned_io) {
+ *dio_flags = IOMAP_DIO_OVERWRITE_ONLY;
+ } else if (!*ilock_shared && (unaligned_io || *extend)) {
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ if (unaligned_io && (!overwrite || *unwritten))
+ inode_dio_wait(inode);
+ *dio_flags = IOMAP_DIO_FORCE_WAIT;
+ }
+
ret = file_modified(file);
if (ret < 0)
goto out;
@@ -500,18 +537,11 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t offset = iocb->ki_pos;
size_t count = iov_iter_count(from);
const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
- bool extend = false, unaligned_io = false, unwritten = false;
+ bool extend = false, unwritten = false;
bool ilock_shared = true;
+ int dio_flags = 0;
/*
- * We initially start with shared inode lock unless it is
- * unaligned IO which needs exclusive lock anyways.
- */
- if (ext4_unaligned_io(inode, from, offset)) {
- unaligned_io = true;
- ilock_shared = false;
- }
- /*
* Quick check here without any i_rwsem lock to see if it is extending
* IO. A more reliable check is done in ext4_dio_write_checks() with
* proper locking in place.
@@ -543,16 +573,11 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ext4_buffered_write_iter(iocb, from);
}
- ret = ext4_dio_write_checks(iocb, from,
- &ilock_shared, &extend, &unwritten);
+ ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend,
+ &unwritten, &dio_flags);
if (ret <= 0)
return ret;
- /* if we're going to block and IOCB_NOWAIT is set, return -EAGAIN */
- if ((iocb->ki_flags & IOCB_NOWAIT) && (unaligned_io || extend)) {
- ret = -EAGAIN;
- goto out;
- }
/*
* Make sure inline data cannot be created anymore since we are going
* to allocate blocks for DIO. We know the inode does not have any
@@ -563,19 +588,6 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
offset = iocb->ki_pos;
count = ret;
- /*
- * Unaligned direct IO must be serialized among each other as zeroing
- * of partial blocks of two competing unaligned IOs can result in data
- * corruption.
- *
- * So we make sure we don't allow any unaligned IO in flight.
- * For IOs where we need not wait (like unaligned non-AIO DIO),
- * below inode_dio_wait() may anyway become a no-op, since we start
- * with exclusive lock.
- */
- if (unaligned_io)
- inode_dio_wait(inode);
-
if (extend) {
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle)) {
@@ -595,8 +607,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ilock_shared && !unwritten)
iomap_ops = &ext4_iomap_overwrite_ops;
ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
- (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0,
- NULL, 0);
+ dio_flags, NULL, 0);
+ WARN_ON_ONCE(ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT));
if (ret == -ENOTBLK)
ret = 0;
@@ -957,7 +969,7 @@ const struct file_operations ext4_file_operations = {
.release = ext4_release_file,
.fsync = ext4_sync_file,
.get_unmapped_area = thp_get_unmapped_area,
- .splice_read = generic_file_splice_read,
+ .splice_read = ext4_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ext4_fallocate,
};