summaryrefslogtreecommitdiff
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c701
1 files changed, 120 insertions, 581 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8bcad9940154..6c18dc9a1831 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -84,27 +84,12 @@ struct btrfs_dio_data {
};
struct btrfs_dio_private {
- struct btrfs_inode *inode;
-
- /*
- * Since DIO can use anonymous page, we cannot use page_offset() to
- * grab the file offset, thus need a dedicated member for file offset.
- */
+ /* Range of I/O */
u64 file_offset;
- /* Used for bio::bi_size */
u32 bytes;
- /*
- * References to this structure. There is one reference per in-flight
- * bio plus one while we're still setting up.
- */
- refcount_t refs;
-
- /* Array of checksums */
- u8 *csums;
-
/* This must be last */
- struct bio bio;
+ struct btrfs_bio bbio;
};
static struct bio_set btrfs_dio_bioset;
@@ -228,7 +213,7 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
{
unsigned long index = offset >> PAGE_SHIFT;
unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
- u64 page_start, page_end;
+ u64 page_start = 0, page_end = 0;
struct page *page;
if (locked_page) {
@@ -2536,19 +2521,6 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
}
/*
- * in order to insert checksums into the metadata in large chunks,
- * we wait until bio submission time. All the pages in the bio are
- * checksummed and sums are attached onto the ordered extent record.
- *
- * At IO completion time the cums attached on the ordered extent record
- * are inserted into the btree
- */
-blk_status_t btrfs_submit_bio_start(struct btrfs_inode *inode, struct bio *bio)
-{
- return btrfs_csum_one_bio(inode, bio, (u64)-1, false);
-}
-
-/*
* Split an extent_map at [start, start + len]
*
* This function is intended to be used only for extract_ordered_extent().
@@ -2663,19 +2635,19 @@ out:
return ret;
}
-static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
- struct bio *bio, loff_t file_offset)
+blk_status_t btrfs_extract_ordered_extent(struct btrfs_bio *bbio)
{
+ u64 start = (u64)bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
+ u64 len = bbio->bio.bi_iter.bi_size;
+ struct btrfs_inode *inode = bbio->inode;
struct btrfs_ordered_extent *ordered;
- u64 start = (u64)bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 file_len;
- u64 len = bio->bi_iter.bi_size;
u64 end = start + len;
u64 ordered_end;
u64 pre, post;
int ret = 0;
- ordered = btrfs_lookup_ordered_extent(inode, file_offset);
+ ordered = btrfs_lookup_ordered_extent(inode, bbio->file_offset);
if (WARN_ON_ONCE(!ordered))
return BLK_STS_IOERR;
@@ -2715,7 +2687,7 @@ static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
ret = btrfs_split_ordered_extent(ordered, pre, post);
if (ret)
goto out;
- ret = split_zoned_em(inode, file_offset, file_len, pre, post);
+ ret = split_zoned_em(inode, bbio->file_offset, file_len, pre, post);
out:
btrfs_put_ordered_extent(ordered);
@@ -2723,75 +2695,6 @@ out:
return errno_to_blk_status(ret);
}
-void btrfs_submit_data_write_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- blk_status_t ret;
-
- if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
- ret = extract_ordered_extent(inode, bio,
- page_offset(bio_first_bvec_all(bio)->bv_page));
- if (ret) {
- btrfs_bio_end_io(btrfs_bio(bio), ret);
- return;
- }
- }
-
- /*
- * If we need to checksum, and the I/O is not issued by fsync and
- * friends, that is ->sync_writers != 0, defer the submission to a
- * workqueue to parallelize it.
- *
- * Csum items for reloc roots have already been cloned at this point,
- * so they are handled as part of the no-checksum case.
- */
- if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
- !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&
- !btrfs_is_data_reloc_root(inode->root)) {
- if (!atomic_read(&inode->sync_writers) &&
- btrfs_wq_submit_bio(inode, bio, mirror_num, 0, WQ_SUBMIT_DATA))
- return;
-
- ret = btrfs_csum_one_bio(inode, bio, (u64)-1, false);
- if (ret) {
- btrfs_bio_end_io(btrfs_bio(bio), ret);
- return;
- }
- }
- btrfs_submit_bio(fs_info, bio, mirror_num);
-}
-
-void btrfs_submit_data_read_bio(struct btrfs_inode *inode, struct bio *bio,
- int mirror_num, enum btrfs_compression_type compress_type)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- blk_status_t ret;
-
- if (compress_type != BTRFS_COMPRESS_NONE) {
- /*
- * btrfs_submit_compressed_read will handle completing the bio
- * if there were any errors, so just return here.
- */
- btrfs_submit_compressed_read(&inode->vfs_inode, bio, mirror_num);
- return;
- }
-
- /* Save the original iter for read repair */
- btrfs_bio(bio)->iter = bio->bi_iter;
-
- /*
- * Lookup bio sums does extra checks around whether we need to csum or
- * not, which is why we ignore skip_sum here.
- */
- ret = btrfs_lookup_bio_sums(&inode->vfs_inode, bio, NULL);
- if (ret) {
- btrfs_bio_end_io(btrfs_bio(bio), ret);
- return;
- }
-
- btrfs_submit_bio(fs_info, bio, mirror_num);
-}
-
/*
* given a list of ordered sums record them in the inode. This happens
* at IO completion time based on sums calculated at bio submission time.
@@ -2969,7 +2872,7 @@ again:
unlock_extent(&inode->io_tree, page_start, page_end,
&cached_state);
unlock_page(page);
- btrfs_start_ordered_extent(ordered, 1);
+ btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
goto again;
}
@@ -3259,15 +3162,13 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
- /* A valid bdev implies a write on a sequential zone */
- if (ordered_extent->bdev) {
+ /* A valid ->physical implies a write on a sequential zone. */
+ if (ordered_extent->physical != (u64)-1) {
btrfs_rewrite_logical_zoned(ordered_extent);
btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes);
}
- btrfs_free_io_failure_record(inode, start, end);
-
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
truncated = true;
logical_len = ordered_extent->truncated_len;
@@ -3474,109 +3375,55 @@ static u8 *btrfs_csum_ptr(const struct btrfs_fs_info *fs_info, u8 *csums, u64 of
}
/*
- * check_data_csum - verify checksum of one sector of uncompressed data
- * @inode: inode
- * @bbio: btrfs_bio which contains the csum
+ * Verify the checksum of a single data sector.
+ *
+ * @bbio: btrfs_io_bio which contains the csum
+ * @dev: device the sector is on
* @bio_offset: offset to the beginning of the bio (in bytes)
- * @page: page where is the data to be verified
- * @pgoff: offset inside the page
+ * @bv: bio_vec to check
*
- * The length of such check is always one sector size.
+ * Check if the checksum on a data block is valid. When a checksum mismatch is
+ * detected, report the error and fill the corrupted range with zero.
*
- * When csum mismatch is detected, we will also report the error and fill the
- * corrupted range with zero. (Thus it needs the extra parameters)
+ * Return %true if the sector is ok or had no checksum to start with, else %false.
*/
-int btrfs_check_data_csum(struct btrfs_inode *inode, struct btrfs_bio *bbio,
- u32 bio_offset, struct page *page, u32 pgoff)
+bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
+ u32 bio_offset, struct bio_vec *bv)
{
+ struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- u32 len = fs_info->sectorsize;
+ u64 file_offset = bbio->file_offset + bio_offset;
+ u64 end = file_offset + bv->bv_len - 1;
u8 *csum_expected;
u8 csum[BTRFS_CSUM_SIZE];
- ASSERT(pgoff + len <= PAGE_SIZE);
+ ASSERT(bv->bv_len == fs_info->sectorsize);
- csum_expected = btrfs_csum_ptr(fs_info, bbio->csum, bio_offset);
+ if (!bbio->csum)
+ return true;
+
+ if (btrfs_is_data_reloc_root(inode->root) &&
+ test_range_bit(&inode->io_tree, file_offset, end, EXTENT_NODATASUM,
+ 1, NULL)) {
+ /* Skip the range without csum for data reloc inode */
+ clear_extent_bits(&inode->io_tree, file_offset, end,
+ EXTENT_NODATASUM);
+ return true;
+ }
- if (btrfs_check_sector_csum(fs_info, page, pgoff, csum, csum_expected))
+ csum_expected = btrfs_csum_ptr(fs_info, bbio->csum, bio_offset);
+ if (btrfs_check_sector_csum(fs_info, bv->bv_page, bv->bv_offset, csum,
+ csum_expected))
goto zeroit;
- return 0;
+ return true;
zeroit:
- btrfs_print_data_csum_error(inode, bbio->file_offset + bio_offset,
- csum, csum_expected, bbio->mirror_num);
- if (bbio->device)
- btrfs_dev_stat_inc_and_print(bbio->device,
- BTRFS_DEV_STAT_CORRUPTION_ERRS);
- memzero_page(page, pgoff, len);
- return -EIO;
-}
-
-/*
- * When reads are done, we need to check csums to verify the data is correct.
- * if there's a match, we allow the bio to finish. If not, the code in
- * extent_io.c will try to find good copies for us.
- *
- * @bio_offset: offset to the beginning of the bio (in bytes)
- * @start: file offset of the range start
- * @end: file offset of the range end (inclusive)
- *
- * Return a bitmap where bit set means a csum mismatch, and bit not set means
- * csum match.
- */
-unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
- u32 bio_offset, struct page *page,
- u64 start, u64 end)
-{
- struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
- struct btrfs_root *root = inode->root;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct extent_io_tree *io_tree = &inode->io_tree;
- const u32 sectorsize = root->fs_info->sectorsize;
- u32 pg_off;
- unsigned int result = 0;
-
- /*
- * This only happens for NODATASUM or compressed read.
- * Normally this should be covered by above check for compressed read
- * or the next check for NODATASUM. Just do a quicker exit here.
- */
- if (bbio->csum == NULL)
- return 0;
-
- if (inode->flags & BTRFS_INODE_NODATASUM)
- return 0;
-
- if (unlikely(test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)))
- return 0;
-
- ASSERT(page_offset(page) <= start &&
- end <= page_offset(page) + PAGE_SIZE - 1);
- for (pg_off = offset_in_page(start);
- pg_off < offset_in_page(end);
- pg_off += sectorsize, bio_offset += sectorsize) {
- u64 file_offset = pg_off + page_offset(page);
- int ret;
-
- if (btrfs_is_data_reloc_root(root) &&
- test_range_bit(io_tree, file_offset,
- file_offset + sectorsize - 1,
- EXTENT_NODATASUM, 1, NULL)) {
- /* Skip the range without csum for data reloc inode */
- clear_extent_bits(io_tree, file_offset,
- file_offset + sectorsize - 1,
- EXTENT_NODATASUM);
- continue;
- }
- ret = btrfs_check_data_csum(inode, bbio, bio_offset, page, pg_off);
- if (ret < 0) {
- const int nr_bit = (pg_off - offset_in_page(start)) >>
- root->fs_info->sectorsize_bits;
-
- result |= (1U << nr_bit);
- }
- }
- return result;
+ btrfs_print_data_csum_error(inode, file_offset, csum, csum_expected,
+ bbio->mirror_num);
+ if (dev)
+ btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
+ memzero_bvec(bv);
+ return false;
}
/*
@@ -4987,7 +4834,7 @@ again:
unlock_extent(io_tree, block_start, block_end, &cached_state);
unlock_page(page);
put_page(page);
- btrfs_start_ordered_extent(ordered, 1);
+ btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
goto again;
}
@@ -5281,7 +5128,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
return ret;
}
-static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+static int btrfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -5291,7 +5138,7 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr
if (btrfs_root_readonly(root))
return -EROFS;
- err = setattr_prepare(mnt_userns, dentry, attr);
+ err = setattr_prepare(idmap, dentry, attr);
if (err)
return err;
@@ -5302,12 +5149,12 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr
}
if (attr->ia_valid) {
- setattr_copy(mnt_userns, inode, attr);
+ setattr_copy(idmap, inode, attr);
inode_inc_iversion(inode);
err = btrfs_dirty_inode(BTRFS_I(inode));
if (!err && attr->ia_valid & ATTR_MODE)
- err = posix_acl_chmod(mnt_userns, dentry, inode->i_mode);
+ err = posix_acl_chmod(idmap, dentry, inode->i_mode);
}
return err;
@@ -5466,8 +5313,6 @@ void btrfs_evict_inode(struct inode *inode)
if (is_bad_inode(inode))
goto no_delete;
- btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);
-
if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
goto no_delete;
@@ -6724,7 +6569,7 @@ out_inode:
return err;
}
-static int btrfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int btrfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode *inode;
@@ -6732,13 +6577,13 @@ static int btrfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
inode = new_inode(dir->i_sb);
if (!inode)
return -ENOMEM;
- inode_init_owner(mnt_userns, inode, dir, mode);
+ inode_init_owner(idmap, inode, dir, mode);
inode->i_op = &btrfs_special_inode_operations;
init_special_inode(inode, inode->i_mode, rdev);
return btrfs_create_common(dir, dentry, inode);
}
-static int btrfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int btrfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode;
@@ -6746,7 +6591,7 @@ static int btrfs_create(struct user_namespace *mnt_userns, struct inode *dir,
inode = new_inode(dir->i_sb);
if (!inode)
return -ENOMEM;
- inode_init_owner(mnt_userns, inode, dir, mode);
+ inode_init_owner(idmap, inode, dir, mode);
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
inode->i_mapping->a_ops = &btrfs_aops;
@@ -6837,7 +6682,7 @@ fail:
return err;
}
-static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+static int btrfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct inode *inode;
@@ -6845,7 +6690,7 @@ static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
inode = new_inode(dir->i_sb);
if (!inode)
return -ENOMEM;
- inode_init_owner(mnt_userns, inode, dir, S_IFDIR | mode);
+ inode_init_owner(idmap, inode, dir, S_IFDIR | mode);
inode->i_op = &btrfs_dir_inode_operations;
inode->i_fop = &btrfs_dir_file_operations;
return btrfs_create_common(dir, dentry, inode);
@@ -7092,7 +6937,7 @@ next:
* Other members are not utilized for inline extents.
*/
ASSERT(em->block_start == EXTENT_MAP_INLINE);
- ASSERT(em->len = fs_info->sectorsize);
+ ASSERT(em->len == fs_info->sectorsize);
ret = read_inline_extent(inode, path, page);
if (ret < 0)
@@ -7392,7 +7237,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
*/
if (writing ||
test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags))
- btrfs_start_ordered_extent(ordered, 1);
+ btrfs_start_ordered_extent(ordered);
else
ret = nowait ? -EAGAIN : -ENOTBLK;
btrfs_put_ordered_extent(ordered);
@@ -7833,10 +7678,6 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
iomap->offset = start;
iomap->bdev = fs_info->fs_devices->latest_dev->bdev;
iomap->length = len;
-
- if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start))
- iomap->flags |= IOMAP_F_ZONE_APPEND;
-
free_extent_map(em);
return 0;
@@ -7888,267 +7729,47 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
return ret;
}
-static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
-{
- /*
- * This implies a barrier so that stores to dio_bio->bi_status before
- * this and loads of dio_bio->bi_status after this are fully ordered.
- */
- if (!refcount_dec_and_test(&dip->refs))
- return;
-
- if (btrfs_op(&dip->bio) == BTRFS_MAP_WRITE) {
- btrfs_mark_ordered_io_finished(dip->inode, NULL,
- dip->file_offset, dip->bytes,
- !dip->bio.bi_status);
- } else {
- unlock_extent(&dip->inode->io_tree,
- dip->file_offset,
- dip->file_offset + dip->bytes - 1, NULL);
- }
-
- kfree(dip->csums);
- bio_endio(&dip->bio);
-}
-
-void btrfs_submit_dio_repair_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num)
-{
- struct btrfs_dio_private *dip = btrfs_bio(bio)->private;
-
- BUG_ON(bio_op(bio) == REQ_OP_WRITE);
-
- refcount_inc(&dip->refs);
- btrfs_submit_bio(inode->root->fs_info, bio, mirror_num);
-}
-
-static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
- struct btrfs_bio *bbio,
- const bool uptodate)
-{
- struct inode *inode = &dip->inode->vfs_inode;
- struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
- const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
- blk_status_t err = BLK_STS_OK;
- struct bvec_iter iter;
- struct bio_vec bv;
- u32 offset;
-
- btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) {
- u64 start = bbio->file_offset + offset;
-
- if (uptodate &&
- (!csum || !btrfs_check_data_csum(BTRFS_I(inode), bbio, offset,
- bv.bv_page, bv.bv_offset))) {
- btrfs_clean_io_failure(BTRFS_I(inode), start,
- bv.bv_page, bv.bv_offset);
- } else {
- int ret;
-
- ret = btrfs_repair_one_sector(BTRFS_I(inode), bbio, offset,
- bv.bv_page, bv.bv_offset, false);
- if (ret)
- err = errno_to_blk_status(ret);
- }
- }
-
- return err;
-}
-
-blk_status_t btrfs_submit_bio_start_direct_io(struct btrfs_inode *inode,
- struct bio *bio,
- u64 dio_file_offset)
+static void btrfs_dio_end_io(struct btrfs_bio *bbio)
{
- return btrfs_csum_one_bio(inode, bio, dio_file_offset, false);
-}
-
-static void btrfs_end_dio_bio(struct btrfs_bio *bbio)
-{
- struct btrfs_dio_private *dip = bbio->private;
+ struct btrfs_dio_private *dip =
+ container_of(bbio, struct btrfs_dio_private, bbio);
+ struct btrfs_inode *inode = bbio->inode;
struct bio *bio = &bbio->bio;
- blk_status_t err = bio->bi_status;
-
- if (err)
- btrfs_warn(dip->inode->root->fs_info,
- "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
- btrfs_ino(dip->inode), bio_op(bio),
- bio->bi_opf, bio->bi_iter.bi_sector,
- bio->bi_iter.bi_size, err);
-
- if (bio_op(bio) == REQ_OP_READ)
- err = btrfs_check_read_dio_bio(dip, bbio, !err);
- if (err)
- dip->bio.bi_status = err;
-
- btrfs_record_physical_zoned(&dip->inode->vfs_inode, bbio->file_offset, bio);
-
- bio_put(bio);
- btrfs_dio_private_put(dip);
-}
-
-static void btrfs_submit_dio_bio(struct bio *bio, struct btrfs_inode *inode,
- u64 file_offset, int async_submit)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct btrfs_dio_private *dip = btrfs_bio(bio)->private;
- blk_status_t ret;
-
- /* Save the original iter for read repair */
- if (btrfs_op(bio) == BTRFS_MAP_READ)
- btrfs_bio(bio)->iter = bio->bi_iter;
-
- if (inode->flags & BTRFS_INODE_NODATASUM)
- goto map;
+ if (bio->bi_status) {
+ btrfs_warn(inode->root->fs_info,
+ "direct IO failed ino %llu op 0x%0x offset %#llx len %u err no %d",
+ btrfs_ino(inode), bio->bi_opf,
+ dip->file_offset, dip->bytes, bio->bi_status);
+ }
- if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
- /* Check btrfs_submit_data_write_bio() for async submit rules */
- if (async_submit && !atomic_read(&inode->sync_writers) &&
- btrfs_wq_submit_bio(inode, bio, 0, file_offset,
- WQ_SUBMIT_DATA_DIO))
- return;
+ if (btrfs_op(bio) == BTRFS_MAP_WRITE)
+ btrfs_mark_ordered_io_finished(inode, NULL, dip->file_offset,
+ dip->bytes, !bio->bi_status);
+ else
+ unlock_extent(&inode->io_tree, dip->file_offset,
+ dip->file_offset + dip->bytes - 1, NULL);
- /*
- * If we aren't doing async submit, calculate the csum of the
- * bio now.
- */
- ret = btrfs_csum_one_bio(inode, bio, file_offset, false);
- if (ret) {
- btrfs_bio_end_io(btrfs_bio(bio), ret);
- return;
- }
- } else {
- btrfs_bio(bio)->csum = btrfs_csum_ptr(fs_info, dip->csums,
- file_offset - dip->file_offset);
- }
-map:
- btrfs_submit_bio(fs_info, bio, 0);
+ bbio->bio.bi_private = bbio->private;
+ iomap_dio_bio_end_io(bio);
}
-static void btrfs_submit_direct(const struct iomap_iter *iter,
- struct bio *dio_bio, loff_t file_offset)
+static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
+ loff_t file_offset)
{
+ struct btrfs_bio *bbio = btrfs_bio(bio);
struct btrfs_dio_private *dip =
- container_of(dio_bio, struct btrfs_dio_private, bio);
- struct inode *inode = iter->inode;
- const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- const bool raid56 = (btrfs_data_alloc_profile(fs_info) &
- BTRFS_BLOCK_GROUP_RAID56_MASK);
- struct bio *bio;
- u64 start_sector;
- int async_submit = 0;
- u64 submit_len;
- u64 clone_offset = 0;
- u64 clone_len;
- u64 logical;
- int ret;
- blk_status_t status;
- struct btrfs_io_geometry geom;
+ container_of(bbio, struct btrfs_dio_private, bbio);
struct btrfs_dio_data *dio_data = iter->private;
- struct extent_map *em = NULL;
-
- dip->inode = BTRFS_I(inode);
- dip->file_offset = file_offset;
- dip->bytes = dio_bio->bi_iter.bi_size;
- refcount_set(&dip->refs, 1);
- dip->csums = NULL;
-
- if (!write && !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
- unsigned int nr_sectors =
- (dio_bio->bi_iter.bi_size >> fs_info->sectorsize_bits);
-
- /*
- * Load the csums up front to reduce csum tree searches and
- * contention when submitting bios.
- */
- status = BLK_STS_RESOURCE;
- dip->csums = kcalloc(nr_sectors, fs_info->csum_size, GFP_NOFS);
- if (!dip->csums)
- goto out_err;
-
- status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums);
- if (status != BLK_STS_OK)
- goto out_err;
- }
-
- start_sector = dio_bio->bi_iter.bi_sector;
- submit_len = dio_bio->bi_iter.bi_size;
-
- do {
- logical = start_sector << 9;
- em = btrfs_get_chunk_map(fs_info, logical, submit_len);
- if (IS_ERR(em)) {
- status = errno_to_blk_status(PTR_ERR(em));
- em = NULL;
- goto out_err_em;
- }
- ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(dio_bio),
- logical, &geom);
- if (ret) {
- status = errno_to_blk_status(ret);
- goto out_err_em;
- }
-
- clone_len = min(submit_len, geom.len);
- ASSERT(clone_len <= UINT_MAX);
-
- /*
- * This will never fail as it's passing GPF_NOFS and
- * the allocation is backed by btrfs_bioset.
- */
- bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len,
- btrfs_end_dio_bio, dip);
- btrfs_bio(bio)->file_offset = file_offset;
-
- if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
- status = extract_ordered_extent(BTRFS_I(inode), bio,
- file_offset);
- if (status) {
- bio_put(bio);
- goto out_err;
- }
- }
-
- ASSERT(submit_len >= clone_len);
- submit_len -= clone_len;
-
- /*
- * Increase the count before we submit the bio so we know
- * the end IO handler won't happen before we increase the
- * count. Otherwise, the dip might get freed before we're
- * done setting it up.
- *
- * We transfer the initial reference to the last bio, so we
- * don't need to increment the reference count for the last one.
- */
- if (submit_len > 0) {
- refcount_inc(&dip->refs);
- /*
- * If we are submitting more than one bio, submit them
- * all asynchronously. The exception is RAID 5 or 6, as
- * asynchronous checksums make it difficult to collect
- * full stripe writes.
- */
- if (!raid56)
- async_submit = 1;
- }
- btrfs_submit_dio_bio(bio, BTRFS_I(inode), file_offset, async_submit);
+ btrfs_bio_init(bbio, BTRFS_I(iter->inode), btrfs_dio_end_io, bio->bi_private);
+ bbio->file_offset = file_offset;
- dio_data->submitted += clone_len;
- clone_offset += clone_len;
- start_sector += clone_len >> 9;
- file_offset += clone_len;
-
- free_extent_map(em);
- } while (submit_len > 0);
- return;
+ dip->file_offset = file_offset;
+ dip->bytes = bio->bi_iter.bi_size;
-out_err_em:
- free_extent_map(em);
-out_err:
- dio_bio->bi_status = status;
- btrfs_dio_private_put(dip);
+ dio_data->submitted += bio->bi_iter.bi_size;
+ btrfs_submit_bio(bio, 0);
}
static const struct iomap_ops btrfs_dio_iomap_ops = {
@@ -8157,7 +7778,7 @@ static const struct iomap_ops btrfs_dio_iomap_ops = {
};
static const struct iomap_dio_ops btrfs_dio_ops = {
- .submit_io = btrfs_submit_direct,
+ .submit_io = btrfs_dio_submit_io,
.bio_set = &btrfs_dio_bioset,
};
@@ -8552,7 +8173,7 @@ again:
unlock_extent(io_tree, page_start, page_end, &cached_state);
unlock_page(page);
up_read(&BTRFS_I(inode)->i_mmap_lock);
- btrfs_start_ordered_extent(ordered, 1);
+ btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
goto again;
}
@@ -8802,7 +8423,7 @@ out:
return ret;
}
-struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns,
+struct inode *btrfs_new_subvol_inode(struct mnt_idmap *idmap,
struct inode *dir)
{
struct inode *inode;
@@ -8813,7 +8434,7 @@ struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns,
* Subvolumes don't inherit the sgid bit or the parent's gid if
* the parent's sgid bit is set. This is probably a bug.
*/
- inode_init_owner(mnt_userns, inode, NULL,
+ inode_init_owner(idmap, inode, NULL,
S_IFDIR | (~current_umask() & S_IRWXUGO));
inode->i_op = &btrfs_dir_inode_operations;
inode->i_fop = &btrfs_dir_file_operations;
@@ -8850,7 +8471,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->last_log_commit = 0;
spin_lock_init(&ei->lock);
- spin_lock_init(&ei->io_failure_lock);
ei->outstanding_extents = 0;
if (sb->s_magic != BTRFS_TEST_MAGIC)
btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
@@ -8870,7 +8490,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->io_tree.inode = ei;
extent_io_tree_init(fs_info, &ei->file_extent_tree,
IO_TREE_INODE_FILE_EXTENT);
- ei->io_failure_tree = RB_ROOT;
atomic_set(&ei->sync_writers, 0);
mutex_init(&ei->log_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
@@ -8994,7 +8613,7 @@ int __init btrfs_init_cachep(void)
goto fail;
if (bioset_init(&btrfs_dio_bioset, BIO_POOL_SIZE,
- offsetof(struct btrfs_dio_private, bio),
+ offsetof(struct btrfs_dio_private, bbio.bio),
BIOSET_NEED_BVECS))
goto fail;
@@ -9004,7 +8623,7 @@ fail:
return -ENOMEM;
}
-static int btrfs_getattr(struct user_namespace *mnt_userns,
+static int btrfs_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
@@ -9034,7 +8653,7 @@ static int btrfs_getattr(struct user_namespace *mnt_userns,
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(mnt_userns, inode, stat);
+ generic_fillattr(idmap, inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev;
spin_lock(&BTRFS_I(inode)->lock);
@@ -9289,14 +8908,14 @@ out_notrans:
return ret;
}
-static struct inode *new_whiteout_inode(struct user_namespace *mnt_userns,
+static struct inode *new_whiteout_inode(struct mnt_idmap *idmap,
struct inode *dir)
{
struct inode *inode;
inode = new_inode(dir->i_sb);
if (inode) {
- inode_init_owner(mnt_userns, inode, dir,
+ inode_init_owner(idmap, inode, dir,
S_IFCHR | WHITEOUT_MODE);
inode->i_op = &btrfs_special_inode_operations;
init_special_inode(inode, inode->i_mode, WHITEOUT_DEV);
@@ -9304,7 +8923,7 @@ static struct inode *new_whiteout_inode(struct user_namespace *mnt_userns,
return inode;
}
-static int btrfs_rename(struct user_namespace *mnt_userns,
+static int btrfs_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
@@ -9376,9 +8995,11 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
filemap_flush(old_inode->i_mapping);
if (flags & RENAME_WHITEOUT) {
- whiteout_args.inode = new_whiteout_inode(mnt_userns, old_dir);
- if (!whiteout_args.inode)
- return -ENOMEM;
+ whiteout_args.inode = new_whiteout_inode(idmap, old_dir);
+ if (!whiteout_args.inode) {
+ ret = -ENOMEM;
+ goto out_fscrypt_names;
+ }
ret = btrfs_new_inode_prepare(&whiteout_args, &trans_num_items);
if (ret)
goto out_whiteout_inode;
@@ -9543,7 +9164,7 @@ out_fscrypt_names:
return ret;
}
-static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_dir,
+static int btrfs_rename2(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
@@ -9556,7 +9177,7 @@ static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_di
ret = btrfs_rename_exchange(old_dir, old_dentry, new_dir,
new_dentry);
else
- ret = btrfs_rename(mnt_userns, old_dir, old_dentry, new_dir,
+ ret = btrfs_rename(idmap, old_dir, old_dentry, new_dir,
new_dentry, flags);
btrfs_btree_balance_dirty(BTRFS_I(new_dir)->root->fs_info);
@@ -9756,7 +9377,7 @@ out:
return ret;
}
-static int btrfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
@@ -9784,7 +9405,7 @@ static int btrfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
inode = new_inode(dir->i_sb);
if (!inode)
return -ENOMEM;
- inode_init_owner(mnt_userns, inode, dir, S_IFLNK | S_IRWXUGO);
+ inode_init_owner(idmap, inode, dir, S_IFLNK | S_IRWXUGO);
inode->i_op = &btrfs_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_mapping->a_ops = &btrfs_aops;
@@ -10073,7 +9694,7 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
min_size, actual_len, alloc_hint, trans);
}
-static int btrfs_permission(struct user_namespace *mnt_userns,
+static int btrfs_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -10086,10 +9707,10 @@ static int btrfs_permission(struct user_namespace *mnt_userns,
if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
return -EACCES;
}
- return generic_permission(mnt_userns, inode, mask);
+ return generic_permission(idmap, inode, mask);
}
-static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+static int btrfs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
@@ -10107,7 +9728,7 @@ static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
inode = new_inode(dir->i_sb);
if (!inode)
return -ENOMEM;
- inode_init_owner(mnt_userns, inode, dir, mode);
+ inode_init_owner(idmap, inode, dir, mode);
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
inode->i_mapping->a_ops = &btrfs_aops;
@@ -10287,65 +9908,13 @@ struct btrfs_encoded_read_private {
wait_queue_head_t wait;
atomic_t pending;
blk_status_t status;
- bool skip_csum;
};
-static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
- struct bio *bio, int mirror_num)
-{
- struct btrfs_encoded_read_private *priv = btrfs_bio(bio)->private;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- blk_status_t ret;
-
- if (!priv->skip_csum) {
- ret = btrfs_lookup_bio_sums(&inode->vfs_inode, bio, NULL);
- if (ret)
- return ret;
- }
-
- atomic_inc(&priv->pending);
- btrfs_submit_bio(fs_info, bio, mirror_num);
- return BLK_STS_OK;
-}
-
-static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
-{
- const bool uptodate = (bbio->bio.bi_status == BLK_STS_OK);
- struct btrfs_encoded_read_private *priv = bbio->private;
- struct btrfs_inode *inode = priv->inode;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- u32 sectorsize = fs_info->sectorsize;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
- u32 bio_offset = 0;
-
- if (priv->skip_csum || !uptodate)
- return bbio->bio.bi_status;
-
- bio_for_each_segment_all(bvec, &bbio->bio, iter_all) {
- unsigned int i, nr_sectors, pgoff;
-
- nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
- pgoff = bvec->bv_offset;
- for (i = 0; i < nr_sectors; i++) {
- ASSERT(pgoff < PAGE_SIZE);
- if (btrfs_check_data_csum(inode, bbio, bio_offset,
- bvec->bv_page, pgoff))
- return BLK_STS_IOERR;
- bio_offset += sectorsize;
- pgoff += sectorsize;
- }
- }
- return BLK_STS_OK;
-}
-
static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
{
struct btrfs_encoded_read_private *priv = bbio->private;
- blk_status_t status;
- status = btrfs_encoded_read_verify_csum(bbio);
- if (status) {
+ if (bbio->bio.bi_status) {
/*
* The memory barrier implied by the atomic_dec_return() here
* pairs with the memory barrier implied by the
@@ -10354,11 +9923,10 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
* write is observed before the load of status in
* btrfs_encoded_read_regular_fill_pages().
*/
- WRITE_ONCE(priv->status, status);
+ WRITE_ONCE(priv->status, bbio->bio.bi_status);
}
if (!atomic_dec_return(&priv->pending))
wake_up(&priv->wait);
- btrfs_bio_free_csum(bbio);
bio_put(&bbio->bio);
}
@@ -10366,47 +9934,26 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
u64 file_offset, u64 disk_bytenr,
u64 disk_io_size, struct page **pages)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_encoded_read_private priv = {
.inode = inode,
.file_offset = file_offset,
.pending = ATOMIC_INIT(1),
- .skip_csum = (inode->flags & BTRFS_INODE_NODATASUM),
};
unsigned long i = 0;
u64 cur = 0;
- int ret;
init_waitqueue_head(&priv.wait);
- /*
- * Submit bios for the extent, splitting due to bio or stripe limits as
- * necessary.
- */
+ /* Submit bios for the extent, splitting due to bio limits as necessary. */
while (cur < disk_io_size) {
- struct extent_map *em;
- struct btrfs_io_geometry geom;
struct bio *bio = NULL;
- u64 remaining;
+ u64 remaining = disk_io_size - cur;
- em = btrfs_get_chunk_map(fs_info, disk_bytenr + cur,
- disk_io_size - cur);
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- } else {
- ret = btrfs_get_io_geometry(fs_info, em, BTRFS_MAP_READ,
- disk_bytenr + cur, &geom);
- free_extent_map(em);
- }
- if (ret) {
- WRITE_ONCE(priv.status, errno_to_blk_status(ret));
- break;
- }
- remaining = min(geom.len, disk_io_size - cur);
while (bio || remaining) {
size_t bytes = min_t(u64, remaining, PAGE_SIZE);
if (!bio) {
bio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ,
+ inode,
btrfs_encoded_read_endio,
&priv);
bio->bi_iter.bi_sector =
@@ -10415,14 +9962,8 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
if (!bytes ||
bio_add_page(bio, pages[i], bytes, 0) < bytes) {
- blk_status_t status;
-
- status = submit_encoded_read_bio(inode, bio, 0);
- if (status) {
- WRITE_ONCE(priv.status, status);
- bio_put(bio);
- goto out;
- }
+ atomic_inc(&priv.pending);
+ btrfs_submit_bio(bio, 0);
bio = NULL;
continue;
}
@@ -10433,7 +9974,6 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
}
}
-out:
if (atomic_dec_return(&priv.pending))
io_wait_event(priv.wait, !atomic_read(&priv.pending));
/* See btrfs_encoded_read_endio() for ordering. */
@@ -10993,9 +10533,8 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis,
return 0;
max_pages = sis->max - bsi->nr_pages;
- first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
- next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
- PAGE_SIZE) >> PAGE_SHIFT;
+ first_ppage = PAGE_ALIGN(bsi->block_start) >> PAGE_SHIFT;
+ next_ppage = PAGE_ALIGN_DOWN(bsi->block_start + bsi->block_len) >> PAGE_SHIFT;
if (first_ppage >= next_ppage)
return 0;