author		Christoph Hellwig <hch@lst.de>		2023-01-21 09:50:30 +0300
committer	David Sterba <dsterba@suse.com>		2023-02-15 21:38:53 +0300
commit		d5e4377d505189c30df50d54f9944d7fb8d528bb (patch)
tree		6eb5e035002e63d9ef931934b30199bca9fc3b75 /fs/btrfs/bio.c
parent		243cf8d1b6737d4b53ac16b211987bbd299478e6 (diff)
download	linux-d5e4377d505189c30df50d54f9944d7fb8d528bb.tar.xz
btrfs: split zone append bios in btrfs_submit_bio
The current btrfs zoned device support is a little cumbersome in the data I/O path as it requires the callers to not issue I/O larger than the supported ZONE_APPEND size of the underlying device. This leads to a lot of extra accounting. Instead change btrfs_submit_bio so that it can take write bios of arbitrary size and form from the upper layers, and just split them internally to the ZONE_APPEND queue limits. Then remove all the upper layer warts catering to limited write sizes on zoned devices, including the extra refcount in the compressed_bio.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David Sterba <dsterba@suse.com>
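To make the new flow concrete, here is a small userspace C model of the loop that btrfs_submit_chunk now effectively performs for zoned writes: each mapped chunk is clamped to the device's zone-append limit before submission, so callers no longer need to size their bios. This is an illustrative sketch, not kernel code; the 1 MiB write and 192 KiB limit are invented example values, and max_zone_append_size only borrows its name from the fs_info field the patch reads.

/*
 * Userspace model of the splitting now done inside btrfs_submit_bio():
 * an arbitrarily sized write is clamped to the device's zone-append
 * limit and submitted chunk by chunk.  Illustrative only; the sizes
 * below are made-up example values.
 */
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9

int main(void)
{
	uint64_t length = 1024 * 1024;			/* write from the upper layer */
	uint64_t max_zone_append_size = 192 * 1024;	/* device limit (example) */
	uint64_t logical = 0;

	while (length) {
		/* mirrors: map_length = min(map_length, fs_info->max_zone_append_size); */
		uint64_t map_length = length < max_zone_append_size ?
				      length : max_zone_append_size;

		printf("submit chunk: logical=%llu, sectors=%llu\n",
		       (unsigned long long)logical,
		       (unsigned long long)(map_length >> SECTOR_SHIFT));
		logical += map_length;
		length -= map_length;
	}
	return 0;
}

Compiled with a plain cc invocation, this prints six chunk submissions, five of 384 sectors and a final one of 128 sectors, mirroring how one oversized write fans out into several zone append bios.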
Diffstat (limited to 'fs/btrfs/bio.c')
-rw-r--r--	fs/btrfs/bio.c	44	+++++++++++++++++++++++++++-----------------
1 file changed, 27 insertions(+), 17 deletions(-)
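One more detail worth modeling from the second hunk below: once every zone append bio is guaranteed to target a sequential zone, btrfs_submit_dev_bio() can unconditionally rewind the bio's sector to the start of its target zone (the device reports the actual write position on completion). A hedged userspace sketch of that round_down() arithmetic follows, assuming a power-of-two zone size as the kernel's round_down() macro requires; the 256 MiB zone and the physical address are made-up values.

/*
 * Userspace sketch of the zone-start rewind in btrfs_submit_dev_bio().
 * Assumes a power-of-two zone size; all values are examples.
 */
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9

static uint64_t round_down_pow2(uint64_t x, uint64_t align)
{
	return x & ~(align - 1);
}

int main(void)
{
	uint64_t zone_size = 256ULL << 20;		/* 256 MiB zones (example) */
	uint64_t physical = (256ULL << 20) + 3 * 4096;	/* inside zone 1 (example) */

	/* mirrors: zone_start = round_down(physical, dev->fs_info->zone_size);
	 *          bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;       */
	uint64_t zone_start = round_down_pow2(physical, zone_size);
	uint64_t bi_sector = zone_start >> SECTOR_SHIFT;

	printf("physical=%llu zone_start=%llu bi_sector=%llu\n",
	       (unsigned long long)physical,
	       (unsigned long long)zone_start,
	       (unsigned long long)bi_sector);
	return 0;
}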
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 1de40e064170..8445df47495a 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -59,13 +59,22 @@ struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
 	return bio;
 }
 
-static struct bio *btrfs_split_bio(struct bio *orig, u64 map_length)
+static struct bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
+				   struct bio *orig, u64 map_length,
+				   bool use_append)
 {
 	struct btrfs_bio *orig_bbio = btrfs_bio(orig);
 	struct bio *bio;
 
-	bio = bio_split(orig, map_length >> SECTOR_SHIFT, GFP_NOFS,
-			&btrfs_clone_bioset);
+	if (use_append) {
+		unsigned int nr_segs;
+
+		bio = bio_split_rw(orig, &fs_info->limits, &nr_segs,
+				   &btrfs_clone_bioset, map_length);
+	} else {
+		bio = bio_split(orig, map_length >> SECTOR_SHIFT, GFP_NOFS,
+				&btrfs_clone_bioset);
+	}
 	btrfs_bio_init(btrfs_bio(bio), orig_bbio->inode, NULL, orig_bbio);
 
 	btrfs_bio(bio)->file_offset = orig_bbio->file_offset;
@@ -397,16 +406,10 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
 	 */
 	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
 		u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+		u64 zone_start = round_down(physical, dev->fs_info->zone_size);
 
-		if (btrfs_dev_is_sequential(dev, physical)) {
-			u64 zone_start = round_down(physical,
-						    dev->fs_info->zone_size);
-
-			bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
-		} else {
-			bio->bi_opf &= ~REQ_OP_ZONE_APPEND;
-			bio->bi_opf |= REQ_OP_WRITE;
-		}
+		ASSERT(btrfs_dev_is_sequential(dev, physical));
+		bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
 	}
 	btrfs_debug_in_rcu(dev->fs_info,
 		"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
@@ -603,11 +606,13 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
 static bool btrfs_submit_chunk(struct bio *bio, int mirror_num)
 {
 	struct btrfs_bio *bbio = btrfs_bio(bio);
-	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
+	struct btrfs_inode *inode = bbio->inode;
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_bio *orig_bbio = bbio;
 	u64 logical = bio->bi_iter.bi_sector << 9;
 	u64 length = bio->bi_iter.bi_size;
 	u64 map_length = length;
+	bool use_append = btrfs_use_zone_append(inode, logical);
 	struct btrfs_io_context *bioc = NULL;
 	struct btrfs_io_stripe smap;
 	blk_status_t ret;
@@ -622,8 +627,11 @@ static bool btrfs_submit_chunk(struct bio *bio, int mirror_num)
 	}
 
 	map_length = min(map_length, length);
+	if (use_append)
+		map_length = min(map_length, fs_info->max_zone_append_size);
+
 	if (map_length < length) {
-		bio = btrfs_split_bio(bio, map_length);
+		bio = btrfs_split_bio(fs_info, bio, map_length, use_append);
 		bbio = btrfs_bio(bio);
 	}
 
@@ -639,7 +647,9 @@ static bool btrfs_submit_chunk(struct bio *bio, int mirror_num)
 	}
 
 	if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
-		if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+		if (use_append) {
+			bio->bi_opf &= ~REQ_OP_WRITE;
+			bio->bi_opf |= REQ_OP_ZONE_APPEND;
 			ret = btrfs_extract_ordered_extent(btrfs_bio(bio));
 			if (ret)
 				goto fail_put_bio;
@@ -649,9 +659,9 @@ static bool btrfs_submit_chunk(struct bio *bio, int mirror_num)
 		 * Csum items for reloc roots have already been cloned at this
 		 * point, so they are handled as part of the no-checksum case.
 		 */
-		if (!(bbio->inode->flags & BTRFS_INODE_NODATASUM) &&
+		if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
 		    !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&
-		    !btrfs_is_data_reloc_root(bbio->inode->root)) {
+		    !btrfs_is_data_reloc_root(inode->root)) {
 			if (should_async_write(bbio) &&
 			    btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num))
 				goto done;