summary | refs | log | tree | commit | diff
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- fs/btrfs/extent_io.c 604
1 file changed, 91 insertions, 513 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 83dd3aa59663..c25fa74d7615 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -36,6 +36,7 @@
#include "file.h"
#include "dev-replace.h"
#include "super.h"
+#include "transaction.h"
static struct kmem_cache *extent_buffer_cache;
@@ -99,11 +100,19 @@ struct btrfs_bio_ctrl {
struct bio *bio;
int mirror_num;
enum btrfs_compression_type compress_type;
- u32 len_to_stripe_boundary;
u32 len_to_oe_boundary;
btrfs_bio_end_io_t end_io_func;
/*
+ * This is for metadata read, to provide the extra needed verification
+ * info. This has to be provided for submit_one_bio(), as
+ * submit_one_bio() can submit a bio if it ends at stripe boundary. If
+ * no such parent_check is provided, the metadata can hit false alert at
+ * endio time.
+ */
+ struct btrfs_tree_parent_check *parent_check;
+
+ /*
* Tell writepage not to lock the state bits for this range, it still
* does the unlocking.
*/
@@ -117,7 +126,7 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
{
struct bio *bio;
struct bio_vec *bv;
- struct btrfs_inode *inode;
+ struct inode *inode;
int mirror_num;
if (!bio_ctrl->bio)
@@ -125,21 +134,31 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
bio = bio_ctrl->bio;
bv = bio_first_bvec_all(bio);
- inode = BTRFS_I(bv->bv_page->mapping->host);
+ inode = bv->bv_page->mapping->host;
mirror_num = bio_ctrl->mirror_num;
/* Caller should ensure the bio has at least some range added */
ASSERT(bio->bi_iter.bi_size);
- btrfs_bio(bio)->file_offset = page_offset(bv->bv_page) + bv->bv_offset;
+ if (!is_data_inode(inode)) {
+ if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
+ /*
+ * For metadata read, we should have the parent_check,
+ * and copy it to bbio for metadata verification.
+ */
+ ASSERT(bio_ctrl->parent_check);
+ memcpy(&btrfs_bio(bio)->parent_check,
+ bio_ctrl->parent_check,
+ sizeof(struct btrfs_tree_parent_check));
+ }
+ bio->bi_opf |= REQ_META;
+ }
- if (!is_data_inode(&inode->vfs_inode))
- btrfs_submit_metadata_bio(inode, bio, mirror_num);
- else if (btrfs_op(bio) == BTRFS_MAP_WRITE)
- btrfs_submit_data_write_bio(inode, bio, mirror_num);
+ if (btrfs_op(bio) == BTRFS_MAP_READ &&
+ bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
+ btrfs_submit_compressed_read(inode, bio, mirror_num);
else
- btrfs_submit_data_read_bio(inode, bio, mirror_num,
- bio_ctrl->compress_type);
+ btrfs_submit_bio(bio, mirror_num);
/* The bio is owned by the end_io handler now */
bio_ctrl->bio = NULL;
@@ -495,266 +514,6 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
start, end, page_ops, NULL);
}
-static int insert_failrec(struct btrfs_inode *inode,
- struct io_failure_record *failrec)
-{
- struct rb_node *exist;
-
- spin_lock(&inode->io_failure_lock);
- exist = rb_simple_insert(&inode->io_failure_tree, failrec->bytenr,
- &failrec->rb_node);
- spin_unlock(&inode->io_failure_lock);
-
- return (exist == NULL) ? 0 : -EEXIST;
-}
-
-static struct io_failure_record *get_failrec(struct btrfs_inode *inode, u64 start)
-{
- struct rb_node *node;
- struct io_failure_record *failrec = ERR_PTR(-ENOENT);
-
- spin_lock(&inode->io_failure_lock);
- node = rb_simple_search(&inode->io_failure_tree, start);
- if (node)
- failrec = rb_entry(node, struct io_failure_record, rb_node);
- spin_unlock(&inode->io_failure_lock);
- return failrec;
-}
-
-static void free_io_failure(struct btrfs_inode *inode,
- struct io_failure_record *rec)
-{
- spin_lock(&inode->io_failure_lock);
- rb_erase(&rec->rb_node, &inode->io_failure_tree);
- spin_unlock(&inode->io_failure_lock);
-
- kfree(rec);
-}
-
-static int next_mirror(const struct io_failure_record *failrec, int cur_mirror)
-{
- if (cur_mirror == failrec->num_copies)
- return cur_mirror + 1 - failrec->num_copies;
- return cur_mirror + 1;
-}
-
-static int prev_mirror(const struct io_failure_record *failrec, int cur_mirror)
-{
- if (cur_mirror == 1)
- return failrec->num_copies;
- return cur_mirror - 1;
-}
-
-/*
- * each time an IO finishes, we do a fast check in the IO failure tree
- * to see if we need to process or clean up an io_failure_record
- */
-int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
- struct page *page, unsigned int pg_offset)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct extent_io_tree *io_tree = &inode->io_tree;
- u64 ino = btrfs_ino(inode);
- u64 locked_start, locked_end;
- struct io_failure_record *failrec;
- int mirror;
- int ret;
-
- failrec = get_failrec(inode, start);
- if (IS_ERR(failrec))
- return 0;
-
- BUG_ON(!failrec->this_mirror);
-
- if (sb_rdonly(fs_info->sb))
- goto out;
-
- ret = find_first_extent_bit(io_tree, failrec->bytenr, &locked_start,
- &locked_end, EXTENT_LOCKED, NULL);
- if (ret || locked_start > failrec->bytenr ||
- locked_end < failrec->bytenr + failrec->len - 1)
- goto out;
-
- mirror = failrec->this_mirror;
- do {
- mirror = prev_mirror(failrec, mirror);
- btrfs_repair_io_failure(fs_info, ino, start, failrec->len,
- failrec->logical, page, pg_offset, mirror);
- } while (mirror != failrec->failed_mirror);
-
-out:
- free_io_failure(inode, failrec);
- return 0;
-}
-
-/*
- * Can be called when
- * - hold extent lock
- * - under ordered extent
- * - the inode is freeing
- */
-void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
-{
- struct io_failure_record *failrec;
- struct rb_node *node, *next;
-
- if (RB_EMPTY_ROOT(&inode->io_failure_tree))
- return;
-
- spin_lock(&inode->io_failure_lock);
- node = rb_simple_search_first(&inode->io_failure_tree, start);
- while (node) {
- failrec = rb_entry(node, struct io_failure_record, rb_node);
- if (failrec->bytenr > end)
- break;
-
- next = rb_next(node);
- rb_erase(&failrec->rb_node, &inode->io_failure_tree);
- kfree(failrec);
-
- node = next;
- }
- spin_unlock(&inode->io_failure_lock);
-}
-
-static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
- struct btrfs_bio *bbio,
- unsigned int bio_offset)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- u64 start = bbio->file_offset + bio_offset;
- struct io_failure_record *failrec;
- const u32 sectorsize = fs_info->sectorsize;
- int ret;
-
- failrec = get_failrec(BTRFS_I(inode), start);
- if (!IS_ERR(failrec)) {
- btrfs_debug(fs_info,
- "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu",
- failrec->logical, failrec->bytenr, failrec->len);
- /*
- * when data can be on disk more than twice, add to failrec here
- * (e.g. with a list for failed_mirror) to make
- * clean_io_failure() clean all those errors at once.
- */
- ASSERT(failrec->this_mirror == bbio->mirror_num);
- ASSERT(failrec->len == fs_info->sectorsize);
- return failrec;
- }
-
- failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
- if (!failrec)
- return ERR_PTR(-ENOMEM);
-
- RB_CLEAR_NODE(&failrec->rb_node);
- failrec->bytenr = start;
- failrec->len = sectorsize;
- failrec->failed_mirror = bbio->mirror_num;
- failrec->this_mirror = bbio->mirror_num;
- failrec->logical = (bbio->iter.bi_sector << SECTOR_SHIFT) + bio_offset;
-
- btrfs_debug(fs_info,
- "new io failure record logical %llu start %llu",
- failrec->logical, start);
-
- failrec->num_copies = btrfs_num_copies(fs_info, failrec->logical, sectorsize);
- if (failrec->num_copies == 1) {
- /*
- * We only have a single copy of the data, so don't bother with
- * all the retry and error correction code that follows. No
- * matter what the error is, it is very likely to persist.
- */
- btrfs_debug(fs_info,
- "cannot repair logical %llu num_copies %d",
- failrec->logical, failrec->num_copies);
- kfree(failrec);
- return ERR_PTR(-EIO);
- }
-
- /* Set the bits in the private failure tree */
- ret = insert_failrec(BTRFS_I(inode), failrec);
- if (ret) {
- kfree(failrec);
- return ERR_PTR(ret);
- }
-
- return failrec;
-}
-
-int btrfs_repair_one_sector(struct btrfs_inode *inode, struct btrfs_bio *failed_bbio,
- u32 bio_offset, struct page *page, unsigned int pgoff,
- bool submit_buffered)
-{
- u64 start = failed_bbio->file_offset + bio_offset;
- struct io_failure_record *failrec;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct bio *failed_bio = &failed_bbio->bio;
- const int icsum = bio_offset >> fs_info->sectorsize_bits;
- struct bio *repair_bio;
- struct btrfs_bio *repair_bbio;
-
- btrfs_debug(fs_info,
- "repair read error: read error at %llu", start);
-
- BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
-
- failrec = btrfs_get_io_failure_record(&inode->vfs_inode, failed_bbio, bio_offset);
- if (IS_ERR(failrec))
- return PTR_ERR(failrec);
-
- /*
- * There are two premises:
- * a) deliver good data to the caller
- * b) correct the bad sectors on disk
- *
- * Since we're only doing repair for one sector, we only need to get
- * a good copy of the failed sector and if we succeed, we have setup
- * everything for btrfs_repair_io_failure to do the rest for us.
- */
- failrec->this_mirror = next_mirror(failrec, failrec->this_mirror);
- if (failrec->this_mirror == failrec->failed_mirror) {
- btrfs_debug(fs_info,
- "failed to repair num_copies %d this_mirror %d failed_mirror %d",
- failrec->num_copies, failrec->this_mirror, failrec->failed_mirror);
- free_io_failure(inode, failrec);
- return -EIO;
- }
-
- repair_bio = btrfs_bio_alloc(1, REQ_OP_READ, failed_bbio->end_io,
- failed_bbio->private);
- repair_bbio = btrfs_bio(repair_bio);
- repair_bbio->file_offset = start;
- repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
-
- if (failed_bbio->csum) {
- const u32 csum_size = fs_info->csum_size;
-
- repair_bbio->csum = repair_bbio->csum_inline;
- memcpy(repair_bbio->csum,
- failed_bbio->csum + csum_size * icsum, csum_size);
- }
-
- bio_add_page(repair_bio, page, failrec->len, pgoff);
- repair_bbio->iter = repair_bio->bi_iter;
-
- btrfs_debug(fs_info,
- "repair read error: submitting new read to mirror %d",
- failrec->this_mirror);
-
- /*
- * At this point we have a bio, so any errors from bio submission will
- * be handled by the endio on the repair_bio, so we can't return an
- * error here.
- */
- if (submit_buffered)
- btrfs_submit_data_read_bio(inode, repair_bio,
- failrec->this_mirror, 0);
- else
- btrfs_submit_dio_repair_bio(inode, repair_bio, failrec->this_mirror);
-
- return BLK_STS_OK;
-}
-
static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
@@ -783,79 +542,6 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
btrfs_subpage_end_reader(fs_info, page, start, len);
}
-static void end_sector_io(struct page *page, u64 offset, bool uptodate)
-{
- struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
- const u32 sectorsize = inode->root->fs_info->sectorsize;
-
- end_page_read(page, uptodate, offset, sectorsize);
- unlock_extent(&inode->io_tree, offset, offset + sectorsize - 1, NULL);
-}
-
-static void submit_data_read_repair(struct inode *inode,
- struct btrfs_bio *failed_bbio,
- u32 bio_offset, const struct bio_vec *bvec,
- unsigned int error_bitmap)
-{
- const unsigned int pgoff = bvec->bv_offset;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct page *page = bvec->bv_page;
- const u64 start = page_offset(bvec->bv_page) + bvec->bv_offset;
- const u64 end = start + bvec->bv_len - 1;
- const u32 sectorsize = fs_info->sectorsize;
- const int nr_bits = (end + 1 - start) >> fs_info->sectorsize_bits;
- int i;
-
- BUG_ON(bio_op(&failed_bbio->bio) == REQ_OP_WRITE);
-
- /* This repair is only for data */
- ASSERT(is_data_inode(inode));
-
- /* We're here because we had some read errors or csum mismatch */
- ASSERT(error_bitmap);
-
- /*
- * We only get called on buffered IO, thus page must be mapped and bio
- * must not be cloned.
- */
- ASSERT(page->mapping && !bio_flagged(&failed_bbio->bio, BIO_CLONED));
-
- /* Iterate through all the sectors in the range */
- for (i = 0; i < nr_bits; i++) {
- const unsigned int offset = i * sectorsize;
- bool uptodate = false;
- int ret;
-
- if (!(error_bitmap & (1U << i))) {
- /*
- * This sector has no error, just end the page read
- * and unlock the range.
- */
- uptodate = true;
- goto next;
- }
-
- ret = btrfs_repair_one_sector(BTRFS_I(inode), failed_bbio,
- bio_offset + offset, page, pgoff + offset,
- true);
- if (!ret) {
- /*
- * We have submitted the read repair, the page release
- * will be handled by the endio function of the
- * submitted repair bio.
- * Thus we don't need to do any thing here.
- */
- continue;
- }
- /*
- * Continue on failed repair, otherwise the remaining sectors
- * will not be properly unlocked.
- */
-next:
- end_sector_io(page, start + offset, uptodate);
- }
-}
-
/* lots and lots of room for performance fixes in the end_bio funcs */
void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
@@ -899,7 +585,6 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio)
u64 start;
u64 end;
struct bvec_iter_all iter_all;
- bool first_bvec = true;
ASSERT(!bio_flagged(bio, BIO_CLONED));
bio_for_each_segment_all(bvec, bio, iter_all) {
@@ -921,11 +606,6 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio)
start = page_offset(page) + bvec->bv_offset;
end = start + bvec->bv_len - 1;
- if (first_bvec) {
- btrfs_record_physical_zoned(inode, start, bio);
- first_bvec = false;
- }
-
end_extent_writepage(page, error, start, end);
btrfs_page_clear_writeback(fs_info, page, start, bvec->bv_len);
@@ -1073,8 +753,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
const u32 sectorsize = fs_info->sectorsize;
- unsigned int error_bitmap = (unsigned int)-1;
- bool repair = false;
u64 start;
u64 end;
u32 len;
@@ -1106,25 +784,14 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
len = bvec->bv_len;
mirror = bbio->mirror_num;
- if (likely(uptodate)) {
- if (is_data_inode(inode)) {
- error_bitmap = btrfs_verify_data_csum(bbio,
- bio_offset, page, start, end);
- if (error_bitmap)
- uptodate = false;
- } else {
- if (btrfs_validate_metadata_buffer(bbio,
- page, start, end, mirror))
- uptodate = false;
- }
- }
+ if (uptodate && !is_data_inode(inode) &&
+ btrfs_validate_metadata_buffer(bbio, page, start, end, mirror))
+ uptodate = false;
if (likely(uptodate)) {
loff_t i_size = i_size_read(inode);
pgoff_t end_index = i_size >> PAGE_SHIFT;
- btrfs_clean_io_failure(BTRFS_I(inode), start, page, 0);
-
/*
* Zero out the remaining part if this range straddles
* i_size.
@@ -1141,19 +808,7 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
zero_user_segment(page, zero_start,
offset_in_page(end) + 1);
}
- } else if (is_data_inode(inode)) {
- /*
- * Only try to repair bios that actually made it to a
- * device. If the bio failed to be submitted mirror
- * is 0 and we need to fail it without retrying.
- *
- * This also includes the high level bios for compressed
- * extents - these never make it to a device and repair
- * is already handled on the lower compressed bio.
- */
- if (mirror > 0)
- repair = true;
- } else {
+ } else if (!is_data_inode(inode)) {
struct extent_buffer *eb;
eb = find_extent_buffer_readpage(fs_info, page, start);
@@ -1162,19 +817,10 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
atomic_dec(&eb->io_pages);
}
- if (repair) {
- /*
- * submit_data_read_repair() will handle all the good
- * and bad sectors, we just continue to the next bvec.
- */
- submit_data_read_repair(inode, bbio, bio_offset, bvec,
- error_bitmap);
- } else {
- /* Update page status and unlock */
- end_page_read(page, uptodate, start, len);
- endio_readpage_release_extent(&processed, BTRFS_I(inode),
- start, end, PageUptodate(page));
- }
+ /* Update page status and unlock. */
+ end_page_read(page, uptodate, start, len);
+ endio_readpage_release_extent(&processed, BTRFS_I(inode),
+ start, end, PageUptodate(page));
ASSERT(bio_offset + len > bio_offset);
bio_offset += len;
@@ -1182,7 +828,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
}
/* Release the last extent */
endio_readpage_release_extent(&processed, NULL, 0, 0, false);
- btrfs_bio_free_csum(bbio);
bio_put(bio);
}
@@ -1250,11 +895,10 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
u32 real_size;
const sector_t sector = disk_bytenr >> SECTOR_SHIFT;
bool contig = false;
- int ret;
ASSERT(bio);
/* The limit should be calculated when bio_ctrl->bio is allocated */
- ASSERT(bio_ctrl->len_to_oe_boundary && bio_ctrl->len_to_stripe_boundary);
+ ASSERT(bio_ctrl->len_to_oe_boundary);
if (bio_ctrl->compress_type != compress_type)
return 0;
@@ -1290,9 +934,7 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
if (!contig)
return 0;
- real_size = min(bio_ctrl->len_to_oe_boundary,
- bio_ctrl->len_to_stripe_boundary) - bio_size;
- real_size = min(real_size, size);
+ real_size = min(bio_ctrl->len_to_oe_boundary - bio_size, size);
/*
* If real_size is 0, never call bio_add_*_page(), as even size is 0,
@@ -1301,82 +943,45 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
if (real_size == 0)
return 0;
- if (bio_op(bio) == REQ_OP_ZONE_APPEND)
- ret = bio_add_zone_append_page(bio, page, real_size, pg_offset);
- else
- ret = bio_add_page(bio, page, real_size, pg_offset);
-
- return ret;
+ return bio_add_page(bio, page, real_size, pg_offset);
}
-static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
- struct btrfs_inode *inode, u64 file_offset)
+static void calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
+ struct btrfs_inode *inode, u64 file_offset)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct btrfs_io_geometry geom;
struct btrfs_ordered_extent *ordered;
- struct extent_map *em;
- u64 logical = (bio_ctrl->bio->bi_iter.bi_sector << SECTOR_SHIFT);
- int ret;
/*
- * Pages for compressed extent are never submitted to disk directly,
- * thus it has no real boundary, just set them to U32_MAX.
- *
- * The split happens for real compressed bio, which happens in
- * btrfs_submit_compressed_read/write().
+ * Limit the extent to the ordered boundary for Zone Append.
+ * Compressed bios aren't submitted directly, so it doesn't apply to
+ * them.
*/
- if (bio_ctrl->compress_type != BTRFS_COMPRESS_NONE) {
- bio_ctrl->len_to_oe_boundary = U32_MAX;
- bio_ctrl->len_to_stripe_boundary = U32_MAX;
- return 0;
- }
- em = btrfs_get_chunk_map(fs_info, logical, fs_info->sectorsize);
- if (IS_ERR(em))
- return PTR_ERR(em);
- ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio_ctrl->bio),
- logical, &geom);
- free_extent_map(em);
- if (ret < 0) {
- return ret;
- }
- if (geom.len > U32_MAX)
- bio_ctrl->len_to_stripe_boundary = U32_MAX;
- else
- bio_ctrl->len_to_stripe_boundary = (u32)geom.len;
-
- if (bio_op(bio_ctrl->bio) != REQ_OP_ZONE_APPEND) {
- bio_ctrl->len_to_oe_boundary = U32_MAX;
- return 0;
- }
-
- /* Ordered extent not yet created, so we're good */
- ordered = btrfs_lookup_ordered_extent(inode, file_offset);
- if (!ordered) {
- bio_ctrl->len_to_oe_boundary = U32_MAX;
- return 0;
+ if (bio_ctrl->compress_type == BTRFS_COMPRESS_NONE &&
+ btrfs_use_zone_append(btrfs_bio(bio_ctrl->bio))) {
+ ordered = btrfs_lookup_ordered_extent(inode, file_offset);
+ if (ordered) {
+ bio_ctrl->len_to_oe_boundary = min_t(u32, U32_MAX,
+ ordered->file_offset +
+ ordered->disk_num_bytes - file_offset);
+ btrfs_put_ordered_extent(ordered);
+ return;
+ }
}
- bio_ctrl->len_to_oe_boundary = min_t(u32, U32_MAX,
- ordered->disk_bytenr + ordered->disk_num_bytes - logical);
- btrfs_put_ordered_extent(ordered);
- return 0;
+ bio_ctrl->len_to_oe_boundary = U32_MAX;
}
-static int alloc_new_bio(struct btrfs_inode *inode,
- struct btrfs_bio_ctrl *bio_ctrl,
- struct writeback_control *wbc,
- blk_opf_t opf,
- u64 disk_bytenr, u32 offset, u64 file_offset,
- enum btrfs_compression_type compress_type)
+static void alloc_new_bio(struct btrfs_inode *inode,
+ struct btrfs_bio_ctrl *bio_ctrl,
+ struct writeback_control *wbc, blk_opf_t opf,
+ u64 disk_bytenr, u32 offset, u64 file_offset,
+ enum btrfs_compression_type compress_type)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio;
- int ret;
- ASSERT(bio_ctrl->end_io_func);
-
- bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, bio_ctrl->end_io_func, NULL);
+ bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, inode, bio_ctrl->end_io_func,
+ NULL);
/*
* For compressed page range, its disk_bytenr is always @disk_bytenr
* passed in, no matter if we have added any range into previous bio.
@@ -1385,48 +990,21 @@ static int alloc_new_bio(struct btrfs_inode *inode,
bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
else
bio->bi_iter.bi_sector = (disk_bytenr + offset) >> SECTOR_SHIFT;
+ btrfs_bio(bio)->file_offset = file_offset;
bio_ctrl->bio = bio;
bio_ctrl->compress_type = compress_type;
- ret = calc_bio_boundaries(bio_ctrl, inode, file_offset);
- if (ret < 0)
- goto error;
+ calc_bio_boundaries(bio_ctrl, inode, file_offset);
if (wbc) {
/*
- * For Zone append we need the correct block_device that we are
- * going to write to set in the bio to be able to respect the
- * hardware limitation. Look it up here:
+ * Pick the last added device to support cgroup writeback. For
+ * multi-device file systems this means blk-cgroup policies have
+ * to always be set on the last added/replaced device.
+ * This is a bit odd but has been like that for a long time.
*/
- if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
- struct btrfs_device *dev;
-
- dev = btrfs_zoned_get_device(fs_info, disk_bytenr,
- fs_info->sectorsize);
- if (IS_ERR(dev)) {
- ret = PTR_ERR(dev);
- goto error;
- }
-
- bio_set_dev(bio, dev->bdev);
- } else {
- /*
- * Otherwise pick the last added device to support
- * cgroup writeback. For multi-device file systems this
- * means blk-cgroup policies have to always be set on the
- * last added/replaced device. This is a bit odd but has
- * been like that for a long time.
- */
- bio_set_dev(bio, fs_info->fs_devices->latest_dev->bdev);
- }
+ bio_set_dev(bio, fs_info->fs_devices->latest_dev->bdev);
wbc_init_bio(wbc, bio);
- } else {
- ASSERT(bio_op(bio) != REQ_OP_ZONE_APPEND);
}
- return 0;
-error:
- bio_ctrl->bio = NULL;
- btrfs_bio_end_io(btrfs_bio(bio), errno_to_blk_status(ret));
- return ret;
}
/*
@@ -1452,7 +1030,6 @@ static int submit_extent_page(blk_opf_t opf,
enum btrfs_compression_type compress_type,
bool force_bio_submit)
{
- int ret = 0;
struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
unsigned int cur = pg_offset;
@@ -1472,12 +1049,9 @@ static int submit_extent_page(blk_opf_t opf,
/* Allocate new bio if needed */
if (!bio_ctrl->bio) {
- ret = alloc_new_bio(inode, bio_ctrl, wbc, opf,
- disk_bytenr, offset,
- page_offset(page) + cur,
- compress_type);
- if (ret < 0)
- return ret;
+ alloc_new_bio(inode, bio_ctrl, wbc, opf, disk_bytenr,
+ offset, page_offset(page) + cur,
+ compress_type);
}
/*
* We must go through btrfs_bio_add_page() to ensure each
@@ -2034,10 +1608,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
* find_next_dirty_byte() are all exclusive
*/
iosize = min(min(em_end, end + 1), dirty_range_end) - cur;
-
- if (btrfs_use_zone_append(inode, em->block_start))
- op = REQ_OP_ZONE_APPEND;
-
free_extent_map(em);
em = NULL;
@@ -2341,13 +1911,6 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
mapping_set_error(page->mapping, -EIO);
/*
- * If we error out, we should add back the dirty_metadata_bytes
- * to make it consistent.
- */
- percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
- eb->len, fs_info->dirty_metadata_batch);
-
- /*
* If writeback for a btree extent that doesn't belong to a log tree
* failed, increment the counter transaction->eb_write_errors.
* We do this because while the transaction is running and before it's
@@ -3806,6 +3369,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
lockend = round_up(start + len, inode->root->fs_info->sectorsize);
prev_extent_end = lockstart;
+ btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
@@ -3999,6 +3563,7 @@ check_eof_delalloc:
out_unlock:
unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
out:
free_extent_state(delalloc_cached_state);
btrfs_free_backref_share_ctx(backref_ctx);
@@ -4702,12 +4267,25 @@ static void clear_subpage_extent_buffer_dirty(const struct extent_buffer *eb)
WARN_ON(atomic_read(&eb->refs) == 0);
}
-void clear_extent_buffer_dirty(const struct extent_buffer *eb)
+void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
+ struct extent_buffer *eb)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
int i;
int num_pages;
struct page *page;
+ btrfs_assert_tree_write_locked(eb);
+
+ if (trans && btrfs_header_generation(eb) != trans->transid)
+ return;
+
+ if (!test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags))
+ return;
+
+ percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, -eb->len,
+ fs_info->dirty_metadata_batch);
+
if (eb->fs_info->nodesize < PAGE_SIZE)
return clear_subpage_extent_buffer_dirty(eb);
@@ -4829,6 +4407,7 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
struct extent_state *cached_state = NULL;
struct btrfs_bio_ctrl bio_ctrl = {
.mirror_num = mirror_num,
+ .parent_check = check,
};
int ret = 0;
@@ -4878,7 +4457,6 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
*/
atomic_dec(&eb->io_pages);
}
- memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check));
submit_one_bio(&bio_ctrl);
if (ret || wait != WAIT_COMPLETE) {
free_extent_state(cached_state);
@@ -4905,6 +4483,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
unsigned long num_reads = 0;
struct btrfs_bio_ctrl bio_ctrl = {
.mirror_num = mirror_num,
+ .parent_check = check,
};
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
@@ -4996,7 +4575,6 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
}
}
- memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check));
submit_one_bio(&bio_ctrl);
if (ret || wait != WAIT_COMPLETE)