Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c  848
1 file changed, 226 insertions, 622 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a1adadd5d25d..a91d5ad27984 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -98,33 +98,16 @@ void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info)
*/
struct btrfs_bio_ctrl {
struct btrfs_bio *bbio;
- int mirror_num;
enum btrfs_compression_type compress_type;
u32 len_to_oe_boundary;
blk_opf_t opf;
btrfs_bio_end_io_t end_io_func;
struct writeback_control *wbc;
-
- /*
- * This is for metadata read, to provide the extra needed verification
- * info. This has to be provided for submit_one_bio(), as
- * submit_one_bio() can submit a bio if it ends at stripe boundary. If
- * no such parent_check is provided, the metadata can hit false alert at
- * endio time.
- */
- struct btrfs_tree_parent_check *parent_check;
-
- /*
- * Tell writepage not to lock the state bits for this range, it still
- * does the unlocking.
- */
- bool extent_locked;
};
static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
{
struct btrfs_bio *bbio = bio_ctrl->bbio;
- int mirror_num = bio_ctrl->mirror_num;
if (!bbio)
return;
@@ -132,25 +115,11 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
/* Caller should ensure the bio has at least some range added */
ASSERT(bbio->bio.bi_iter.bi_size);
- if (!is_data_inode(&bbio->inode->vfs_inode)) {
- if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE) {
- /*
- * For metadata read, we should have the parent_check,
- * and copy it to bbio for metadata verification.
- */
- ASSERT(bio_ctrl->parent_check);
- memcpy(&bbio->parent_check,
- bio_ctrl->parent_check,
- sizeof(struct btrfs_tree_parent_check));
- }
- bbio->bio.bi_opf |= REQ_META;
- }
-
if (btrfs_op(&bbio->bio) == BTRFS_MAP_READ &&
bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
- btrfs_submit_compressed_read(bbio, mirror_num);
+ btrfs_submit_compressed_read(bbio);
else
- btrfs_submit_bio(bbio, mirror_num);
+ btrfs_submit_bio(bbio, 0);
/* The bbio is owned by the end_io handler now */
bio_ctrl->bbio = NULL;
@@ -248,8 +217,6 @@ static int process_one_page(struct btrfs_fs_info *fs_info,
if (page_ops & PAGE_SET_ORDERED)
btrfs_page_clamp_set_ordered(fs_info, page, start, len);
- if (page_ops & PAGE_SET_ERROR)
- btrfs_page_clamp_set_error(fs_info, page, start, len);
if (page_ops & PAGE_START_WRITEBACK) {
btrfs_page_clamp_clear_dirty(fs_info, page, start, len);
btrfs_page_clamp_set_writeback(fs_info, page, start, len);
@@ -295,9 +262,6 @@ static int __process_pages_contig(struct address_space *mapping,
ASSERT(processed_end && *processed_end == start);
}
- if ((page_ops & PAGE_SET_ERROR) && start_index <= end_index)
- mapping_set_error(mapping, -EIO);
-
folio_batch_init(&fbatch);
while (index <= end_index) {
int found_folios;
@@ -506,6 +470,15 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
start, end, page_ops, NULL);
}
+static bool btrfs_verify_page(struct page *page, u64 start)
+{
+ if (!fsverity_active(page->mapping->host) ||
+ PageUptodate(page) ||
+ start >= i_size_read(page->mapping->host))
+ return true;
+ return fsverity_verify_page(page);
+}
+
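For illustration, a minimal userspace model of the btrfs_verify_page() decision added above. The boolean parameters are stand-ins for fsverity_active(), PageUptodate() and the i_size comparison; none of this is kernel API, it only demonstrates the truth table the helper collapses into a single predicate.

#include <stdbool.h>
#include <stdio.h>

static bool verify_page_model(bool verity_active, bool page_uptodate,
			      bool starts_past_i_size, bool verity_ok)
{
	/* Verification only applies to verity files, for pages that are
	 * not already uptodate and that start below i_size. */
	if (!verity_active || page_uptodate || starts_past_i_size)
		return true;
	return verity_ok;	/* result of fsverity_verify_page() */
}

int main(void)
{
	/* A non-verity file always passes... */
	printf("%d\n", verify_page_model(false, false, false, false));
	/* ...a verity file passes only when the hash check does. */
	printf("%d\n", verify_page_model(true, false, false, false));
	return 0;
}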
static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
@@ -513,20 +486,10 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
ASSERT(page_offset(page) <= start &&
start + len <= page_offset(page) + PAGE_SIZE);
- if (uptodate) {
- if (fsverity_active(page->mapping->host) &&
- !PageError(page) &&
- !PageUptodate(page) &&
- start < i_size_read(page->mapping->host) &&
- !fsverity_verify_page(page)) {
- btrfs_page_set_error(fs_info, page, start, len);
- } else {
- btrfs_page_set_uptodate(fs_info, page, start, len);
- }
- } else {
+ if (uptodate && btrfs_verify_page(page, start))
+ btrfs_page_set_uptodate(fs_info, page, start, len);
+ else
btrfs_page_clear_uptodate(fs_info, page, start, len);
- btrfs_page_set_error(fs_info, page, start, len);
- }
if (!btrfs_is_subpage(fs_info, page))
unlock_page(page);
@@ -554,7 +517,6 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
len = end + 1 - start;
btrfs_page_clear_uptodate(fs_info, page, start, len);
- btrfs_page_set_error(fs_info, page, start, len);
ret = err < 0 ? err : -EIO;
mapping_set_error(page->mapping, ret);
}
@@ -574,8 +536,6 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio)
struct bio *bio = &bbio->bio;
int error = blk_status_to_errno(bio->bi_status);
struct bio_vec *bvec;
- u64 start;
- u64 end;
struct bvec_iter_all iter_all;
ASSERT(!bio_flagged(bio, BIO_CLONED));
@@ -584,6 +544,8 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio)
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
const u32 sectorsize = fs_info->sectorsize;
+ u64 start = page_offset(page) + bvec->bv_offset;
+ u32 len = bvec->bv_len;
/* Our read/write should always be sector aligned. */
if (!IS_ALIGNED(bvec->bv_offset, sectorsize))
@@ -595,12 +557,12 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio)
"incomplete page write with offset %u and length %u",
bvec->bv_offset, bvec->bv_len);
- start = page_offset(page) + bvec->bv_offset;
- end = start + bvec->bv_len - 1;
-
- end_extent_writepage(page, error, start, end);
-
- btrfs_page_clear_writeback(fs_info, page, start, bvec->bv_len);
+ btrfs_finish_ordered_extent(bbio->ordered, page, start, len, !error);
+ if (error) {
+ btrfs_page_clear_uptodate(fs_info, page, start, len);
+ mapping_set_error(page->mapping, error);
+ }
+ btrfs_page_clear_writeback(fs_info, page, start, len);
}
bio_put(bio);
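As a sketch of the per-segment math the rewritten endio loop performs, the snippet below recomputes start and len the same way, with a plain struct standing in for struct bio_vec and a made-up value for page_offset(page); it is a model, not the kernel code path.

#include <stdint.h>
#include <stdio.h>

struct seg { uint32_t bv_offset; uint32_t bv_len; };	/* ~ struct bio_vec */

int main(void)
{
	const uint64_t page_off = 0x10000;	/* page_offset(page), assumed */
	const struct seg segs[] = { { 0, 4096 }, { 4096, 4096 } };

	for (unsigned int i = 0; i < 2; i++) {
		uint64_t start = page_off + segs[i].bv_offset;
		uint32_t len = segs[i].bv_len;

		/* This (start, len) pair is what the patch now feeds to
		 * btrfs_finish_ordered_extent() and
		 * btrfs_page_clear_writeback(). */
		printf("start=%llu len=%u\n", (unsigned long long)start, len);
	}
	return 0;
}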
@@ -686,35 +648,6 @@ static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
}
/*
- * Find extent buffer for a given bytenr.
- *
- * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking
- * in endio context.
- */
-static struct extent_buffer *find_extent_buffer_readpage(
- struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
-{
- struct extent_buffer *eb;
-
- /*
- * For regular sectorsize, we can use page->private to grab extent
- * buffer
- */
- if (fs_info->nodesize >= PAGE_SIZE) {
- ASSERT(PagePrivate(page) && page->private);
- return (struct extent_buffer *)page->private;
- }
-
- /* For subpage case, we need to lookup buffer radix tree */
- rcu_read_lock();
- eb = radix_tree_lookup(&fs_info->buffer_radix,
- bytenr >> fs_info->sectorsize_bits);
- rcu_read_unlock();
- ASSERT(eb);
- return eb;
-}
-
-/*
* after a readpage IO is done, we need to:
* clear the uptodate bits on error
* set the uptodate bits if things worked
@@ -735,7 +668,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
* larger than UINT_MAX, u32 here is enough.
*/
u32 bio_offset = 0;
- int mirror;
struct bvec_iter_all iter_all;
ASSERT(!bio_flagged(bio, BIO_CLONED));
@@ -775,11 +707,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
end = start + bvec->bv_len - 1;
len = bvec->bv_len;
- mirror = bbio->mirror_num;
- if (uptodate && !is_data_inode(inode) &&
- btrfs_validate_metadata_buffer(bbio, page, start, end, mirror))
- uptodate = false;
-
if (likely(uptodate)) {
loff_t i_size = i_size_read(inode);
pgoff_t end_index = i_size >> PAGE_SHIFT;
@@ -800,19 +727,12 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
zero_user_segment(page, zero_start,
offset_in_page(end) + 1);
}
- } else if (!is_data_inode(inode)) {
- struct extent_buffer *eb;
-
- eb = find_extent_buffer_readpage(fs_info, page, start);
- set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
- eb->read_mirror = mirror;
- atomic_dec(&eb->io_pages);
}
/* Update page status and unlock. */
end_page_read(page, uptodate, start, len);
endio_readpage_release_extent(&processed, BTRFS_I(inode),
- start, end, PageUptodate(page));
+ start, end, uptodate);
ASSERT(bio_offset + len > bio_offset);
bio_offset += len;
@@ -906,13 +826,8 @@ static void alloc_new_bio(struct btrfs_inode *inode,
bio_ctrl->bbio = bbio;
bio_ctrl->len_to_oe_boundary = U32_MAX;
- /*
- * Limit the extent to the ordered boundary for Zone Append.
- * Compressed bios aren't submitted directly, so it doesn't apply to
- * them.
- */
- if (bio_ctrl->compress_type == BTRFS_COMPRESS_NONE &&
- btrfs_use_zone_append(bbio)) {
+ /* Limit data write bios to the ordered boundary. */
+ if (bio_ctrl->wbc) {
struct btrfs_ordered_extent *ordered;
ordered = btrfs_lookup_ordered_extent(inode, file_offset);
@@ -920,11 +835,9 @@ static void alloc_new_bio(struct btrfs_inode *inode,
bio_ctrl->len_to_oe_boundary = min_t(u32, U32_MAX,
ordered->file_offset +
ordered->disk_num_bytes - file_offset);
- btrfs_put_ordered_extent(ordered);
+ bbio->ordered = ordered;
}
- }
- if (bio_ctrl->wbc) {
/*
* Pick the last added device to support cgroup writeback. For
* multi-device file systems this means blk-cgroup policies have
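A standalone sketch of the boundary clamp above, with made-up extent geometry: the bio may not cross the end of the ordered extent's on-disk range, which is exactly what the min_t(u32, ...) expression enforces before the patch stashes the reference in bbio->ordered.

#include <stdint.h>
#include <stdio.h>

#define U32_MAX 0xffffffffu

/* How much may still be added to the bio before it would cross the end
 * of the ordered extent's on-disk range. */
static uint32_t len_to_oe_boundary(uint64_t oe_file_offset,
				   uint64_t oe_disk_num_bytes,
				   uint64_t file_offset)
{
	uint64_t left = oe_file_offset + oe_disk_num_bytes - file_offset;

	return left > U32_MAX ? U32_MAX : (uint32_t)left;
}

int main(void)
{
	/* 1 MiB ordered extent, bio starting 256 KiB into it: at most
	 * 768 KiB fits before the bio has to end at the boundary. */
	printf("%u\n", len_to_oe_boundary(0, 1 << 20, 256 << 10));
	return 0;
}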
@@ -1125,7 +1038,6 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
ret = set_page_extent_mapped(page);
if (ret < 0) {
unlock_extent(tree, start, end, NULL);
- btrfs_page_set_error(fs_info, page, start, PAGE_SIZE);
unlock_page(page);
return ret;
}
@@ -1329,11 +1241,9 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
}
ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
delalloc_end, &page_started, &nr_written, wbc);
- if (ret) {
- btrfs_page_set_error(inode->root->fs_info, page,
- page_offset(page), PAGE_SIZE);
+ if (ret)
return ret;
- }
+
/*
* delalloc_end is already one less than the total length, so
* we don't subtract one from PAGE_SIZE
@@ -1438,7 +1348,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
struct extent_map *em;
int ret = 0;
int nr = 0;
- bool compressed;
ret = btrfs_writepage_cow_fixup(page);
if (ret) {
@@ -1448,12 +1357,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
return 1;
}
- /*
- * we don't want to touch the inode after unlocking the page,
- * so we update the mapping writeback index now
- */
- bio_ctrl->wbc->nr_to_write--;
-
bio_ctrl->end_io_func = end_bio_extent_writepage;
while (cur <= end) {
u64 disk_bytenr;
@@ -1486,7 +1389,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
if (IS_ERR(em)) {
- btrfs_page_set_error(fs_info, page, cur, end - cur + 1);
ret = PTR_ERR_OR_ZERO(em);
goto out_error;
}
@@ -1497,10 +1399,14 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
ASSERT(cur < end);
ASSERT(IS_ALIGNED(em->start, fs_info->sectorsize));
ASSERT(IS_ALIGNED(em->len, fs_info->sectorsize));
+
block_start = em->block_start;
- compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
disk_bytenr = em->block_start + extent_offset;
+ ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
+ ASSERT(block_start != EXTENT_MAP_HOLE);
+ ASSERT(block_start != EXTENT_MAP_INLINE);
+
/*
* Note that em_end from extent_map_end() and dirty_range_end from
* find_next_dirty_byte() are all exclusive
@@ -1509,22 +1415,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
free_extent_map(em);
em = NULL;
- /*
- * compressed and inline extents are written through other
- * paths in the FS
- */
- if (compressed || block_start == EXTENT_MAP_HOLE ||
- block_start == EXTENT_MAP_INLINE) {
- if (compressed)
- nr++;
- else
- btrfs_writepage_endio_finish_ordered(inode,
- page, cur, cur + iosize - 1, true);
- btrfs_page_clear_dirty(fs_info, page, cur, iosize);
- cur += iosize;
- continue;
- }
-
btrfs_set_range_writeback(inode, cur, cur + iosize - 1);
if (!PageWriteback(page)) {
btrfs_err(inode->root->fs_info,
@@ -1572,7 +1462,6 @@ static int __extent_writepage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl
{
struct folio *folio = page_folio(page);
struct inode *inode = page->mapping->host;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
const u64 page_start = page_offset(page);
const u64 page_end = page_start + PAGE_SIZE - 1;
int ret;
@@ -1585,9 +1474,6 @@ static int __extent_writepage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl
WARN_ON(!PageLocked(page));
- btrfs_page_clear_error(btrfs_sb(inode->i_sb), page,
- page_offset(page), PAGE_SIZE);
-
pg_offset = offset_in_page(i_size);
if (page->index > end_index ||
(page->index == end_index && !pg_offset)) {
@@ -1600,77 +1486,30 @@ static int __extent_writepage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl
memzero_page(page, pg_offset, PAGE_SIZE - pg_offset);
ret = set_page_extent_mapped(page);
- if (ret < 0) {
- SetPageError(page);
+ if (ret < 0)
goto done;
- }
- if (!bio_ctrl->extent_locked) {
- ret = writepage_delalloc(BTRFS_I(inode), page, bio_ctrl->wbc);
- if (ret == 1)
- return 0;
- if (ret)
- goto done;
- }
+ ret = writepage_delalloc(BTRFS_I(inode), page, bio_ctrl->wbc);
+ if (ret == 1)
+ return 0;
+ if (ret)
+ goto done;
ret = __extent_writepage_io(BTRFS_I(inode), page, bio_ctrl, i_size, &nr);
if (ret == 1)
return 0;
+ bio_ctrl->wbc->nr_to_write--;
+
done:
if (nr == 0) {
/* make sure the mapping tag for page dirty gets cleared */
set_page_writeback(page);
end_page_writeback(page);
}
- /*
- * Here we used to have a check for PageError() and then set @ret and
- * call end_extent_writepage().
- *
- * But in fact setting @ret here will cause different error paths
- * between subpage and regular sectorsize.
- *
- * For regular page size, we never submit current page, but only add
- * current page to current bio.
- * The bio submission can only happen in next page.
- * Thus if we hit the PageError() branch, @ret is already set to
- * non-zero value and will not get updated for regular sectorsize.
- *
- * But for subpage case, it's possible we submit part of current page,
- * thus can get PageError() set by submitted bio of the same page,
- * while our @ret is still 0.
- *
- * So here we unify the behavior and don't set @ret.
- * Error can still be properly passed to higher layer as page will
- * be set error, here we just don't handle the IO failure.
- *
- * NOTE: This is just a hotfix for subpage.
- * The root fix will be properly ending ordered extent when we hit
- * an error during writeback.
- *
- * But that needs a bigger refactoring, as we not only need to grab the
- * submitted OE, but also need to know exactly at which bytenr we hit
- * the error.
- * Currently the full page based __extent_writepage_io() is not
- * capable of that.
- */
- if (PageError(page))
+ if (ret)
end_extent_writepage(page, ret, page_start, page_end);
- if (bio_ctrl->extent_locked) {
- struct writeback_control *wbc = bio_ctrl->wbc;
-
- /*
- * If bio_ctrl->extent_locked, it's from extent_write_locked_range(),
- * the page can either be locked by lock_page() or
- * process_one_page().
- * Let btrfs_page_unlock_writer() handle both cases.
- */
- ASSERT(wbc);
- btrfs_page_unlock_writer(fs_info, page, wbc->range_start,
- wbc->range_end + 1 - wbc->range_start);
- } else {
- unlock_page(page);
- }
+ unlock_page(page);
ASSERT(ret <= 0);
return ret;
}
@@ -1681,52 +1520,26 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
TASK_UNINTERRUPTIBLE);
}
-static void end_extent_buffer_writeback(struct extent_buffer *eb)
-{
- clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
- smp_mb__after_atomic();
- wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
-}
-
/*
* Lock extent buffer status and pages for writeback.
*
- * May try to flush write bio if we can't get the lock.
- *
- * Return 0 if the extent buffer doesn't need to be submitted.
- * (E.g. the extent buffer is not dirty)
- * Return >0 if the extent buffer is submitted to bio.
- * Return <0 if something went wrong, no page is locked.
+ * Return %false if the extent buffer doesn't need to be submitted (e.g. the
+ * extent buffer is not dirty)
+ * Return %true if the extent buffer is submitted to bio.
*/
-static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb,
- struct btrfs_bio_ctrl *bio_ctrl)
+static noinline_for_stack bool lock_extent_buffer_for_io(struct extent_buffer *eb,
+ struct writeback_control *wbc)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
- int i, num_pages;
- int flush = 0;
- int ret = 0;
+ bool ret = false;
- if (!btrfs_try_tree_write_lock(eb)) {
- submit_write_bio(bio_ctrl, 0);
- flush = 1;
- btrfs_tree_lock(eb);
- }
-
- if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
+ btrfs_tree_lock(eb);
+ while (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
btrfs_tree_unlock(eb);
- if (bio_ctrl->wbc->sync_mode != WB_SYNC_ALL)
- return 0;
- if (!flush) {
- submit_write_bio(bio_ctrl, 0);
- flush = 1;
- }
- while (1) {
- wait_on_extent_buffer_writeback(eb);
- btrfs_tree_lock(eb);
- if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
- break;
- btrfs_tree_unlock(eb);
- }
+ if (wbc->sync_mode != WB_SYNC_ALL)
+ return false;
+ wait_on_extent_buffer_writeback(eb);
+ btrfs_tree_lock(eb);
}
/*
@@ -1742,45 +1555,19 @@ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb
percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
-eb->len,
fs_info->dirty_metadata_batch);
- ret = 1;
+ ret = true;
} else {
spin_unlock(&eb->refs_lock);
}
-
btrfs_tree_unlock(eb);
-
- /*
- * Either we don't need to submit any tree block, or we're submitting
- * subpage eb.
- * Subpage metadata doesn't use page locking at all, so we can skip
- * the page locking.
- */
- if (!ret || fs_info->nodesize < PAGE_SIZE)
- return ret;
-
- num_pages = num_extent_pages(eb);
- for (i = 0; i < num_pages; i++) {
- struct page *p = eb->pages[i];
-
- if (!trylock_page(p)) {
- if (!flush) {
- submit_write_bio(bio_ctrl, 0);
- flush = 1;
- }
- lock_page(p);
- }
- }
-
return ret;
}
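The locking now reduces to "lock, and while the buffer is under writeback: unlock, wait, relock". Below is a userspace model of that loop with pthreads standing in for btrfs_tree_lock() and wait_on_extent_buffer_writeback(); the refs_lock and dirty-bit bookkeeping are deliberately simplified, so treat it as a sketch of the control flow only.

#include <pthread.h>
#include <stdbool.h>

struct eb_model {
	pthread_mutex_t lock;	/* btrfs_tree_lock()/btrfs_tree_unlock() */
	pthread_cond_t wb_done;	/* wake_up_bit() on the WRITEBACK bit */
	bool writeback;		/* EXTENT_BUFFER_WRITEBACK */
	bool dirty;		/* EXTENT_BUFFER_DIRTY */
};

/* Returns true if the caller should go on to submit the buffer. */
static bool lock_eb_for_io_model(struct eb_model *eb, bool sync_all)
{
	bool ret;

	pthread_mutex_lock(&eb->lock);
	while (eb->writeback) {
		if (!sync_all) {	/* wbc->sync_mode != WB_SYNC_ALL */
			pthread_mutex_unlock(&eb->lock);
			return false;
		}
		/* Drops the lock, waits, reacquires: the same dance as
		 * btrfs_tree_unlock() + wait + btrfs_tree_lock(). */
		pthread_cond_wait(&eb->wb_done, &eb->lock);
	}
	ret = eb->dirty;
	eb->dirty = false;
	eb->writeback = ret;
	pthread_mutex_unlock(&eb->lock);
	return ret;
}

int main(void)
{
	struct eb_model eb = { PTHREAD_MUTEX_INITIALIZER,
			       PTHREAD_COND_INITIALIZER, false, true };

	return lock_eb_for_io_model(&eb, true) ? 0 : 1;
}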
-static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
+static void set_btree_ioerr(struct extent_buffer *eb)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
- btrfs_page_set_error(fs_info, page, eb->start, eb->len);
- if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
- return;
+ set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
/*
* A read may stumble upon this buffer later, make sure that it gets an
@@ -1794,7 +1581,7 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
* return a 0 because we are readonly if we don't modify the err seq for
* the superblock.
*/
- mapping_set_error(page->mapping, -EIO);
+ mapping_set_error(eb->fs_info->btree_inode->i_mapping, -EIO);
/*
* If writeback for a btree extent that doesn't belong to a log tree
@@ -1869,101 +1656,34 @@ static struct extent_buffer *find_extent_buffer_nolock(
return NULL;
}
-/*
- * The endio function for subpage extent buffer write.
- *
- * Unlike end_bio_extent_buffer_writepage(), we only call end_page_writeback()
- * after all extent buffers in the page have finished their writeback.
- */
-static void end_bio_subpage_eb_writepage(struct btrfs_bio *bbio)
+static void extent_buffer_write_end_io(struct btrfs_bio *bbio)
{
- struct bio *bio = &bbio->bio;
- struct btrfs_fs_info *fs_info;
- struct bio_vec *bvec;
+ struct extent_buffer *eb = bbio->private;
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+ bool uptodate = !bbio->bio.bi_status;
struct bvec_iter_all iter_all;
+ struct bio_vec *bvec;
+ u32 bio_offset = 0;
- fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
- ASSERT(fs_info->nodesize < PAGE_SIZE);
+ if (!uptodate)
+ set_btree_ioerr(eb);
- ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, bio, iter_all) {
+ bio_for_each_segment_all(bvec, &bbio->bio, iter_all) {
+ u64 start = eb->start + bio_offset;
struct page *page = bvec->bv_page;
- u64 bvec_start = page_offset(page) + bvec->bv_offset;
- u64 bvec_end = bvec_start + bvec->bv_len - 1;
- u64 cur_bytenr = bvec_start;
-
- ASSERT(IS_ALIGNED(bvec->bv_len, fs_info->nodesize));
-
- /* Iterate through all extent buffers in the range */
- while (cur_bytenr <= bvec_end) {
- struct extent_buffer *eb;
- int done;
-
- /*
- * Here we can't use find_extent_buffer(), as it may
- * try to lock eb->refs_lock, which is not safe in endio
- * context.
- */
- eb = find_extent_buffer_nolock(fs_info, cur_bytenr);
- ASSERT(eb);
-
- cur_bytenr = eb->start + eb->len;
-
- ASSERT(test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags));
- done = atomic_dec_and_test(&eb->io_pages);
- ASSERT(done);
-
- if (bio->bi_status ||
- test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
- ClearPageUptodate(page);
- set_btree_ioerr(page, eb);
- }
+ u32 len = bvec->bv_len;
- btrfs_subpage_clear_writeback(fs_info, page, eb->start,
- eb->len);
- end_extent_buffer_writeback(eb);
- /*
- * free_extent_buffer() will grab spinlock which is not
- * safe in endio context. Thus here we manually dec
- * the ref.
- */
- atomic_dec(&eb->refs);
- }
+ if (!uptodate)
+ btrfs_page_clear_uptodate(fs_info, page, start, len);
+ btrfs_page_clear_writeback(fs_info, page, start, len);
+ bio_offset += len;
}
- bio_put(bio);
-}
-static void end_bio_extent_buffer_writepage(struct btrfs_bio *bbio)
-{
- struct bio *bio = &bbio->bio;
- struct bio_vec *bvec;
- struct extent_buffer *eb;
- int done;
- struct bvec_iter_all iter_all;
-
- ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, bio, iter_all) {
- struct page *page = bvec->bv_page;
-
- eb = (struct extent_buffer *)page->private;
- BUG_ON(!eb);
- done = atomic_dec_and_test(&eb->io_pages);
-
- if (bio->bi_status ||
- test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
- ClearPageUptodate(page);
- set_btree_ioerr(page, eb);
- }
-
- end_page_writeback(page);
-
- if (!done)
- continue;
-
- end_extent_buffer_writeback(eb);
- }
+ clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
+ smp_mb__after_atomic();
+ wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
- bio_put(bio);
+ bio_put(&bbio->bio);
}
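The tail of the new endio handler is the classic clear-bit / barrier / wake-up-bit completion pattern. A compilable model with C11 atomics standing in for the kernel bitops and wait_on_bit machinery follows; the waiter spins instead of sleeping, which the real wait_on_extent_buffer_writeback() of course does not.

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool writeback = true;	/* EXTENT_BUFFER_WRITEBACK */

/* Endio side: clear_bit() + smp_mb__after_atomic() + wake_up_bit().
 * The release store publishes every earlier page-state update before a
 * waiter can observe the bit as clear. */
static void write_end_io_model(void)
{
	atomic_store_explicit(&writeback, false, memory_order_release);
	/* wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK) goes here. */
}

/* Waiter side, as in wait_on_extent_buffer_writeback(). */
static void wait_on_writeback_model(void)
{
	while (atomic_load_explicit(&writeback, memory_order_acquire))
		;
}

int main(void)
{
	write_end_io_model();
	wait_on_writeback_model();
	return 0;
}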
static void prepare_eb_write(struct extent_buffer *eb)
@@ -1973,7 +1693,6 @@ static void prepare_eb_write(struct extent_buffer *eb)
unsigned long end;
clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
- atomic_set(&eb->io_pages, num_extent_pages(eb));
/* Set btree blocks beyond nritems with 0 to avoid stale content */
nritems = btrfs_header_nritems(eb);
@@ -1995,63 +1714,49 @@ static void prepare_eb_write(struct extent_buffer *eb)
}
}
-/*
- * Unlike the work in write_one_eb(), we rely completely on extent locking.
- * Page locking is only utilized at minimum to keep the VMM code happy.
- */
-static void write_one_subpage_eb(struct extent_buffer *eb,
- struct btrfs_bio_ctrl *bio_ctrl)
-{
- struct btrfs_fs_info *fs_info = eb->fs_info;
- struct page *page = eb->pages[0];
- bool no_dirty_ebs = false;
-
- prepare_eb_write(eb);
-
- /* clear_page_dirty_for_io() in subpage helper needs page locked */
- lock_page(page);
- btrfs_subpage_set_writeback(fs_info, page, eb->start, eb->len);
-
- /* Check if this is the last dirty bit to update nr_written */
- no_dirty_ebs = btrfs_subpage_clear_and_test_dirty(fs_info, page,
- eb->start, eb->len);
- if (no_dirty_ebs)
- clear_page_dirty_for_io(page);
-
- bio_ctrl->end_io_func = end_bio_subpage_eb_writepage;
-
- submit_extent_page(bio_ctrl, eb->start, page, eb->len,
- eb->start - page_offset(page));
- unlock_page(page);
- /*
- * Submission finished without problem, if no range of the page is
- * dirty anymore, we have submitted a page. Update nr_written in wbc.
- */
- if (no_dirty_ebs)
- bio_ctrl->wbc->nr_to_write--;
-}
-
static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
- struct btrfs_bio_ctrl *bio_ctrl)
+ struct writeback_control *wbc)
{
- u64 disk_bytenr = eb->start;
- int i, num_pages;
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+ struct btrfs_bio *bbio;
prepare_eb_write(eb);
- bio_ctrl->end_io_func = end_bio_extent_buffer_writepage;
-
- num_pages = num_extent_pages(eb);
- for (i = 0; i < num_pages; i++) {
- struct page *p = eb->pages[i];
-
- clear_page_dirty_for_io(p);
- set_page_writeback(p);
- submit_extent_page(bio_ctrl, disk_bytenr, p, PAGE_SIZE, 0);
- disk_bytenr += PAGE_SIZE;
- bio_ctrl->wbc->nr_to_write--;
+ bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES,
+ REQ_OP_WRITE | REQ_META | wbc_to_write_flags(wbc),
+ eb->fs_info, extent_buffer_write_end_io, eb);
+ bbio->bio.bi_iter.bi_sector = eb->start >> SECTOR_SHIFT;
+ bio_set_dev(&bbio->bio, fs_info->fs_devices->latest_dev->bdev);
+ wbc_init_bio(wbc, &bbio->bio);
+ bbio->inode = BTRFS_I(eb->fs_info->btree_inode);
+ bbio->file_offset = eb->start;
+ if (fs_info->nodesize < PAGE_SIZE) {
+ struct page *p = eb->pages[0];
+
+ lock_page(p);
+ btrfs_subpage_set_writeback(fs_info, p, eb->start, eb->len);
+ if (btrfs_subpage_clear_and_test_dirty(fs_info, p, eb->start,
+ eb->len)) {
+ clear_page_dirty_for_io(p);
+ wbc->nr_to_write--;
+ }
+ __bio_add_page(&bbio->bio, p, eb->len, eb->start - page_offset(p));
+ wbc_account_cgroup_owner(wbc, p, eb->len);
unlock_page(p);
+ } else {
+ for (int i = 0; i < num_extent_pages(eb); i++) {
+ struct page *p = eb->pages[i];
+
+ lock_page(p);
+ clear_page_dirty_for_io(p);
+ set_page_writeback(p);
+ __bio_add_page(&bbio->bio, p, PAGE_SIZE, 0);
+ wbc_account_cgroup_owner(wbc, p, PAGE_SIZE);
+ wbc->nr_to_write--;
+ unlock_page(p);
+ }
}
+ btrfs_submit_bio(bbio, 0);
}
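A rough model of how the rewritten write_one_eb() populates its bio: one partial-page segment on subpage filesystems, whole pages otherwise. The page and node sizes and the eb start are example values, and model_bio_segments() is a hypothetical helper, not a kernel function.

#include <stdint.h>
#include <stdio.h>

static void model_bio_segments(uint32_t nodesize, uint32_t page_size,
			       uint64_t eb_start)
{
	uint64_t page_off = eb_start & ~(uint64_t)(page_size - 1);

	if (nodesize < page_size) {
		/* __bio_add_page(bio, p, eb->len, eb->start - page_offset(p)) */
		printf("one segment: len=%u in-page offset=%llu\n", nodesize,
		       (unsigned long long)(eb_start - page_off));
	} else {
		/* One whole page per iteration, at offset 0. */
		for (uint32_t i = 0; i < nodesize / page_size; i++)
			printf("page %u: len=%u offset=0\n", i, page_size);
	}
}

int main(void)
{
	model_bio_segments(16384, 4096, 0);		/* 16K nodes, 4K pages */
	model_bio_segments(4096, 65536, 0x21000);	/* subpage: 64K pages */
	return 0;
}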
/*
@@ -2068,14 +1773,13 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
* Return >=0 for the number of submitted extent buffers.
* Return <0 for fatal error.
*/
-static int submit_eb_subpage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl)
+static int submit_eb_subpage(struct page *page, struct writeback_control *wbc)
{
struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
int submitted = 0;
u64 page_start = page_offset(page);
int bit_start = 0;
int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
- int ret;
/* Lock and write each dirty extent buffers in the range */
while (bit_start < fs_info->subpage_info->bitmap_nr_bits) {
@@ -2121,25 +1825,13 @@ static int submit_eb_subpage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl)
if (!eb)
continue;
- ret = lock_extent_buffer_for_io(eb, bio_ctrl);
- if (ret == 0) {
- free_extent_buffer(eb);
- continue;
+ if (lock_extent_buffer_for_io(eb, wbc)) {
+ write_one_eb(eb, wbc);
+ submitted++;
}
- if (ret < 0) {
- free_extent_buffer(eb);
- goto cleanup;
- }
- write_one_subpage_eb(eb, bio_ctrl);
free_extent_buffer(eb);
- submitted++;
}
return submitted;
-
-cleanup:
- /* We hit error, end bio for the submitted extent buffers */
- submit_write_bio(bio_ctrl, ret);
- return ret;
}
/*
@@ -2162,7 +1854,7 @@ cleanup:
* previous call.
* Return <0 for fatal error.
*/
-static int submit_eb_page(struct page *page, struct btrfs_bio_ctrl *bio_ctrl,
+static int submit_eb_page(struct page *page, struct writeback_control *wbc,
struct extent_buffer **eb_context)
{
struct address_space *mapping = page->mapping;
@@ -2174,7 +1866,7 @@ static int submit_eb_page(struct page *page, struct btrfs_bio_ctrl *bio_ctrl,
return 0;
if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
- return submit_eb_subpage(page, bio_ctrl);
+ return submit_eb_subpage(page, wbc);
spin_lock(&mapping->private_lock);
if (!PagePrivate(page)) {
@@ -2207,8 +1899,7 @@ static int submit_eb_page(struct page *page, struct btrfs_bio_ctrl *bio_ctrl,
* If for_sync, this hole will be filled with
* transaction commit.
*/
- if (bio_ctrl->wbc->sync_mode == WB_SYNC_ALL &&
- !bio_ctrl->wbc->for_sync)
+ if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
ret = -EAGAIN;
else
ret = 0;
@@ -2218,13 +1909,12 @@ static int submit_eb_page(struct page *page, struct btrfs_bio_ctrl *bio_ctrl,
*eb_context = eb;
- ret = lock_extent_buffer_for_io(eb, bio_ctrl);
- if (ret <= 0) {
+ if (!lock_extent_buffer_for_io(eb, wbc)) {
btrfs_revert_meta_write_pointer(cache, eb);
if (cache)
btrfs_put_block_group(cache);
free_extent_buffer(eb);
- return ret;
+ return 0;
}
if (cache) {
/*
@@ -2233,7 +1923,7 @@ static int submit_eb_page(struct page *page, struct btrfs_bio_ctrl *bio_ctrl,
btrfs_schedule_zone_finish_bg(cache, eb);
btrfs_put_block_group(cache);
}
- write_one_eb(eb, bio_ctrl);
+ write_one_eb(eb, wbc);
free_extent_buffer(eb);
return 1;
}
@@ -2242,11 +1932,6 @@ int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct extent_buffer *eb_context = NULL;
- struct btrfs_bio_ctrl bio_ctrl = {
- .wbc = wbc,
- .opf = REQ_OP_WRITE | wbc_to_write_flags(wbc),
- .extent_locked = 0,
- };
struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
int ret = 0;
int done = 0;
@@ -2288,7 +1973,7 @@ retry:
for (i = 0; i < nr_folios; i++) {
struct folio *folio = fbatch.folios[i];
- ret = submit_eb_page(&folio->page, &bio_ctrl, &eb_context);
+ ret = submit_eb_page(&folio->page, wbc, &eb_context);
if (ret == 0)
continue;
if (ret < 0) {
@@ -2349,8 +2034,6 @@ retry:
ret = 0;
if (!ret && BTRFS_FS_ERROR(fs_info))
ret = -EROFS;
- submit_write_bio(&bio_ctrl, ret);
-
btrfs_zoned_meta_io_unlock(fs_info);
return ret;
}
@@ -2520,38 +2203,31 @@ retry:
* already been ran (aka, ordered extent inserted) and all pages are still
* locked.
*/
-int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
+int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
+ struct writeback_control *wbc)
{
bool found_error = false;
int first_error = 0;
int ret = 0;
struct address_space *mapping = inode->i_mapping;
- struct page *page;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ const u32 sectorsize = fs_info->sectorsize;
+ loff_t i_size = i_size_read(inode);
u64 cur = start;
- unsigned long nr_pages;
- const u32 sectorsize = btrfs_sb(inode->i_sb)->sectorsize;
- struct writeback_control wbc_writepages = {
- .sync_mode = WB_SYNC_ALL,
- .range_start = start,
- .range_end = end + 1,
- .no_cgroup_owner = 1,
- };
struct btrfs_bio_ctrl bio_ctrl = {
- .wbc = &wbc_writepages,
- /* We're called from an async helper function */
- .opf = REQ_OP_WRITE | REQ_BTRFS_CGROUP_PUNT |
- wbc_to_write_flags(&wbc_writepages),
- .extent_locked = 1,
+ .wbc = wbc,
+ .opf = REQ_OP_WRITE | wbc_to_write_flags(wbc),
};
+ if (wbc->no_cgroup_owner)
+ bio_ctrl.opf |= REQ_BTRFS_CGROUP_PUNT;
+
ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(end + 1, sectorsize));
- nr_pages = (round_up(end, PAGE_SIZE) - round_down(start, PAGE_SIZE)) >>
- PAGE_SHIFT;
- wbc_writepages.nr_to_write = nr_pages * 2;
- wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
while (cur <= end) {
u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
+ struct page *page;
+ int nr = 0;
page = find_get_page(mapping, cur >> PAGE_SHIFT);
/*
@@ -2562,19 +2238,31 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
ASSERT(PageLocked(page));
ASSERT(PageDirty(page));
clear_page_dirty_for_io(page);
- ret = __extent_writepage(page, &bio_ctrl);
- ASSERT(ret <= 0);
+
+ ret = __extent_writepage_io(BTRFS_I(inode), page, &bio_ctrl,
+ i_size, &nr);
+ if (ret == 1)
+ goto next_page;
+
+ /* Make sure the mapping tag for page dirty gets cleared. */
+ if (nr == 0) {
+ set_page_writeback(page);
+ end_page_writeback(page);
+ }
+ if (ret)
+ end_extent_writepage(page, ret, cur, cur_end);
+ btrfs_page_unlock_writer(fs_info, page, cur, cur_end + 1 - cur);
if (ret < 0) {
found_error = true;
first_error = ret;
}
+next_page:
put_page(page);
cur = cur_end + 1;
}
submit_write_bio(&bio_ctrl, found_error ? ret : 0);
- wbc_detach_inode(&wbc_writepages);
if (found_error)
return first_error;
return ret;
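For reference, the per-page walk in the rewritten extent_write_locked_range() advances cur one page at a time, clamping the last range to end. A standalone rendering of that arithmetic, with an assumed 4K page size and a sample 512-byte-sector-aligned range:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

int main(void)
{
	uint64_t start = 1024, end = 9215;	/* sample sector-aligned range */

	for (uint64_t cur = start; cur <= end; ) {
		/* min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end) */
		uint64_t cur_end = (cur & ~(uint64_t)(PAGE_SIZE - 1)) +
				   PAGE_SIZE - 1;

		if (cur_end > end)
			cur_end = end;
		printf("page write [%llu, %llu]\n", (unsigned long long)cur,
		       (unsigned long long)cur_end);
		cur = cur_end + 1;
	}
	return 0;
}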
@@ -2588,7 +2276,6 @@ int extent_writepages(struct address_space *mapping,
struct btrfs_bio_ctrl bio_ctrl = {
.wbc = wbc,
.opf = REQ_OP_WRITE | wbc_to_write_flags(wbc),
- .extent_locked = 0,
};
/*
@@ -2679,8 +2366,7 @@ static int try_release_extent_state(struct extent_io_tree *tree,
* The delalloc new bit will be cleared by ordered extent
* completion.
*/
- ret = __clear_extent_bit(tree, start, end, clear_bits, NULL,
- mask, NULL);
+ ret = __clear_extent_bit(tree, start, end, clear_bits, NULL, NULL);
/* if clear_extent_bit failed for enomem reasons,
* we can't allow the release to continue.
@@ -3421,10 +3107,9 @@ static void __free_extent_buffer(struct extent_buffer *eb)
kmem_cache_free(extent_buffer_cache, eb);
}
-int extent_buffer_under_io(const struct extent_buffer *eb)
+static int extent_buffer_under_io(const struct extent_buffer *eb)
{
- return (atomic_read(&eb->io_pages) ||
- test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
+ return (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
}
@@ -3557,11 +3242,9 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
init_rwsem(&eb->lock);
btrfs_leak_debug_add_eb(eb);
- INIT_LIST_HEAD(&eb->release_list);
spin_lock_init(&eb->refs_lock);
atomic_set(&eb->refs, 1);
- atomic_set(&eb->io_pages, 0);
ASSERT(len <= BTRFS_MAX_METADATA_BLOCKSIZE);
@@ -3678,9 +3361,9 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
* adequately protected by the refcount, but the TREE_REF bit and
* its corresponding reference are not. To protect against this
* class of races, we call check_buffer_tree_ref from the codepaths
- * which trigger io after they set eb->io_pages. Note that once io is
- * initiated, TREE_REF can no longer be cleared, so that is the
- * moment at which any such race is best fixed.
+ * which trigger io. Note that once io is initiated, TREE_REF can no
+ * longer be cleared, so that is the moment at which any such race is
+ * best fixed.
*/
refs = atomic_read(&eb->refs);
if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
@@ -3939,7 +3622,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
WARN_ON(btrfs_page_test_dirty(fs_info, p, eb->start, eb->len));
eb->pages[i] = p;
- if (!PageUptodate(p))
+ if (!btrfs_page_test_uptodate(fs_info, p, eb->start, eb->len))
uptodate = 0;
/*
@@ -4142,13 +3825,12 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
continue;
lock_page(page);
btree_clear_page_dirty(page);
- ClearPageError(page);
unlock_page(page);
}
WARN_ON(atomic_read(&eb->refs) == 0);
}
-bool set_extent_buffer_dirty(struct extent_buffer *eb)
+void set_extent_buffer_dirty(struct extent_buffer *eb)
{
int i;
int num_pages;
@@ -4183,13 +3865,14 @@ bool set_extent_buffer_dirty(struct extent_buffer *eb)
eb->start, eb->len);
if (subpage)
unlock_page(eb->pages[0]);
+ percpu_counter_add_batch(&eb->fs_info->dirty_metadata_bytes,
+ eb->len,
+ eb->fs_info->dirty_metadata_batch);
}
#ifdef CONFIG_BTRFS_DEBUG
for (i = 0; i < num_pages; i++)
ASSERT(PageDirty(eb->pages[i]));
#endif
-
- return was_dirty;
}
void clear_extent_buffer_uptodate(struct extent_buffer *eb)
@@ -4242,84 +3925,54 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
}
}
-static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
- int mirror_num,
- struct btrfs_tree_parent_check *check)
+static void extent_buffer_read_end_io(struct btrfs_bio *bbio)
{
+ struct extent_buffer *eb = bbio->private;
struct btrfs_fs_info *fs_info = eb->fs_info;
- struct extent_io_tree *io_tree;
- struct page *page = eb->pages[0];
- struct extent_state *cached_state = NULL;
- struct btrfs_bio_ctrl bio_ctrl = {
- .opf = REQ_OP_READ,
- .mirror_num = mirror_num,
- .parent_check = check,
- };
- int ret;
+ bool uptodate = !bbio->bio.bi_status;
+ struct bvec_iter_all iter_all;
+ struct bio_vec *bvec;
+ u32 bio_offset = 0;
- ASSERT(!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags));
- ASSERT(PagePrivate(page));
- ASSERT(check);
- io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
+ eb->read_mirror = bbio->mirror_num;
- if (wait == WAIT_NONE) {
- if (!try_lock_extent(io_tree, eb->start, eb->start + eb->len - 1,
- &cached_state))
- return -EAGAIN;
- } else {
- ret = lock_extent(io_tree, eb->start, eb->start + eb->len - 1,
- &cached_state);
- if (ret < 0)
- return ret;
- }
+ if (uptodate &&
+ btrfs_validate_extent_buffer(eb, &bbio->parent_check) < 0)
+ uptodate = false;
- if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags) ||
- PageUptodate(page) ||
- btrfs_subpage_test_uptodate(fs_info, page, eb->start, eb->len)) {
- set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
- unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
- &cached_state);
- return 0;
+ if (uptodate) {
+ set_extent_buffer_uptodate(eb);
+ } else {
+ clear_extent_buffer_uptodate(eb);
+ set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
}
- clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
- eb->read_mirror = 0;
- atomic_set(&eb->io_pages, 1);
- check_buffer_tree_ref(eb);
- bio_ctrl.end_io_func = end_bio_extent_readpage;
+ bio_for_each_segment_all(bvec, &bbio->bio, iter_all) {
+ u64 start = eb->start + bio_offset;
+ struct page *page = bvec->bv_page;
+ u32 len = bvec->bv_len;
- btrfs_subpage_clear_error(fs_info, page, eb->start, eb->len);
+ if (uptodate)
+ btrfs_page_set_uptodate(fs_info, page, start, len);
+ else
+ btrfs_page_clear_uptodate(fs_info, page, start, len);
- btrfs_subpage_start_reader(fs_info, page, eb->start, eb->len);
- submit_extent_page(&bio_ctrl, eb->start, page, eb->len,
- eb->start - page_offset(page));
- submit_one_bio(&bio_ctrl);
- if (wait != WAIT_COMPLETE) {
- free_extent_state(cached_state);
- return 0;
+ bio_offset += len;
}
- wait_extent_bit(io_tree, eb->start, eb->start + eb->len - 1,
- EXTENT_LOCKED, &cached_state);
- if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
- return -EIO;
- return 0;
+ clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
+ smp_mb__after_atomic();
+ wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
+ free_extent_buffer(eb);
+
+ bio_put(&bbio->bio);
}
int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
struct btrfs_tree_parent_check *check)
{
- int i;
- struct page *page;
- int locked_pages = 0;
- int all_uptodate = 1;
- int num_pages;
- unsigned long num_reads = 0;
- struct btrfs_bio_ctrl bio_ctrl = {
- .opf = REQ_OP_READ,
- .mirror_num = mirror_num,
- .parent_check = check,
- };
+ int num_pages = num_extent_pages(eb), i;
+ struct btrfs_bio *bbio;
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 0;
@@ -4332,87 +3985,39 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)))
return -EIO;
- if (eb->fs_info->nodesize < PAGE_SIZE)
- return read_extent_buffer_subpage(eb, wait, mirror_num, check);
-
- num_pages = num_extent_pages(eb);
- for (i = 0; i < num_pages; i++) {
- page = eb->pages[i];
- if (wait == WAIT_NONE) {
- /*
- * WAIT_NONE is only utilized by readahead. If we can't
- * acquire the lock atomically it means either the eb
- * is being read out or under modification.
- * Either way the eb will be or has been cached,
- * readahead can exit safely.
- */
- if (!trylock_page(page))
- goto unlock_exit;
- } else {
- lock_page(page);
- }
- locked_pages++;
- }
- /*
- * We need to firstly lock all pages to make sure that
- * the uptodate bit of our pages won't be affected by
- * clear_extent_buffer_uptodate().
- */
- for (i = 0; i < num_pages; i++) {
- page = eb->pages[i];
- if (!PageUptodate(page)) {
- num_reads++;
- all_uptodate = 0;
- }
- }
-
- if (all_uptodate) {
- set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
- goto unlock_exit;
- }
+ /* Someone else is already reading the buffer, just wait for it. */
+ if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags))
+ goto done;
clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = 0;
- atomic_set(&eb->io_pages, num_reads);
- /*
- * It is possible for release_folio to clear the TREE_REF bit before we
- * set io_pages. See check_buffer_tree_ref for a more detailed comment.
- */
check_buffer_tree_ref(eb);
- bio_ctrl.end_io_func = end_bio_extent_readpage;
- for (i = 0; i < num_pages; i++) {
- page = eb->pages[i];
-
- if (!PageUptodate(page)) {
- ClearPageError(page);
- submit_extent_page(&bio_ctrl, page_offset(page), page,
- PAGE_SIZE, 0);
- } else {
- unlock_page(page);
- }
+ atomic_inc(&eb->refs);
+
+ bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES,
+ REQ_OP_READ | REQ_META, eb->fs_info,
+ extent_buffer_read_end_io, eb);
+ bbio->bio.bi_iter.bi_sector = eb->start >> SECTOR_SHIFT;
+ bbio->inode = BTRFS_I(eb->fs_info->btree_inode);
+ bbio->file_offset = eb->start;
+ memcpy(&bbio->parent_check, check, sizeof(*check));
+ if (eb->fs_info->nodesize < PAGE_SIZE) {
+ __bio_add_page(&bbio->bio, eb->pages[0], eb->len,
+ eb->start - page_offset(eb->pages[0]));
+ } else {
+ for (i = 0; i < num_pages; i++)
+ __bio_add_page(&bbio->bio, eb->pages[i], PAGE_SIZE, 0);
}
+ btrfs_submit_bio(bbio, mirror_num);
- submit_one_bio(&bio_ctrl);
-
- if (wait != WAIT_COMPLETE)
- return 0;
-
- for (i = 0; i < num_pages; i++) {
- page = eb->pages[i];
- wait_on_page_locked(page);
- if (!PageUptodate(page))
+done:
+ if (wait == WAIT_COMPLETE) {
+ wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING, TASK_UNINTERRUPTIBLE);
+ if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return -EIO;
}
return 0;
-
-unlock_exit:
- while (locked_pages > 0) {
- locked_pages--;
- page = eb->pages[locked_pages];
- unlock_page(page);
- }
- return 0;
}
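The new read path serializes readers on the EXTENT_BUFFER_READING bit: whoever wins the test_and_set_bit() race submits the one bio, everyone else just waits for the bit to clear. A userspace model of that single-reader scheme (C11 atomics plus a spinning waiter, so a sketch rather than the kernel's sleeping wait_on_bit_io()):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool reading;	/* EXTENT_BUFFER_READING */
static atomic_bool uptodate;	/* EXTENT_BUFFER_UPTODATE */

static void *read_eb_model(void *arg)
{
	if (atomic_load(&uptodate))
		return NULL;
	/* test_and_set_bit(): only the winner sees false and submits. */
	if (!atomic_exchange(&reading, true)) {
		if (!atomic_load(&uptodate)) {
			/* ...submit the bio; the endio handler would set
			 * UPTODATE and clear READING. */
			atomic_store(&uptodate, true);
			printf("thread %ld submitted the read\n", (long)arg);
		}
		atomic_store(&reading, false);
	}
	/* WAIT_COMPLETE: wait for READING to clear, spinning here instead
	 * of sleeping in wait_on_bit_io(). */
	while (atomic_load(&reading))
		;
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (long i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, read_eb_model, (void *)i);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}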
static bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
@@ -4561,18 +4166,17 @@ static void assert_eb_page_uptodate(const struct extent_buffer *eb,
* looked up. We don't want to complain in this case, as the page was
* valid before, we just didn't write it out. Instead we want to catch
* the case where we didn't actually read the block properly, which
- * would have !PageUptodate && !PageError, as we clear PageError before
- * reading.
+ * would have !PageUptodate and !EXTENT_BUFFER_WRITE_ERR.
*/
- if (fs_info->nodesize < PAGE_SIZE) {
- bool uptodate, error;
+ if (test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
+ return;
- uptodate = btrfs_subpage_test_uptodate(fs_info, page,
- eb->start, eb->len);
- error = btrfs_subpage_test_error(fs_info, page, eb->start, eb->len);
- WARN_ON(!uptodate && !error);
+ if (fs_info->nodesize < PAGE_SIZE) {
+ if (WARN_ON(!btrfs_subpage_test_uptodate(fs_info, page,
+ eb->start, eb->len)))
+ btrfs_subpage_dump_bitmap(fs_info, page, eb->start, eb->len);
} else {
- WARN_ON(!PageUptodate(page) && !PageError(page));
+ WARN_ON(!PageUptodate(page));
}
}