Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--	fs/btrfs/scrub.c	232
1 file changed, 106 insertions(+), 126 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 836725a19661..4cae41bd6de0 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -134,8 +134,14 @@ struct scrub_stripe {
* The errors hit during the initial read of the stripe.
*
* Would be utilized for error reporting and repair.
+ *
+ * The remaining init_nr_* counters record the number of errors hit,
+ * and are only used by error reporting.
*/
unsigned long init_error_bitmap;
+ unsigned int init_nr_io_errors;
+ unsigned int init_nr_csum_errors;
+ unsigned int init_nr_meta_errors;
/*
* The following error bitmaps are all for the current status.
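A hedged aside on why dedicated counters are added next to the bitmaps (reasoning inferred from how the rest of this diff uses them, not quoted from the changelog): the io/csum/meta error bitmaps are live state and lose bits as sectors get repaired from other mirrors, so weighing them at report time under-counts what the initial read actually hit; only a snapshot taken right after that read preserves the totals. Illustrative comparison with made-up numbers:

	/*
	 * Say 4 checksum errors were hit on the initial read and 3 of them
	 * were later repaired from another mirror:
	 */
	stripe->init_nr_csum_errors;              /* still 4 (snapshot)       */
	bitmap_weight(&stripe->csum_error_bitmap,
		      stripe->nr_sectors);        /* now 1 (bits cleared)     */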
@@ -171,7 +177,6 @@ struct scrub_ctx {
struct btrfs_fs_info *fs_info;
int first_free;
int cur_stripe;
- struct list_head csum_list;
atomic_t cancel_req;
int readonly;
int sectors_per_bio;
@@ -303,17 +308,6 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
scrub_pause_off(fs_info);
}
-static void scrub_free_csums(struct scrub_ctx *sctx)
-{
- while (!list_empty(&sctx->csum_list)) {
- struct btrfs_ordered_sum *sum;
- sum = list_first_entry(&sctx->csum_list,
- struct btrfs_ordered_sum, list);
- list_del(&sum->list);
- kfree(sum);
- }
-}
-
static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
int i;
@@ -324,7 +318,6 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++)
release_scrub_stripe(&sctx->stripes[i]);
- scrub_free_csums(sctx);
kfree(sctx);
}
@@ -346,7 +339,6 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
refcount_set(&sctx->refs, 1);
sctx->is_dev_replace = is_dev_replace;
sctx->fs_info = fs_info;
- INIT_LIST_HEAD(&sctx->csum_list);
for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
int ret;
@@ -473,11 +465,8 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
struct extent_buffer *eb;
struct btrfs_extent_item *ei;
struct scrub_warning swarn;
- unsigned long ptr = 0;
u64 flags = 0;
- u64 ref_root;
u32 item_size;
- u8 ref_level = 0;
int ret;
/* Super block error, no need to search extent tree. */
@@ -507,19 +496,28 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
item_size = btrfs_item_size(eb, path->slots[0]);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- do {
+ unsigned long ptr = 0;
+ u8 ref_level;
+ u64 ref_root;
+
+ while (true) {
ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
item_size, &ref_root,
&ref_level);
+ if (ret < 0) {
+ btrfs_warn(fs_info,
+ "failed to resolve tree backref for logical %llu: %d",
+ swarn.logical, ret);
+ break;
+ }
+ if (ret > 0)
+ break;
btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
- errstr, swarn.logical,
- btrfs_dev_name(dev),
- swarn.physical,
- ref_level ? "node" : "leaf",
- ret < 0 ? -1 : ref_level,
- ret < 0 ? -1 : ref_root);
- } while (ret != 1);
+ errstr, swarn.logical, btrfs_dev_name(dev),
+ swarn.physical, (ref_level ? "node" : "leaf"),
+ ref_level, ref_root);
+ }
btrfs_release_path(path);
} else {
struct btrfs_backref_walk_ctx ctx = { 0 };
@@ -540,48 +538,6 @@ out:
btrfs_free_path(path);
}
-static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc)
-{
- if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
- return 2;
- else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
- return 3;
- else
- return (int)bioc->num_stripes;
-}
-
-static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
- u64 full_stripe_logical,
- int nstripes, int mirror,
- int *stripe_index,
- u64 *stripe_offset)
-{
- int i;
-
- if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- const int nr_data_stripes = (map_type & BTRFS_BLOCK_GROUP_RAID5) ?
- nstripes - 1 : nstripes - 2;
-
- /* RAID5/6 */
- for (i = 0; i < nr_data_stripes; i++) {
- const u64 data_stripe_start = full_stripe_logical +
- (i * BTRFS_STRIPE_LEN);
-
- if (logical >= data_stripe_start &&
- logical < data_stripe_start + BTRFS_STRIPE_LEN)
- break;
- }
-
- *stripe_index = i;
- *stripe_offset = (logical - full_stripe_logical) &
- BTRFS_STRIPE_LEN_MASK;
- } else {
- /* The other RAID type */
- *stripe_index = mirror;
- *stripe_offset = 0;
- }
-}
-
static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
{
int ret = 0;
@@ -918,8 +874,9 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
/* For scrub, our mirror_num should always start at 1. */
ASSERT(stripe->mirror_num >= 1);
- ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
- stripe->logical, &mapped_len, &bioc);
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
+ stripe->logical, &mapped_len, &bioc,
+ NULL, NULL, 1);
/*
* If we failed, dev will be NULL, and later detailed reports
* will just be skipped.
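For readers of the btrfs_map_sblock() to btrfs_map_block() conversions here and further down: the two extra NULL arguments and the trailing 1 presumably correspond to the optional single-stripe map, the optional mirror_num return, and the need_raid_map flag of the full helper, so the call keeps the old behaviour. Assumed prototype of that era, for reference only (verify against fs/btrfs/volumes.h of the matching tree):

	int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
			    u64 logical, u64 *length,
			    struct btrfs_io_context **bioc_ret,
			    struct btrfs_io_stripe *smap, int *mirror_num_ret,
			    int need_raid_map);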
@@ -1003,12 +960,9 @@ skip:
sctx->stat.data_bytes_scrubbed += nr_data_sectors << fs_info->sectorsize_bits;
sctx->stat.tree_bytes_scrubbed += nr_meta_sectors << fs_info->sectorsize_bits;
sctx->stat.no_csum += nr_nodatacsum_sectors;
- sctx->stat.read_errors +=
- bitmap_weight(&stripe->io_error_bitmap, stripe->nr_sectors);
- sctx->stat.csum_errors +=
- bitmap_weight(&stripe->csum_error_bitmap, stripe->nr_sectors);
- sctx->stat.verify_errors +=
- bitmap_weight(&stripe->meta_error_bitmap, stripe->nr_sectors);
+ sctx->stat.read_errors += stripe->init_nr_io_errors;
+ sctx->stat.csum_errors += stripe->init_nr_csum_errors;
+ sctx->stat.verify_errors += stripe->init_nr_meta_errors;
sctx->stat.uncorrectable_errors +=
bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
sctx->stat.corrected_errors += nr_repaired_sectors;
@@ -1041,6 +995,12 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
scrub_verify_one_stripe(stripe, stripe->extent_sector_bitmap);
/* Save the initial failed bitmap for later repair and report usage. */
stripe->init_error_bitmap = stripe->error_bitmap;
+ stripe->init_nr_io_errors = bitmap_weight(&stripe->io_error_bitmap,
+ stripe->nr_sectors);
+ stripe->init_nr_csum_errors = bitmap_weight(&stripe->csum_error_bitmap,
+ stripe->nr_sectors);
+ stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap,
+ stripe->nr_sectors);
if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
goto out;
@@ -1137,6 +1097,35 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
wake_up(&stripe->io_wait);
}
+static void scrub_submit_write_bio(struct scrub_ctx *sctx,
+ struct scrub_stripe *stripe,
+ struct btrfs_bio *bbio, bool dev_replace)
+{
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+ u32 bio_len = bbio->bio.bi_iter.bi_size;
+ u32 bio_off = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT) -
+ stripe->logical;
+
+ fill_writer_pointer_gap(sctx, stripe->physical + bio_off);
+ atomic_inc(&stripe->pending_io);
+ btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
+ if (!btrfs_is_zoned(fs_info))
+ return;
+ /*
+ * For zoned writeback, queue depth must be 1, thus we must wait for
+ * the write to finish before the next write.
+ */
+ wait_scrub_stripe_io(stripe);
+
+ /*
+ * We also need to update the write pointer if the write finished
+ * successfully.
+ */
+ if (!test_bit(bio_off >> fs_info->sectorsize_bits,
+ &stripe->write_error_bitmap))
+ sctx->write_pointer += bio_len;
+}
+
/*
* Submit the write bio(s) for the sectors specified by @write_bitmap.
*
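A worked example of the offset math in scrub_submit_write_bio() above (the numbers and the 4K sectorsize are illustrative, not taken from the patch): bi_sector counts 512-byte units, so it is converted back to a byte address before the stripe's logical start is subtracted.

	/* Illustrative only: assumes a 4K sectorsize (sectorsize_bits == 12). */
	u64 stripe_logical = 0x40000000;                 /* stripe starts at 1 GiB */
	u64 bio_byte_start = stripe_logical + 3 * 4096;  /* bio begins at sector 3 */
	sector_t bi_sector = bio_byte_start >> SECTOR_SHIFT;

	u32 bio_off   = (bi_sector << SECTOR_SHIFT) - stripe_logical;  /* 12288 */
	u32 sector_nr = bio_off >> 12;  /* 3, the bit tested in write_error_bitmap */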
@@ -1155,7 +1144,6 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
{
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct btrfs_bio *bbio = NULL;
- const bool zoned = btrfs_is_zoned(fs_info);
int sector_nr;
for_each_set_bit(sector_nr, &write_bitmap, stripe->nr_sectors) {
@@ -1168,13 +1156,7 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
/* Cannot merge with previous sector, submit the current one. */
if (bbio && sector_nr && !test_bit(sector_nr - 1, &write_bitmap)) {
- fill_writer_pointer_gap(sctx, stripe->physical +
- (sector_nr << fs_info->sectorsize_bits));
- atomic_inc(&stripe->pending_io);
- btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
- /* For zoned writeback, queue depth must be 1. */
- if (zoned)
- wait_scrub_stripe_io(stripe);
+ scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
bbio = NULL;
}
if (!bbio) {
@@ -1187,14 +1169,8 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
ASSERT(ret == fs_info->sectorsize);
}
- if (bbio) {
- fill_writer_pointer_gap(sctx, bbio->bio.bi_iter.bi_sector <<
- SECTOR_SHIFT);
- atomic_inc(&stripe->pending_io);
- btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
- if (zoned)
- wait_scrub_stripe_io(stripe);
- }
+ if (bbio)
+ scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
}
/*
@@ -1279,7 +1255,7 @@ static int get_raid56_logic_offset(u64 physical, int num,
u32 stripe_index;
u32 rot;
- *offset = last_offset + (i << BTRFS_STRIPE_LEN_SHIFT);
+ *offset = last_offset + btrfs_stripe_nr_to_offset(i);
stripe_nr = (u32)(*offset >> BTRFS_STRIPE_LEN_SHIFT) / data_stripes;
@@ -1294,7 +1270,7 @@ static int get_raid56_logic_offset(u64 physical, int num,
if (stripe_index < num)
j++;
}
- *offset = last_offset + (j << BTRFS_STRIPE_LEN_SHIFT);
+ *offset = last_offset + btrfs_stripe_nr_to_offset(j);
return 1;
}
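For context on the btrfs_stripe_nr_to_offset() conversions in this and the surrounding hunks (a hedged sketch; the helper lives in the btrfs headers, not in this diff): it presumably widens the stripe number to 64 bits before shifting, so the result can no longer be truncated the way an int shifted by BTRFS_STRIPE_LEN_SHIFT could be once stripe counts grow large.

	/* Assumed definition, following the open-coded pattern it replaces: */
	static inline u64 btrfs_stripe_nr_to_offset(u32 nr)
	{
		return (u64)nr << BTRFS_STRIPE_LEN_SHIFT; /* BTRFS_STRIPE_LEN == 64K */
	}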
@@ -1474,6 +1450,9 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
{
stripe->extent_sector_bitmap = 0;
stripe->init_error_bitmap = 0;
+ stripe->init_nr_io_errors = 0;
+ stripe->init_nr_csum_errors = 0;
+ stripe->init_nr_meta_errors = 0;
stripe->error_bitmap = 0;
stripe->io_error_bitmap = 0;
stripe->csum_error_bitmap = 0;
@@ -1687,7 +1666,7 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &sctx->stripes[0].state));
scrub_throttle_dev_io(sctx, sctx->stripes[0].dev,
- nr_stripes << BTRFS_STRIPE_LEN_SHIFT);
+ btrfs_stripe_nr_to_offset(nr_stripes));
for (int i = 0; i < nr_stripes; i++) {
stripe = &sctx->stripes[i];
scrub_submit_initial_read(sctx, stripe);
@@ -1714,7 +1693,7 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
break;
}
}
- } else {
+ } else if (!sctx->readonly) {
for (int i = 0; i < nr_stripes; i++) {
unsigned long repaired;
@@ -1810,7 +1789,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
bool all_empty = true;
const int data_stripes = nr_data_stripes(map);
unsigned long extent_bitmap = 0;
- u64 length = data_stripes << BTRFS_STRIPE_LEN_SHIFT;
+ u64 length = btrfs_stripe_nr_to_offset(data_stripes);
int ret;
ASSERT(sctx->raid56_data_stripes);
@@ -1825,13 +1804,13 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
data_stripes) >> BTRFS_STRIPE_LEN_SHIFT;
stripe_index = (i + rot) % map->num_stripes;
physical = map->stripes[stripe_index].physical +
- (rot << BTRFS_STRIPE_LEN_SHIFT);
+ btrfs_stripe_nr_to_offset(rot);
scrub_reset_stripe(stripe);
set_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state);
ret = scrub_find_fill_first_stripe(bg,
map->stripes[stripe_index].dev, physical, 1,
- full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT),
+ full_stripe_start + btrfs_stripe_nr_to_offset(i),
BTRFS_STRIPE_LEN, stripe);
if (ret < 0)
goto out;
@@ -1841,7 +1820,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
*/
if (ret > 0) {
stripe->logical = full_stripe_start +
- (i << BTRFS_STRIPE_LEN_SHIFT);
+ btrfs_stripe_nr_to_offset(i);
stripe->dev = map->stripes[stripe_index].dev;
stripe->mirror_num = 1;
set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state);
@@ -1929,8 +1908,8 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
bio->bi_end_io = raid56_scrub_wait_endio;
btrfs_bio_counter_inc_blocked(fs_info);
- ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
- &length, &bioc);
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
+ &length, &bioc, NULL, NULL, 1);
if (ret < 0) {
btrfs_put_bioc(bioc);
btrfs_bio_counter_dec(fs_info);
@@ -1944,6 +1923,13 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
btrfs_bio_counter_dec(fs_info);
goto out;
}
+ /* Use the recovered stripes as a cache to avoid reading them from disk again. */
+ for (int i = 0; i < data_stripes; i++) {
+ stripe = &sctx->raid56_data_stripes[i];
+
+ raid56_parity_cache_data_pages(rbio, stripe->pages,
+ full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT));
+ }
raid56_parity_submit_scrub_rbio(rbio);
wait_for_completion_io(&io_done);
ret = blk_status_to_errno(bio->bi_status);
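A hedged note on the new raid56_parity_cache_data_pages() call (prototype inferred from the call site, verify against the raid56 header of the same series): it hands the already scrub-verified data stripe pages to the rbio as cached content, so the parity rebuild does not read those stripes from disk again; this is also why the RAID56 block group is forced read-only further down, keeping RMW from invalidating the cache.

	/* Assumed prototype, for reference only: */
	void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
					    struct page **data_pages,
					    u64 data_logical);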
@@ -2034,7 +2020,7 @@ static u64 simple_stripe_full_stripe_len(const struct map_lookup *map)
ASSERT(map->type & (BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID10));
- return (map->num_stripes / map->sub_stripes) << BTRFS_STRIPE_LEN_SHIFT;
+ return btrfs_stripe_nr_to_offset(map->num_stripes / map->sub_stripes);
}
/* Get the logical bytenr for the stripe */
@@ -2050,7 +2036,7 @@ static u64 simple_stripe_get_logical(struct map_lookup *map,
* (stripe_index / sub_stripes) gives how many data stripes we need to
* skip.
*/
- return ((stripe_index / map->sub_stripes) << BTRFS_STRIPE_LEN_SHIFT) +
+ return btrfs_stripe_nr_to_offset(stripe_index / map->sub_stripes) +
bg->start;
}
@@ -2176,7 +2162,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
}
if (profile & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) {
ret = scrub_simple_stripe(sctx, bg, map, scrub_dev, stripe_index);
- offset = (stripe_index / map->sub_stripes) << BTRFS_STRIPE_LEN_SHIFT;
+ offset = btrfs_stripe_nr_to_offset(stripe_index / map->sub_stripes);
goto out;
}
@@ -2191,7 +2177,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
/* Initialize @offset in case we need to go to out: label */
get_raid56_logic_offset(physical, stripe_index, map, &offset, NULL);
- increment = nr_data_stripes(map) << BTRFS_STRIPE_LEN_SHIFT;
+ increment = btrfs_stripe_nr_to_offset(nr_data_stripes(map));
/*
* Due to the rotation, for RAID56 it's better to iterate each stripe
@@ -2238,7 +2224,7 @@ next:
}
out:
ret2 = flush_scrub_stripes(sctx);
- if (!ret2)
+ if (!ret)
ret = ret2;
if (sctx->raid56_data_stripes) {
for (int i = 0; i < nr_data_stripes(map); i++)
@@ -2518,13 +2504,20 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (ret == 0) {
ro_set = 1;
- } else if (ret == -ENOSPC && !sctx->is_dev_replace) {
+ } else if (ret == -ENOSPC && !sctx->is_dev_replace &&
+ !(cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) {
/*
* btrfs_inc_block_group_ro return -ENOSPC when it
* failed in creating new chunk for metadata.
* It is not a problem for scrub, because
* metadata are always cowed, and our scrub paused
* commit_transactions.
+ *
+ * For RAID56 chunks, we have to mark them read-only
+ * for scrub, as later we would use our own cache
+ * outside of the RAID56 realm.
+ * Thus we want the RAID56 bg to be marked RO to
+ * prevent RMW from screwing up our cache.
*/
ro_set = 0;
} else if (ret == -ETXTBSY) {
@@ -2705,17 +2698,12 @@ static void scrub_workers_put(struct btrfs_fs_info *fs_info)
if (refcount_dec_and_mutex_lock(&fs_info->scrub_workers_refcnt,
&fs_info->scrub_lock)) {
struct workqueue_struct *scrub_workers = fs_info->scrub_workers;
- struct workqueue_struct *scrub_wr_comp =
- fs_info->scrub_wr_completion_workers;
fs_info->scrub_workers = NULL;
- fs_info->scrub_wr_completion_workers = NULL;
mutex_unlock(&fs_info->scrub_lock);
if (scrub_workers)
destroy_workqueue(scrub_workers);
- if (scrub_wr_comp)
- destroy_workqueue(scrub_wr_comp);
}
}
@@ -2726,7 +2714,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
int is_dev_replace)
{
struct workqueue_struct *scrub_workers = NULL;
- struct workqueue_struct *scrub_wr_comp = NULL;
unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
int max_active = fs_info->thread_pool_size;
int ret = -ENOMEM;
@@ -2734,21 +2721,17 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt))
return 0;
- scrub_workers = alloc_workqueue("btrfs-scrub", flags,
- is_dev_replace ? 1 : max_active);
+ if (is_dev_replace)
+ scrub_workers = alloc_ordered_workqueue("btrfs-scrub", flags);
+ else
+ scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active);
if (!scrub_workers)
- goto fail_scrub_workers;
-
- scrub_wr_comp = alloc_workqueue("btrfs-scrubwrc", flags, max_active);
- if (!scrub_wr_comp)
- goto fail_scrub_wr_completion_workers;
+ return -ENOMEM;
mutex_lock(&fs_info->scrub_lock);
if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
- ASSERT(fs_info->scrub_workers == NULL &&
- fs_info->scrub_wr_completion_workers == NULL);
+ ASSERT(fs_info->scrub_workers == NULL);
fs_info->scrub_workers = scrub_workers;
- fs_info->scrub_wr_completion_workers = scrub_wr_comp;
refcount_set(&fs_info->scrub_workers_refcnt, 1);
mutex_unlock(&fs_info->scrub_lock);
return 0;
@@ -2759,10 +2742,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
ret = 0;
- destroy_workqueue(scrub_wr_comp);
-fail_scrub_wr_completion_workers:
destroy_workqueue(scrub_workers);
-fail_scrub_workers:
return ret;
}
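A hedged note on the workqueue API swap above (general workqueue semantics, not something this diff states): an ordered workqueue executes at most one work item at a time, in queueing order, which is what the dev-replace path previously approximated by passing max_active = 1 to alloc_workqueue(); spelling it out keeps that guarantee explicit. Minimal allocation sketch with a hypothetical helper name and error handling elided:

	#include <linux/workqueue.h>

	static struct workqueue_struct *scrub_alloc_wq(bool is_dev_replace,
						       int max_active)
	{
		unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;

		if (is_dev_replace)
			/* strictly ordered: one item at a time, in queue order */
			return alloc_ordered_workqueue("btrfs-scrub", flags);
		/* regular unbound queue, up to max_active concurrent items */
		return alloc_workqueue("btrfs-scrub", flags, max_active);
	}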