Diffstat (limited to 'fs/btrfs/scrub.c')
 fs/btrfs/scrub.c | 668
 1 file changed, 419 insertions(+), 249 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 3afe5fa50a63..f260c53829e5 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -54,6 +54,8 @@ struct scrub_ctx;
*/
#define SCRUB_MAX_SECTORS_PER_BLOCK (BTRFS_MAX_METADATA_BLOCKSIZE / SZ_4K)
+#define SCRUB_MAX_PAGES (DIV_ROUND_UP(BTRFS_MAX_METADATA_BLOCKSIZE, PAGE_SIZE))
+
struct scrub_recover {
refcount_t refs;
struct btrfs_io_context *bioc;
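For reference (not part of the patch), the two limits work out as follows: SCRUB_MAX_SECTORS_PER_BLOCK divides the largest metadata block (64K) by the smallest sector size (4K), and the new SCRUB_MAX_PAGES divides it by the page size. A standalone check, assuming 4K pages:

#include <assert.h>

#define SZ_4K                        4096
#define BTRFS_MAX_METADATA_BLOCKSIZE 65536 /* 64K, the largest nodesize */
#define PAGE_SIZE                    4096  /* assumed here; arch dependent */
#define DIV_ROUND_UP(n, d)           (((n) + (d) - 1) / (d))

int main(void)
{
	/* 64K / 4K = 16 sectors at most per scrub_block */
	assert(BTRFS_MAX_METADATA_BLOCKSIZE / SZ_4K == 16);
	/* 16 pages on 4K-page systems; a 64K-page system would need 1 */
	assert(DIV_ROUND_UP(BTRFS_MAX_METADATA_BLOCKSIZE, PAGE_SIZE) == 16);
	return 0;
}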
@@ -62,16 +64,12 @@ struct scrub_recover {
struct scrub_sector {
struct scrub_block *sblock;
- struct page *page;
- struct btrfs_device *dev;
struct list_head list;
u64 flags; /* extent flags */
u64 generation;
- u64 logical;
- u64 physical;
- u64 physical_for_dev_replace;
+ /* Offset in bytes to @sblock. */
+ u32 offset;
atomic_t refs;
- u8 mirror_num;
unsigned int have_csum:1;
unsigned int io_error:1;
u8 csum[BTRFS_CSUM_SIZE];
@@ -94,8 +92,22 @@ struct scrub_bio {
};
struct scrub_block {
+ /*
+ * Each page will have its page::private used to record the logical
+ * bytenr.
+ */
+ struct page *pages[SCRUB_MAX_PAGES];
struct scrub_sector *sectors[SCRUB_MAX_SECTORS_PER_BLOCK];
+ struct btrfs_device *dev;
+ /* Logical bytenr of the sblock */
+ u64 logical;
+ u64 physical;
+ u64 physical_for_dev_replace;
+ /* Length of sblock in bytes */
+ u32 len;
int sector_count;
+ int mirror_num;
+
atomic_t outstanding_sectors;
refcount_t refs; /* free mem on transition to zero */
struct scrub_ctx *sctx;
@@ -202,8 +214,174 @@ struct full_stripe_lock {
struct mutex mutex;
};
+#ifndef CONFIG_64BIT
+/* This structure is for architectures whose (void *) is smaller than u64 */
+struct scrub_page_private {
+ u64 logical;
+};
+#endif
+
+static int attach_scrub_page_private(struct page *page, u64 logical)
+{
+#ifdef CONFIG_64BIT
+ attach_page_private(page, (void *)logical);
+ return 0;
+#else
+ struct scrub_page_private *spp;
+
+ spp = kmalloc(sizeof(*spp), GFP_KERNEL);
+ if (!spp)
+ return -ENOMEM;
+ spp->logical = logical;
+ attach_page_private(page, (void *)spp);
+ return 0;
+#endif
+}
+
+static void detach_scrub_page_private(struct page *page)
+{
+#ifdef CONFIG_64BIT
+ detach_page_private(page);
+ return;
+#else
+ struct scrub_page_private *spp;
+
+ spp = detach_page_private(page);
+ kfree(spp);
+ return;
+#endif
+}
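On 64-bit the logical bytenr is cast directly into the page::private pointer; on 32-bit a (void *) cannot hold a u64, so it is boxed in a kmalloc'ed scrub_page_private. A sketch of the matching reader (the helper name here is illustrative, not from this patch):

static u64 scrub_page_get_logical(struct page *page)
{
#ifdef CONFIG_64BIT
	return (u64)page_private(page);
#else
	struct scrub_page_private *spp = (void *)page_private(page);

	return spp->logical;
#endif
}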
+
+static struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx,
+ struct btrfs_device *dev,
+ u64 logical, u64 physical,
+ u64 physical_for_dev_replace,
+ int mirror_num)
+{
+ struct scrub_block *sblock;
+
+ sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
+ if (!sblock)
+ return NULL;
+ refcount_set(&sblock->refs, 1);
+ sblock->sctx = sctx;
+ sblock->logical = logical;
+ sblock->physical = physical;
+ sblock->physical_for_dev_replace = physical_for_dev_replace;
+ sblock->dev = dev;
+ sblock->mirror_num = mirror_num;
+ sblock->no_io_error_seen = 1;
+ /*
+ * scrub_block::pages will be allocated in alloc_scrub_sector() on
+ * demand, when the corresponding page is not yet allocated.
+ */
+ return sblock;
+}
+
+/*
+ * Allocate a new scrub sector and attach it to @sblock.
+ *
+ * Will also allocate new pages for @sblock if needed.
+ */
+static struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock,
+ u64 logical, gfp_t gfp)
+{
+ const pgoff_t page_index = (logical - sblock->logical) >> PAGE_SHIFT;
+ struct scrub_sector *ssector;
+
+ /* We must never have scrub_block exceed U32_MAX in size. */
+ ASSERT(logical - sblock->logical < U32_MAX);
+
+ ssector = kzalloc(sizeof(*ssector), gfp);
+ if (!ssector)
+ return NULL;
+
+ /* Allocate a new page if the slot is not allocated */
+ if (!sblock->pages[page_index]) {
+ int ret;
+
+ sblock->pages[page_index] = alloc_page(gfp);
+ if (!sblock->pages[page_index]) {
+ kfree(ssector);
+ return NULL;
+ }
+ ret = attach_scrub_page_private(sblock->pages[page_index],
+ sblock->logical + (page_index << PAGE_SHIFT));
+ if (ret < 0) {
+ kfree(ssector);
+ __free_page(sblock->pages[page_index]);
+ sblock->pages[page_index] = NULL;
+ return NULL;
+ }
+ }
+
+ atomic_set(&ssector->refs, 1);
+ ssector->sblock = sblock;
+ /* The sector to be added should not be used */
+ ASSERT(sblock->sectors[sblock->sector_count] == NULL);
+ ssector->offset = logical - sblock->logical;
+
+ /* The sector count must be smaller than the limit */
+ ASSERT(sblock->sector_count < SCRUB_MAX_SECTORS_PER_BLOCK);
+
+ sblock->sectors[sblock->sector_count] = ssector;
+ sblock->sector_count++;
+ sblock->len += sblock->sctx->fs_info->sectorsize;
+
+ return ssector;
+}
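To see the lazy allocation at work: with 4K pages, the four sectors of a 16K metadata block each allocate one page (pages[0] through pages[3]); on a 64K-page arch all four share pages[0] and only the first call allocates. The index math, as a standalone sketch:

#include <stdio.h>

int main(void)
{
	unsigned long long sblock_logical = 1048576; /* 1MiB, example value */
	unsigned int sectorsize = 4096;

	for (int i = 0; i < 4; i++) { /* 16K metadata block, 4K sectors */
		unsigned long long logical = sblock_logical + i * sectorsize;
		unsigned int offset = logical - sblock_logical;

		/* page_index as in alloc_scrub_sector(), PAGE_SHIFT = 12 */
		printf("sector %d: offset %5u -> pages[%u]\n",
		       i, offset, offset >> 12);
	}
	return 0;
}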
+
+static struct page *scrub_sector_get_page(struct scrub_sector *ssector)
+{
+ struct scrub_block *sblock = ssector->sblock;
+ pgoff_t index;
+ /*
+ * When calling this function, ssector must be already attached to the
+ * parent sblock.
+ */
+ ASSERT(sblock);
+
+ /* The range should be inside the sblock range */
+ ASSERT(ssector->offset < sblock->len);
+
+ index = ssector->offset >> PAGE_SHIFT;
+ ASSERT(index < SCRUB_MAX_PAGES);
+ ASSERT(sblock->pages[index]);
+ ASSERT(PagePrivate(sblock->pages[index]));
+ return sblock->pages[index];
+}
+
+static unsigned int scrub_sector_get_page_offset(struct scrub_sector *ssector)
+{
+ struct scrub_block *sblock = ssector->sblock;
+
+ /*
+ * When calling this function, ssector must be already attached to the
+ * parent sblock.
+ */
+ ASSERT(sblock);
+
+ /* The range should be inside the sblock range */
+ ASSERT(ssector->offset < sblock->len);
+
+ return offset_in_page(ssector->offset);
+}
+
+static char *scrub_sector_get_kaddr(struct scrub_sector *ssector)
+{
+ return page_address(scrub_sector_get_page(ssector)) +
+ scrub_sector_get_page_offset(ssector);
+}
+
+static int bio_add_scrub_sector(struct bio *bio, struct scrub_sector *ssector,
+ unsigned int len)
+{
+ return bio_add_page(bio, scrub_sector_get_page(ssector), len,
+ scrub_sector_get_page_offset(ssector));
+}
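Consumers no longer touch a per-sector page; they locate the data through the sblock, as the checksum rework further below does. A minimal sketch of reading one sector's payload (helper names from this patch; the function itself is illustrative):

static void example_hash_sector(struct scrub_sector *ssector,
				struct shash_desc *shash, u8 *csum)
{
	u32 sectorsize = ssector->sblock->sctx->fs_info->sectorsize;
	char *kaddr = scrub_sector_get_kaddr(ssector);

	/* Same pattern as scrub_checksum_data() after this rework */
	crypto_shash_digest(shash, kaddr, sectorsize, csum);
}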
+
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
- struct scrub_block *sblocks_for_recheck);
+ struct scrub_block *sblocks_for_recheck[]);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
struct scrub_block *sblock,
int retry_failed_mirror);
@@ -533,10 +711,8 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
if (sctx->curr != -1) {
struct scrub_bio *sbio = sctx->bios[sctx->curr];
- for (i = 0; i < sbio->sector_count; i++) {
- WARN_ON(!sbio->sectors[i]->page);
+ for (i = 0; i < sbio->sector_count; i++)
scrub_block_put(sbio->sectors[i]->sblock);
- }
bio_put(sbio->bio);
}
@@ -726,15 +902,22 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
int ret;
WARN_ON(sblock->sector_count < 1);
- dev = sblock->sectors[0]->dev;
+ dev = sblock->dev;
fs_info = sblock->sctx->fs_info;
+ /* Super block error, no need to search extent tree. */
+ if (sblock->sectors[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
+ btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu",
+ errstr, rcu_str_deref(dev->name),
+ sblock->physical);
+ return;
+ }
path = btrfs_alloc_path();
if (!path)
return;
- swarn.physical = sblock->sectors[0]->physical;
- swarn.logical = sblock->sectors[0]->logical;
+ swarn.physical = sblock->physical;
+ swarn.logical = sblock->logical;
swarn.errstr = errstr;
swarn.dev = NULL;
@@ -804,13 +987,14 @@ static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
{
struct scrub_ctx *sctx = sblock_to_check->sctx;
- struct btrfs_device *dev;
+ struct btrfs_device *dev = sblock_to_check->dev;
struct btrfs_fs_info *fs_info;
u64 logical;
unsigned int failed_mirror_index;
unsigned int is_metadata;
unsigned int have_csum;
- struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
+ /* One scrub_block for each mirror */
+ struct scrub_block *sblocks_for_recheck[BTRFS_MAX_MIRRORS] = { 0 };
struct scrub_block *sblock_bad;
int ret;
int mirror_index;
@@ -825,22 +1009,23 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
fs_info = sctx->fs_info;
if (sblock_to_check->sectors[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
/*
- * if we find an error in a super block, we just report it.
+ * If we find an error in a super block, we just report it.
* They will get written with the next transaction commit
* anyway
*/
+ scrub_print_warning("super block error", sblock_to_check);
spin_lock(&sctx->stat_lock);
++sctx->stat.super_errors;
spin_unlock(&sctx->stat_lock);
+ btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
return 0;
}
- logical = sblock_to_check->sectors[0]->logical;
- BUG_ON(sblock_to_check->sectors[0]->mirror_num < 1);
- failed_mirror_index = sblock_to_check->sectors[0]->mirror_num - 1;
+ logical = sblock_to_check->logical;
+ ASSERT(sblock_to_check->mirror_num);
+ failed_mirror_index = sblock_to_check->mirror_num - 1;
is_metadata = !(sblock_to_check->sectors[0]->flags &
BTRFS_EXTENT_FLAG_DATA);
have_csum = sblock_to_check->sectors[0]->have_csum;
- dev = sblock_to_check->sectors[0]->dev;
if (!sctx->is_dev_replace && btrfs_repair_one_zone(fs_info, logical))
return 0;
@@ -902,17 +1087,28 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
* repaired area is verified in order to correctly maintain
* the statistics.
*/
-
- sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
- sizeof(*sblocks_for_recheck), GFP_KERNEL);
- if (!sblocks_for_recheck) {
- spin_lock(&sctx->stat_lock);
- sctx->stat.malloc_errors++;
- sctx->stat.read_errors++;
- sctx->stat.uncorrectable_errors++;
- spin_unlock(&sctx->stat_lock);
- btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
- goto out;
+ for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS; mirror_index++) {
+ /*
+ * Note: the two members refs and outstanding_sectors are not
+ * used in the blocks that are used for the recheck procedure.
+ *
+ * But alloc_scrub_block() will initialize sblock::refs anyway,
+ * so we can use scrub_block_put() to clean them up.
+ *
+ * And here we don't set up the physical/dev for the sblock yet,
+ * they will be correctly initialized in scrub_setup_recheck_block().
+ */
+ sblocks_for_recheck[mirror_index] = alloc_scrub_block(sctx, NULL,
+ logical, 0, 0, mirror_index);
+ if (!sblocks_for_recheck[mirror_index]) {
+ spin_lock(&sctx->stat_lock);
+ sctx->stat.malloc_errors++;
+ sctx->stat.read_errors++;
+ sctx->stat.uncorrectable_errors++;
+ spin_unlock(&sctx->stat_lock);
+ btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
+ goto out;
+ }
}
/* Setup the context, map the logical blocks and alloc the sectors */
@@ -926,7 +1122,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
goto out;
}
BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
- sblock_bad = sblocks_for_recheck + failed_mirror_index;
+ sblock_bad = sblocks_for_recheck[failed_mirror_index];
/* build and submit the bios for the failed mirror, check checksums */
scrub_recheck_block(fs_info, sblock_bad, 1);
@@ -1011,22 +1207,22 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
if (!scrub_is_page_on_raid56(sblock_bad->sectors[0])) {
if (mirror_index >= BTRFS_MAX_MIRRORS)
break;
- if (!sblocks_for_recheck[mirror_index].sector_count)
+ if (!sblocks_for_recheck[mirror_index]->sector_count)
break;
- sblock_other = sblocks_for_recheck + mirror_index;
+ sblock_other = sblocks_for_recheck[mirror_index];
} else {
struct scrub_recover *r = sblock_bad->sectors[0]->recover;
int max_allowed = r->bioc->num_stripes - r->bioc->num_tgtdevs;
if (mirror_index >= max_allowed)
break;
- if (!sblocks_for_recheck[1].sector_count)
+ if (!sblocks_for_recheck[1]->sector_count)
break;
ASSERT(failed_mirror_index == 0);
- sblock_other = sblocks_for_recheck + 1;
- sblock_other->sectors[0]->mirror_num = 1 + mirror_index;
+ sblock_other = sblocks_for_recheck[1];
+ sblock_other->mirror_num = 1 + mirror_index;
}
/* build and submit the bios, check checksums */
@@ -1097,12 +1293,11 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
/* Try to find no-io-error sector in mirrors */
for (mirror_index = 0;
mirror_index < BTRFS_MAX_MIRRORS &&
- sblocks_for_recheck[mirror_index].sector_count > 0;
+ sblocks_for_recheck[mirror_index]->sector_count > 0;
mirror_index++) {
- if (!sblocks_for_recheck[mirror_index].
+ if (!sblocks_for_recheck[mirror_index]->
sectors[sector_num]->io_error) {
- sblock_other = sblocks_for_recheck +
- mirror_index;
+ sblock_other = sblocks_for_recheck[mirror_index];
break;
}
}
@@ -1176,25 +1371,28 @@ did_not_correct_error:
}
out:
- if (sblocks_for_recheck) {
- for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
- mirror_index++) {
- struct scrub_block *sblock = sblocks_for_recheck +
- mirror_index;
- struct scrub_recover *recover;
- int i;
-
- for (i = 0; i < sblock->sector_count; i++) {
- sblock->sectors[i]->sblock = NULL;
- recover = sblock->sectors[i]->recover;
- if (recover) {
- scrub_put_recover(fs_info, recover);
- sblock->sectors[i]->recover = NULL;
- }
- scrub_sector_put(sblock->sectors[i]);
+ for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS; mirror_index++) {
+ struct scrub_block *sblock = sblocks_for_recheck[mirror_index];
+ struct scrub_recover *recover;
+ int sector_index;
+
+ /* Not allocated, continue checking the next mirror */
+ if (!sblock)
+ continue;
+
+ for (sector_index = 0; sector_index < sblock->sector_count;
+ sector_index++) {
+ /*
+ * Here we just clean up the recover; each sector will be
+ * properly cleaned up by the later scrub_block_put().
+ */
+ recover = sblock->sectors[sector_index]->recover;
+ if (recover) {
+ scrub_put_recover(fs_info, recover);
+ sblock->sectors[sector_index]->recover = NULL;
}
}
- kfree(sblocks_for_recheck);
+ scrub_block_put(sblock);
}
ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
@@ -1244,12 +1442,12 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
}
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
- struct scrub_block *sblocks_for_recheck)
+ struct scrub_block *sblocks_for_recheck[])
{
struct scrub_ctx *sctx = original_sblock->sctx;
struct btrfs_fs_info *fs_info = sctx->fs_info;
+ u64 logical = original_sblock->logical;
u64 length = original_sblock->sector_count << fs_info->sectorsize_bits;
- u64 logical = original_sblock->sectors[0]->logical;
u64 generation = original_sblock->sectors[0]->generation;
u64 flags = original_sblock->sectors[0]->flags;
u64 have_csum = original_sblock->sectors[0]->have_csum;
@@ -1264,11 +1462,6 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
int nmirrors;
int ret;
- /*
- * Note: the two members refs and outstanding_sectors are not used (and
- * not set) in the blocks that are used for the recheck procedure.
- */
-
while (length > 0) {
sublen = min_t(u64, length, fs_info->sectorsize);
mapped_length = sublen;
@@ -1307,24 +1500,19 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
struct scrub_block *sblock;
struct scrub_sector *sector;
- sblock = sblocks_for_recheck + mirror_index;
+ sblock = sblocks_for_recheck[mirror_index];
sblock->sctx = sctx;
- sector = kzalloc(sizeof(*sector), GFP_NOFS);
+ sector = alloc_scrub_sector(sblock, logical, GFP_NOFS);
if (!sector) {
-leave_nomem:
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
spin_unlock(&sctx->stat_lock);
scrub_put_recover(fs_info, recover);
return -ENOMEM;
}
- scrub_sector_get(sector);
- sblock->sectors[sector_index] = sector;
- sector->sblock = sblock;
sector->flags = flags;
sector->generation = generation;
- sector->logical = logical;
sector->have_csum = have_csum;
if (have_csum)
memcpy(sector->csum,
@@ -1339,21 +1527,20 @@ leave_nomem:
mirror_index,
&stripe_index,
&stripe_offset);
- sector->physical = bioc->stripes[stripe_index].physical +
- stripe_offset;
- sector->dev = bioc->stripes[stripe_index].dev;
+ /*
+ * We're at the first sector, also populate @sblock
+ * physical and dev.
+ */
+ if (sector_index == 0) {
+ sblock->physical =
+ bioc->stripes[stripe_index].physical +
+ stripe_offset;
+ sblock->dev = bioc->stripes[stripe_index].dev;
+ sblock->physical_for_dev_replace =
+ original_sblock->physical_for_dev_replace;
+ }
BUG_ON(sector_index >= original_sblock->sector_count);
- sector->physical_for_dev_replace =
- original_sblock->sectors[sector_index]->
- physical_for_dev_replace;
- /* For missing devices, dev->bdev is NULL */
- sector->mirror_num = mirror_index + 1;
- sblock->sector_count++;
- sector->page = alloc_page(GFP_NOFS);
- if (!sector->page)
- goto leave_nomem;
-
scrub_get_recover(recover);
sector->recover = recover;
}
@@ -1377,11 +1564,11 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
{
DECLARE_COMPLETION_ONSTACK(done);
- bio->bi_iter.bi_sector = sector->logical >> 9;
+ bio->bi_iter.bi_sector = (sector->offset + sector->sblock->logical) >>
+ SECTOR_SHIFT;
bio->bi_private = &done;
bio->bi_end_io = scrub_bio_wait_endio;
- raid56_parity_recover(bio, sector->recover->bioc,
- sector->sblock->sectors[0]->mirror_num, false);
+ raid56_parity_recover(bio, sector->recover->bioc, sector->sblock->mirror_num);
wait_for_completion_io(&done);
return blk_status_to_errno(bio->bi_status);
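With the per-sector logical gone, the bio target is reconstructed as base + offset and shifted into 512-byte block layer units (SECTOR_SHIFT is 9). A quick check of the arithmetic:

#include <assert.h>

#define SECTOR_SHIFT 9 /* block layer sectors are 512 bytes */

int main(void)
{
	unsigned long long sblock_logical = 1048576; /* 1MiB */
	unsigned int sector_offset = 8192;           /* third 4K sector */

	/* (1048576 + 8192) / 512 == 2064 */
	assert((sblock_logical + sector_offset) >> SECTOR_SHIFT == 2064);
	return 0;
}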
@@ -1395,17 +1582,16 @@ static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
int i;
/* All sectors in sblock belong to the same stripe on the same device. */
- ASSERT(first_sector->dev);
- if (!first_sector->dev->bdev)
+ ASSERT(sblock->dev);
+ if (!sblock->dev->bdev)
goto out;
- bio = bio_alloc(first_sector->dev->bdev, BIO_MAX_VECS, REQ_OP_READ, GFP_NOFS);
+ bio = bio_alloc(sblock->dev->bdev, BIO_MAX_VECS, REQ_OP_READ, GFP_NOFS);
for (i = 0; i < sblock->sector_count; i++) {
struct scrub_sector *sector = sblock->sectors[i];
- WARN_ON(!sector->page);
- bio_add_page(bio, sector->page, PAGE_SIZE, 0);
+ bio_add_scrub_sector(bio, sector, fs_info->sectorsize);
}
if (scrub_submit_raid56_bio_wait(fs_info, bio, first_sector)) {
@@ -1449,16 +1635,16 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
struct bio bio;
struct bio_vec bvec;
- if (sector->dev->bdev == NULL) {
+ if (sblock->dev->bdev == NULL) {
sector->io_error = 1;
sblock->no_io_error_seen = 0;
continue;
}
- WARN_ON(!sector->page);
- bio_init(&bio, sector->dev->bdev, &bvec, 1, REQ_OP_READ);
- bio_add_page(&bio, sector->page, fs_info->sectorsize, 0);
- bio.bi_iter.bi_sector = sector->physical >> 9;
+ bio_init(&bio, sblock->dev->bdev, &bvec, 1, REQ_OP_READ);
+ bio_add_scrub_sector(&bio, sector, fs_info->sectorsize);
+ bio.bi_iter.bi_sector = (sblock->physical + sector->offset) >>
+ SECTOR_SHIFT;
btrfsic_check_bio(&bio);
if (submit_bio_wait(&bio)) {
@@ -1475,7 +1661,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
static inline int scrub_check_fsid(u8 fsid[], struct scrub_sector *sector)
{
- struct btrfs_fs_devices *fs_devices = sector->dev->fs_devices;
+ struct btrfs_fs_devices *fs_devices = sector->sblock->dev->fs_devices;
int ret;
ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
@@ -1521,30 +1707,29 @@ static int scrub_repair_sector_from_good_copy(struct scrub_block *sblock_bad,
struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;
const u32 sectorsize = fs_info->sectorsize;
- BUG_ON(sector_bad->page == NULL);
- BUG_ON(sector_good->page == NULL);
if (force_write || sblock_bad->header_error ||
sblock_bad->checksum_error || sector_bad->io_error) {
struct bio bio;
struct bio_vec bvec;
int ret;
- if (!sector_bad->dev->bdev) {
+ if (!sblock_bad->dev->bdev) {
btrfs_warn_rl(fs_info,
"scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
return -EIO;
}
- bio_init(&bio, sector_bad->dev->bdev, &bvec, 1, REQ_OP_WRITE);
- bio.bi_iter.bi_sector = sector_bad->physical >> 9;
- __bio_add_page(&bio, sector_good->page, sectorsize, 0);
+ bio_init(&bio, sblock_bad->dev->bdev, &bvec, 1, REQ_OP_WRITE);
+ bio.bi_iter.bi_sector = (sblock_bad->physical +
+ sector_bad->offset) >> SECTOR_SHIFT;
+ ret = bio_add_scrub_sector(&bio, sector_good, sectorsize);
btrfsic_check_bio(&bio);
ret = submit_bio_wait(&bio);
bio_uninit(&bio);
if (ret) {
- btrfs_dev_stat_inc_and_print(sector_bad->dev,
+ btrfs_dev_stat_inc_and_print(sblock_bad->dev,
BTRFS_DEV_STAT_WRITE_ERRS);
atomic64_inc(&fs_info->dev_replace.num_write_errors);
return -EIO;
@@ -1577,11 +1762,11 @@ static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
static int scrub_write_sector_to_dev_replace(struct scrub_block *sblock, int sector_num)
{
+ const u32 sectorsize = sblock->sctx->fs_info->sectorsize;
struct scrub_sector *sector = sblock->sectors[sector_num];
- BUG_ON(sector->page == NULL);
if (sector->io_error)
- clear_page(page_address(sector->page));
+ memset(scrub_sector_get_kaddr(sector), 0, sectorsize);
return scrub_add_sector_to_wr_bio(sblock->sctx, sector);
}
@@ -1608,9 +1793,15 @@ static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
return ret;
}
+static void scrub_block_get(struct scrub_block *sblock)
+{
+ refcount_inc(&sblock->refs);
+}
+
static int scrub_add_sector_to_wr_bio(struct scrub_ctx *sctx,
struct scrub_sector *sector)
{
+ struct scrub_block *sblock = sector->sblock;
struct scrub_bio *sbio;
int ret;
const u32 sectorsize = sctx->fs_info->sectorsize;
@@ -1629,14 +1820,15 @@ again:
}
sbio = sctx->wr_curr_bio;
if (sbio->sector_count == 0) {
- ret = fill_writer_pointer_gap(sctx, sector->physical_for_dev_replace);
+ ret = fill_writer_pointer_gap(sctx, sector->offset +
+ sblock->physical_for_dev_replace);
if (ret) {
mutex_unlock(&sctx->wr_lock);
return ret;
}
- sbio->physical = sector->physical_for_dev_replace;
- sbio->logical = sector->logical;
+ sbio->physical = sblock->physical_for_dev_replace + sector->offset;
+ sbio->logical = sblock->logical + sector->offset;
sbio->dev = sctx->wr_tgtdev;
if (!sbio->bio) {
sbio->bio = bio_alloc(sbio->dev->bdev, sctx->sectors_per_bio,
@@ -1647,14 +1839,14 @@ again:
sbio->bio->bi_iter.bi_sector = sbio->physical >> 9;
sbio->status = 0;
} else if (sbio->physical + sbio->sector_count * sectorsize !=
- sector->physical_for_dev_replace ||
+ sblock->physical_for_dev_replace + sector->offset ||
sbio->logical + sbio->sector_count * sectorsize !=
- sector->logical) {
+ sblock->logical + sector->offset) {
scrub_wr_submit(sctx);
goto again;
}
- ret = bio_add_page(sbio->bio, sector->page, sectorsize, 0);
+ ret = bio_add_scrub_sector(sbio->bio, sector, sectorsize);
if (ret != sectorsize) {
if (sbio->sector_count < 1) {
bio_put(sbio->bio);
@@ -1668,6 +1860,13 @@ again:
sbio->sectors[sbio->sector_count] = sector;
scrub_sector_get(sector);
+ /*
+ * Since the ssector no longer holds a page of its own but uses
+ * sblock::pages, we must ensure the sblock is not freed before our
+ * write bio finishes.
+ */
+ scrub_block_get(sector->sblock);
+
sbio->sector_count++;
if (sbio->sector_count == sctx->sectors_per_bio)
scrub_wr_submit(sctx);
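The comment above is the key lifetime rule of this rework: a write bio's data lives in sblock::pages, so the block must be pinned together with the sector until the endio worker runs. The pairing, as a sketch (not a verbatim excerpt):

/* Submission side: pin both objects for the in-flight write. */
static void example_pin_for_write(struct scrub_bio *sbio,
				  struct scrub_sector *sector)
{
	sbio->sectors[sbio->sector_count++] = sector;
	scrub_sector_get(sector);        /* sector outlives the bio */
	scrub_block_get(sector->sblock); /* so do the backing pages */
}

/* Endio side: drop the refs once the write has completed. */
static void example_unpin_after_write(struct scrub_bio *sbio, int i)
{
	scrub_block_put(sbio->sectors[i]->sblock);
	scrub_sector_put(sbio->sectors[i]);
}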
@@ -1729,8 +1928,14 @@ static void scrub_wr_bio_end_io_worker(struct work_struct *work)
}
}
- for (i = 0; i < sbio->sector_count; i++)
+ /*
+ * In scrub_add_sector_to_wr_bio() we grabbed an extra ref for the
+ * sblock; now that the write bio has finished, put that ref.
+ */
+ for (i = 0; i < sbio->sector_count; i++) {
+ scrub_block_put(sbio->sectors[i]->sblock);
scrub_sector_put(sbio->sectors[i]);
+ }
bio_put(sbio->bio);
kfree(sbio);
@@ -1762,7 +1967,7 @@ static int scrub_checksum(struct scrub_block *sblock)
else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
ret = scrub_checksum_tree_block(sblock);
else if (flags & BTRFS_EXTENT_FLAG_SUPER)
- (void)scrub_checksum_super(sblock);
+ ret = scrub_checksum_super(sblock);
else
WARN_ON(1);
if (ret)
@@ -1785,15 +1990,11 @@ static int scrub_checksum_data(struct scrub_block *sblock)
if (!sector->have_csum)
return 0;
- kaddr = page_address(sector->page);
+ kaddr = scrub_sector_get_kaddr(sector);
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
- /*
- * In scrub_sectors() and scrub_sectors_for_parity() we ensure each sector
- * only contains one sector of data.
- */
crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
if (memcmp(csum, sector->csum, fs_info->csum_size))
@@ -1826,7 +2027,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
ASSERT(sblock->sector_count == num_sectors);
sector = sblock->sectors[0];
- kaddr = page_address(sector->page);
+ kaddr = scrub_sector_get_kaddr(sector);
h = (struct btrfs_header *)kaddr;
memcpy(on_disk_csum, h->csum, sctx->fs_info->csum_size);
@@ -1835,7 +2036,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
* a) don't have an extent buffer and
* b) the page is already kmapped
*/
- if (sector->logical != btrfs_stack_header_bytenr(h))
+ if (sblock->logical != btrfs_stack_header_bytenr(h))
sblock->header_error = 1;
if (sector->generation != btrfs_stack_header_generation(h)) {
@@ -1856,7 +2057,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
sectorsize - BTRFS_CSUM_SIZE);
for (i = 1; i < num_sectors; i++) {
- kaddr = page_address(sblock->sectors[i]->page);
+ kaddr = scrub_sector_get_kaddr(sblock->sectors[i]);
crypto_shash_update(shash, kaddr, sectorsize);
}
@@ -1881,10 +2082,10 @@ static int scrub_checksum_super(struct scrub_block *sblock)
BUG_ON(sblock->sector_count < 1);
sector = sblock->sectors[0];
- kaddr = page_address(sector->page);
+ kaddr = scrub_sector_get_kaddr(sector);
s = (struct btrfs_super_block *)kaddr;
- if (sector->logical != btrfs_super_bytenr(s))
+ if (sblock->logical != btrfs_super_bytenr(s))
++fail_cor;
if (sector->generation != btrfs_super_generation(s))
@@ -1901,31 +2102,9 @@ static int scrub_checksum_super(struct scrub_block *sblock)
if (memcmp(calculated_csum, s->csum, sctx->fs_info->csum_size))
++fail_cor;
- if (fail_cor + fail_gen) {
- /*
- * if we find an error in a super block, we just report it.
- * They will get written with the next transaction commit
- * anyway
- */
- spin_lock(&sctx->stat_lock);
- ++sctx->stat.super_errors;
- spin_unlock(&sctx->stat_lock);
- if (fail_cor)
- btrfs_dev_stat_inc_and_print(sector->dev,
- BTRFS_DEV_STAT_CORRUPTION_ERRS);
- else
- btrfs_dev_stat_inc_and_print(sector->dev,
- BTRFS_DEV_STAT_GENERATION_ERRS);
- }
-
return fail_cor + fail_gen;
}
-static void scrub_block_get(struct scrub_block *sblock)
-{
- refcount_inc(&sblock->refs);
-}
-
static void scrub_block_put(struct scrub_block *sblock)
{
if (refcount_dec_and_test(&sblock->refs)) {
@@ -1936,6 +2115,12 @@ static void scrub_block_put(struct scrub_block *sblock)
for (i = 0; i < sblock->sector_count; i++)
scrub_sector_put(sblock->sectors[i]);
+ for (i = 0; i < DIV_ROUND_UP(sblock->len, PAGE_SIZE); i++) {
+ if (sblock->pages[i]) {
+ detach_scrub_page_private(sblock->pages[i]);
+ __free_page(sblock->pages[i]);
+ }
+ }
kfree(sblock);
}
}
@@ -1947,11 +2132,8 @@ static void scrub_sector_get(struct scrub_sector *sector)
static void scrub_sector_put(struct scrub_sector *sector)
{
- if (atomic_dec_and_test(&sector->refs)) {
- if (sector->page)
- __free_page(sector->page);
+ if (atomic_dec_and_test(&sector->refs))
kfree(sector);
- }
}
/*
@@ -2056,9 +2238,9 @@ again:
}
sbio = sctx->bios[sctx->curr];
if (sbio->sector_count == 0) {
- sbio->physical = sector->physical;
- sbio->logical = sector->logical;
- sbio->dev = sector->dev;
+ sbio->physical = sblock->physical + sector->offset;
+ sbio->logical = sblock->logical + sector->offset;
+ sbio->dev = sblock->dev;
if (!sbio->bio) {
sbio->bio = bio_alloc(sbio->dev->bdev, sctx->sectors_per_bio,
REQ_OP_READ, GFP_NOFS);
@@ -2068,16 +2250,16 @@ again:
sbio->bio->bi_iter.bi_sector = sbio->physical >> 9;
sbio->status = 0;
} else if (sbio->physical + sbio->sector_count * sectorsize !=
- sector->physical ||
+ sblock->physical + sector->offset ||
sbio->logical + sbio->sector_count * sectorsize !=
- sector->logical ||
- sbio->dev != sector->dev) {
+ sblock->logical + sector->offset ||
+ sbio->dev != sblock->dev) {
scrub_submit(sctx);
goto again;
}
sbio->sectors[sbio->sector_count] = sector;
- ret = bio_add_page(sbio->bio, sector->page, sectorsize, 0);
+ ret = bio_add_scrub_sector(sbio->bio, sector, sectorsize);
if (ret != sectorsize) {
if (sbio->sector_count < 1) {
bio_put(sbio->bio);
@@ -2102,6 +2284,7 @@ static void scrub_missing_raid56_end_io(struct bio *bio)
struct scrub_block *sblock = bio->bi_private;
struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
+ btrfs_bio_counter_dec(fs_info);
if (bio->bi_status)
sblock->no_io_error_seen = 0;
@@ -2118,8 +2301,8 @@ static void scrub_missing_raid56_worker(struct work_struct *work)
u64 logical;
struct btrfs_device *dev;
- logical = sblock->sectors[0]->logical;
- dev = sblock->sectors[0]->dev;
+ logical = sblock->logical;
+ dev = sblock->dev;
if (sblock->no_io_error_seen)
scrub_recheck_block_checksum(sblock);
@@ -2157,7 +2340,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
struct scrub_ctx *sctx = sblock->sctx;
struct btrfs_fs_info *fs_info = sctx->fs_info;
u64 length = sblock->sector_count << fs_info->sectorsize_bits;
- u64 logical = sblock->sectors[0]->logical;
+ u64 logical = sblock->logical;
struct btrfs_io_context *bioc = NULL;
struct bio *bio;
struct btrfs_raid_bio *rbio;
@@ -2193,17 +2376,16 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
for (i = 0; i < sblock->sector_count; i++) {
struct scrub_sector *sector = sblock->sectors[i];
- /*
- * For now, our scrub is still one page per sector, so pgoff
- * is always 0.
- */
- raid56_add_scrub_pages(rbio, sector->page, 0, sector->logical);
+ raid56_add_scrub_pages(rbio, scrub_sector_get_page(sector),
+ scrub_sector_get_page_offset(sector),
+ sector->offset + sector->sblock->logical);
}
INIT_WORK(&sblock->work, scrub_missing_raid56_worker);
scrub_block_get(sblock);
scrub_pending_bio_inc(sctx);
raid56_submit_missing_rbio(rbio);
+ btrfs_put_bioc(bioc);
return;
rbio_out:
@@ -2225,7 +2407,8 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
const u32 sectorsize = sctx->fs_info->sectorsize;
int index;
- sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
+ sblock = alloc_scrub_block(sctx, dev, logical, physical,
+ physical_for_dev_replace, mirror_num);
if (!sblock) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@@ -2233,12 +2416,6 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
return -ENOMEM;
}
- /* one ref inside this function, plus one for each page added to
- * a bio later on */
- refcount_set(&sblock->refs, 1);
- sblock->sctx = sctx;
- sblock->no_io_error_seen = 1;
-
for (index = 0; len > 0; index++) {
struct scrub_sector *sector;
/*
@@ -2248,36 +2425,22 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
*/
u32 l = min(sectorsize, len);
- sector = kzalloc(sizeof(*sector), GFP_KERNEL);
+ sector = alloc_scrub_sector(sblock, logical, GFP_KERNEL);
if (!sector) {
-leave_nomem:
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
spin_unlock(&sctx->stat_lock);
scrub_block_put(sblock);
return -ENOMEM;
}
- ASSERT(index < SCRUB_MAX_SECTORS_PER_BLOCK);
- scrub_sector_get(sector);
- sblock->sectors[index] = sector;
- sector->sblock = sblock;
- sector->dev = dev;
sector->flags = flags;
sector->generation = gen;
- sector->logical = logical;
- sector->physical = physical;
- sector->physical_for_dev_replace = physical_for_dev_replace;
- sector->mirror_num = mirror_num;
if (csum) {
sector->have_csum = 1;
memcpy(sector->csum, csum, sctx->fs_info->csum_size);
} else {
sector->have_csum = 0;
}
- sblock->sector_count++;
- sector->page = alloc_page(GFP_KERNEL);
- if (!sector->page)
- goto leave_nomem;
len -= l;
logical += l;
physical += l;
@@ -2423,8 +2586,9 @@ static void scrub_block_complete(struct scrub_block *sblock)
}
if (sblock->sparity && corrupted && !sblock->data_corrected) {
- u64 start = sblock->sectors[0]->logical;
- u64 end = sblock->sectors[sblock->sector_count - 1]->logical +
+ u64 start = sblock->logical;
+ u64 end = sblock->logical +
+ sblock->sectors[sblock->sector_count - 1]->offset +
sblock->sctx->fs_info->sectorsize;
ASSERT(end - start <= U32_MAX);
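The range end is now derived from the last sector's offset rather than a per-sector logical. A standalone check of the math, assuming a 64K block at logical 1MiB with 4K sectors:

#include <assert.h>

int main(void)
{
	unsigned long long logical = 1048576; /* sblock->logical, 1MiB */
	unsigned int last_offset = 61440;     /* 16th 4K sector of 64K */
	unsigned int sectorsize = 4096;

	unsigned long long end = logical + last_offset + sectorsize;

	assert(end - logical == 65536); /* one full 64K scrub_block */
	return 0;
}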
@@ -2508,11 +2672,17 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
u8 csum[BTRFS_CSUM_SIZE];
u32 blocksize;
+ /*
+ * Block size determines how many scrub_blocks will be allocated. Here
+ * we use BTRFS_STRIPE_LEN (64KiB) as the default limit, so we won't
+ * allocate too many scrub_blocks, while still avoiding overly large
+ * bios for large extents.
+ */
if (flags & BTRFS_EXTENT_FLAG_DATA) {
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
blocksize = map->stripe_len;
else
- blocksize = sctx->fs_info->sectorsize;
+ blocksize = BTRFS_STRIPE_LEN;
spin_lock(&sctx->stat_lock);
sctx->stat.data_extents_scrubbed++;
sctx->stat.data_bytes_scrubbed += len;
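The effect of the larger default, in numbers (4K sectorsize assumed): a 1MiB data extent now maps to 16 scrub_blocks instead of 256 single-sector ones:

#include <stdio.h>

#define BTRFS_STRIPE_LEN 65536 /* 64KiB */

int main(void)
{
	unsigned int extent_len = 1024 * 1024; /* 1MiB data extent */
	unsigned int sectorsize = 4096;        /* assumed */

	printf("old blocksize: %u scrub_blocks\n", extent_len / sectorsize);
	printf("new blocksize: %u scrub_blocks\n", extent_len / BTRFS_STRIPE_LEN);
	return 0;
}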
@@ -2578,7 +2748,7 @@ static int scrub_sectors_for_parity(struct scrub_parity *sparity,
ASSERT(IS_ALIGNED(len, sectorsize));
- sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
+ sblock = alloc_scrub_block(sctx, dev, logical, physical, physical, mirror_num);
if (!sblock) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@@ -2586,51 +2756,32 @@ static int scrub_sectors_for_parity(struct scrub_parity *sparity,
return -ENOMEM;
}
- /* one ref inside this function, plus one for each page added to
- * a bio later on */
- refcount_set(&sblock->refs, 1);
- sblock->sctx = sctx;
- sblock->no_io_error_seen = 1;
sblock->sparity = sparity;
scrub_parity_get(sparity);
for (index = 0; len > 0; index++) {
struct scrub_sector *sector;
- sector = kzalloc(sizeof(*sector), GFP_KERNEL);
+ sector = alloc_scrub_sector(sblock, logical, GFP_KERNEL);
if (!sector) {
-leave_nomem:
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
spin_unlock(&sctx->stat_lock);
scrub_block_put(sblock);
return -ENOMEM;
}
- ASSERT(index < SCRUB_MAX_SECTORS_PER_BLOCK);
- /* For scrub block */
- scrub_sector_get(sector);
sblock->sectors[index] = sector;
/* For scrub parity */
scrub_sector_get(sector);
list_add_tail(&sector->list, &sparity->sectors_list);
- sector->sblock = sblock;
- sector->dev = dev;
sector->flags = flags;
sector->generation = gen;
- sector->logical = logical;
- sector->physical = physical;
- sector->mirror_num = mirror_num;
if (csum) {
sector->have_csum = 1;
memcpy(sector->csum, csum, sctx->fs_info->csum_size);
} else {
sector->have_csum = 0;
}
- sblock->sector_count++;
- sector->page = alloc_page(GFP_KERNEL);
- if (!sector->page)
- goto leave_nomem;
-
/* Iterate over the stripe range in sectorsize steps */
len -= sectorsize;
@@ -2774,6 +2925,7 @@ static void scrub_parity_bio_endio_worker(struct work_struct *work)
work);
struct scrub_ctx *sctx = sparity->sctx;
+ btrfs_bio_counter_dec(sctx->fs_info);
scrub_free_parity(sparity);
scrub_pending_bio_dec(sctx);
}
@@ -2824,6 +2976,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
sparity->scrub_dev,
&sparity->dbitmap,
sparity->nsectors);
+ btrfs_put_bioc(bioc);
if (!rbio)
goto rbio_out;
@@ -2835,7 +2988,6 @@ rbio_out:
bio_put(bio);
bioc_out:
btrfs_bio_counter_dec(fs_info);
- btrfs_put_bioc(bioc);
bitmap_or(&sparity->ebitmap, &sparity->ebitmap, &sparity->dbitmap,
sparity->nsectors);
spin_lock(&sctx->stat_lock);
@@ -3077,7 +3229,7 @@ static int scrub_raid56_data_stripe_for_parity(struct scrub_ctx *sctx,
ret = btrfs_lookup_csums_range(csum_root, extent_start,
extent_start + extent_size - 1,
- &sctx->csum_list, 1);
+ &sctx->csum_list, 1, false);
if (ret) {
scrub_parity_mark_sectors_error(sparity, extent_start,
extent_size);
@@ -3266,7 +3418,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
}
/* Block group removed? */
spin_lock(&bg->lock);
- if (bg->removed) {
+ if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &bg->runtime_flags)) {
spin_unlock(&bg->lock);
ret = 0;
break;
@@ -3303,7 +3455,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
if (extent_flags & BTRFS_EXTENT_FLAG_DATA) {
ret = btrfs_lookup_csums_range(csum_root, cur_logical,
cur_logical + scrub_len - 1,
- &sctx->csum_list, 1);
+ &sctx->csum_list, 1, false);
if (ret)
break;
}
@@ -3606,7 +3758,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
* kthread or relocation.
*/
spin_lock(&bg->lock);
- if (!bg->removed)
+ if (!test_bit(BLOCK_GROUP_FLAG_REMOVED, &bg->runtime_flags))
ret = -EINVAL;
spin_unlock(&bg->lock);
@@ -3764,13 +3916,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
}
if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) {
- spin_lock(&cache->lock);
- if (!cache->to_copy) {
+ if (!test_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags)) {
-		spin_unlock(&cache->lock);
btrfs_put_block_group(cache);
goto skip;
}
- spin_unlock(&cache->lock);
}
/*
@@ -3782,7 +3932,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
* repair extents.
*/
spin_lock(&cache->lock);
- if (cache->removed) {
+ if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &cache->runtime_flags)) {
spin_unlock(&cache->lock);
btrfs_put_block_group(cache);
goto skip;
@@ -3942,8 +4092,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
* balance is triggered or it becomes used and unused again.
*/
spin_lock(&cache->lock);
- if (!cache->removed && !cache->ro && cache->reserved == 0 &&
- cache->used == 0) {
+ if (!test_bit(BLOCK_GROUP_FLAG_REMOVED, &cache->runtime_flags) &&
+ !cache->ro && cache->reserved == 0 && cache->used == 0) {
spin_unlock(&cache->lock);
if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
btrfs_discard_queue_work(&fs_info->discard_ctl,
@@ -4102,36 +4252,21 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
int ret;
struct btrfs_device *dev;
unsigned int nofs_flag;
+ bool need_commit = false;
if (btrfs_fs_closing(fs_info))
return -EAGAIN;
- if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
- /*
- * in this case scrub is unable to calculate the checksum
- * the way scrub is implemented. Do not handle this
- * situation at all because it won't ever happen.
- */
- btrfs_err(fs_info,
- "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
- fs_info->nodesize,
- BTRFS_STRIPE_LEN);
- return -EINVAL;
- }
+ /* At mount time we have ensured nodesize is in the range of [4K, 64K]. */
+ ASSERT(fs_info->nodesize <= BTRFS_STRIPE_LEN);
- if (fs_info->nodesize >
- SCRUB_MAX_SECTORS_PER_BLOCK << fs_info->sectorsize_bits ||
- fs_info->sectorsize > PAGE_SIZE * SCRUB_MAX_SECTORS_PER_BLOCK) {
- /*
- * Would exhaust the array bounds of sectorv member in
- * struct scrub_block
- */
- btrfs_err(fs_info,
-"scrub: nodesize and sectorsize <= SCRUB_MAX_SECTORS_PER_BLOCK (%d <= %d && %d <= %d) fails",
- fs_info->nodesize, SCRUB_MAX_SECTORS_PER_BLOCK,
- fs_info->sectorsize, SCRUB_MAX_SECTORS_PER_BLOCK);
- return -EINVAL;
- }
+ /*
+ * SCRUB_MAX_SECTORS_PER_BLOCK is calculated using the largest possible
+ * value (max nodesize / min sectorsize), thus nodesize should always
+ * be fine.
+ */
+ ASSERT(fs_info->nodesize <=
+ SCRUB_MAX_SECTORS_PER_BLOCK << fs_info->sectorsize_bits);
/* Allocate outside of device_list_mutex */
sctx = scrub_setup_ctx(fs_info, is_dev_replace);
@@ -4205,6 +4340,12 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
*/
nofs_flag = memalloc_nofs_save();
if (!is_dev_replace) {
+ u64 old_super_errors;
+
+ spin_lock(&sctx->stat_lock);
+ old_super_errors = sctx->stat.super_errors;
+ spin_unlock(&sctx->stat_lock);
+
btrfs_info(fs_info, "scrub: started on devid %llu", devid);
/*
* by holding device list mutex, we can
@@ -4213,6 +4354,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
mutex_lock(&fs_info->fs_devices->device_list_mutex);
ret = scrub_supers(sctx, dev);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+
+ spin_lock(&sctx->stat_lock);
+ /*
+ * Super block errors found, but we cannot commit the transaction
+ * in the current context, since btrfs_commit_transaction() needs
+ * to pause the currently running scrub (held by ourselves).
+ */
+ if (sctx->stat.super_errors > old_super_errors && !sctx->readonly)
+ need_commit = true;
+ spin_unlock(&sctx->stat_lock);
}
if (!ret)
@@ -4239,6 +4390,25 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
scrub_workers_put(fs_info);
scrub_put_ctx(sctx);
+ /*
+ * We found super block errors earlier; now that scrub has finished,
+ * try to force a transaction commit to write out the repaired supers.
+ */
+ if (need_commit) {
+ struct btrfs_trans_handle *trans;
+
+ trans = btrfs_start_transaction(fs_info->tree_root, 0);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ btrfs_err(fs_info,
+ "scrub: failed to start transaction to fix super block errors: %d", ret);
+ return ret;
+ }
+ ret = btrfs_commit_transaction(trans);
+ if (ret < 0)
+ btrfs_err(fs_info,
+ "scrub: failed to commit transaction to fix super block errors: %d", ret);
+ }
return ret;
out:
scrub_workers_put(fs_info);