diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 161 |
1 files changed, 80 insertions, 81 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b53f0e30ce2b..59ea049fe7ee 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1341,17 +1341,8 @@ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) { int ret; - unsigned int nofs_flag; - /* - * We might be called under a transaction (e.g. indirect backref - * resolution) which could deadlock if it triggers memory reclaim - */ - nofs_flag = memalloc_nofs_save(); - ret = btrfs_drew_lock_init(&root->snapshot_lock); - memalloc_nofs_restore(nofs_flag); - if (ret) - goto fail; + btrfs_drew_lock_init(&root->snapshot_lock); if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID && !btrfs_is_data_reloc_root(root)) { @@ -2065,7 +2056,6 @@ void btrfs_put_root(struct btrfs_root *root) WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)); if (root->anon_dev) free_anon_bdev(root->anon_dev); - btrfs_drew_lock_destroy(&root->snapshot_lock); free_root_extent_buffers(root); #ifdef CONFIG_BTRFS_DEBUG spin_lock(&root->fs_info->fs_roots_radix_lock); @@ -2125,11 +2115,16 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info) atomic_set(&fs_info->reloc_cancel_req, 0); } -static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) +static int btrfs_init_btree_inode(struct super_block *sb) { - struct inode *inode = fs_info->btree_inode; + struct btrfs_fs_info *fs_info = btrfs_sb(sb); unsigned long hash = btrfs_inode_hash(BTRFS_BTREE_INODE_OBJECTID, fs_info->tree_root); + struct inode *inode; + + inode = new_inode(sb); + if (!inode) + return -ENOMEM; inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; set_nlink(inode, 1); @@ -2140,6 +2135,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) */ inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &btree_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree, @@ -2152,6 +2148,9 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) BTRFS_I(inode)->location.offset = 0; set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); __insert_inode_hash(inode, hash); + fs_info->btree_inode = inode; + + return 0; } static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info) @@ -2250,6 +2249,20 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type) fs_info->csum_shash = csum_shash; + /* + * Check if the checksum implementation is a fast accelerated one. + * As-is this is a bit of a hack and should be replaced once the csum + * implementations provide that information themselves. + */ + switch (csum_type) { + case BTRFS_CSUM_TYPE_CRC32: + if (!strstr(crypto_shash_driver_name(csum_shash), "generic")) + set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); + break; + default: + break; + } + btrfs_info(fs_info, "using %s (%s) checksum algorithm", btrfs_super_csum_name(csum_type), crypto_shash_driver_name(csum_shash)); @@ -2952,7 +2965,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) atomic64_set(&fs_info->free_chunk_space, 0); fs_info->tree_mod_log = RB_ROOT; fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; - fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */ btrfs_init_ref_verify(fs_info); fs_info->thread_pool_size = min_t(unsigned long, @@ -3330,14 +3342,11 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device struct btrfs_root *tree_root; struct btrfs_root *chunk_root; int ret; - int err = -EINVAL; int level; ret = init_mount_fs_info(fs_info, sb); - if (ret) { - err = ret; + if (ret) goto fail; - } /* These need to be init'ed before we start creating inodes and such. */ tree_root = btrfs_alloc_root(fs_info, BTRFS_ROOT_TREE_OBJECTID, @@ -3347,17 +3356,13 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device GFP_KERNEL); fs_info->chunk_root = chunk_root; if (!tree_root || !chunk_root) { - err = -ENOMEM; + ret = -ENOMEM; goto fail; } - fs_info->btree_inode = new_inode(sb); - if (!fs_info->btree_inode) { - err = -ENOMEM; + ret = btrfs_init_btree_inode(sb); + if (ret) goto fail; - } - mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); - btrfs_init_btree_inode(fs_info); invalidate_bdev(fs_devices->latest_dev->bdev); @@ -3366,7 +3371,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device */ disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev); if (IS_ERR(disk_super)) { - err = PTR_ERR(disk_super); + ret = PTR_ERR(disk_super); goto fail_alloc; } @@ -3378,7 +3383,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device if (!btrfs_supported_super_csum(csum_type)) { btrfs_err(fs_info, "unsupported checksum algorithm: %u", csum_type); - err = -EINVAL; + ret = -EINVAL; btrfs_release_disk_super(disk_super); goto fail_alloc; } @@ -3387,7 +3392,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device ret = btrfs_init_csum_hash(fs_info, csum_type); if (ret) { - err = ret; btrfs_release_disk_super(disk_super); goto fail_alloc; } @@ -3398,7 +3402,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device */ if (btrfs_check_super_csum(fs_info, disk_super)) { btrfs_err(fs_info, "superblock checksum mismatch"); - err = -EINVAL; + ret = -EINVAL; btrfs_release_disk_super(disk_super); goto fail_alloc; } @@ -3428,12 +3432,15 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device ret = btrfs_validate_mount_super(fs_info); if (ret) { btrfs_err(fs_info, "superblock contains fatal errors"); - err = -EINVAL; + ret = -EINVAL; goto fail_alloc; } - if (!btrfs_super_root(disk_super)) + if (!btrfs_super_root(disk_super)) { + btrfs_err(fs_info, "invalid superblock tree root bytenr"); + ret = -EINVAL; goto fail_alloc; + } /* check FS state, whether FS is broken. */ if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR) @@ -3460,16 +3467,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device fs_info->stripesize = stripesize; ret = btrfs_parse_options(fs_info, options, sb->s_flags); - if (ret) { - err = ret; + if (ret) goto fail_alloc; - } ret = btrfs_check_features(fs_info, !sb_rdonly(sb)); - if (ret < 0) { - err = ret; + if (ret < 0) goto fail_alloc; - } if (sectorsize < PAGE_SIZE) { struct btrfs_subpage_info *subpage_info; @@ -3489,17 +3492,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device "read-write for sector size %u with page size %lu is experimental", sectorsize, PAGE_SIZE); subpage_info = kzalloc(sizeof(*subpage_info), GFP_KERNEL); - if (!subpage_info) + if (!subpage_info) { + ret = -ENOMEM; goto fail_alloc; + } btrfs_init_subpage_info(subpage_info, sectorsize); fs_info->subpage_info = subpage_info; } ret = btrfs_init_workqueues(fs_info); - if (ret) { - err = ret; + if (ret) goto fail_sb_buffer; - } sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super); sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE); @@ -3545,6 +3548,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device btrfs_free_extra_devids(fs_devices); if (!fs_devices->latest_dev->bdev) { btrfs_err(fs_info, "failed to read devices"); + ret = -EIO; goto fail_tree_roots; } @@ -3560,8 +3564,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device ret = btrfs_get_dev_zone_info_all_devices(fs_info); if (ret) { btrfs_err(fs_info, - "zoned: failed to read device zone info: %d", - ret); + "zoned: failed to read device zone info: %d", ret); goto fail_block_groups; } @@ -3640,19 +3643,24 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device !btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, "writable mount is not allowed due to too many missing devices"); + ret = -EINVAL; goto fail_sysfs; } fs_info->cleaner_kthread = kthread_run(cleaner_kthread, fs_info, "btrfs-cleaner"); - if (IS_ERR(fs_info->cleaner_kthread)) + if (IS_ERR(fs_info->cleaner_kthread)) { + ret = PTR_ERR(fs_info->cleaner_kthread); goto fail_sysfs; + } fs_info->transaction_kthread = kthread_run(transaction_kthread, tree_root, "btrfs-transaction"); - if (IS_ERR(fs_info->transaction_kthread)) + if (IS_ERR(fs_info->transaction_kthread)) { + ret = PTR_ERR(fs_info->transaction_kthread); goto fail_cleaner; + } if (!btrfs_test_opt(fs_info, NOSSD) && !fs_info->fs_devices->rotating) { @@ -3670,7 +3678,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device fs_info->fs_devices->discardable) { btrfs_set_and_info(fs_info, DISCARD_ASYNC, "auto enabling async discard"); - btrfs_clear_opt(fs_info->mount_opt, NODISCARD); } #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY @@ -3697,16 +3704,14 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device !btrfs_test_opt(fs_info, NOLOGREPLAY)) { btrfs_info(fs_info, "start tree-log replay"); ret = btrfs_replay_log(fs_info, fs_devices); - if (ret) { - err = ret; + if (ret) goto fail_qgroup; - } } fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true); if (IS_ERR(fs_info->fs_root)) { - err = PTR_ERR(fs_info->fs_root); - btrfs_warn(fs_info, "failed to read fs tree: %d", err); + ret = PTR_ERR(fs_info->fs_root); + btrfs_warn(fs_info, "failed to read fs tree: %d", ret); fs_info->fs_root = NULL; goto fail_qgroup; } @@ -3783,7 +3788,8 @@ fail_alloc: iput(fs_info->btree_inode); fail: btrfs_close_devices(fs_info->fs_devices); - return err; + ASSERT(ret < 0); + return ret; } ALLOW_ERROR_INJECTION(open_ctree, ERRNO); @@ -4080,6 +4086,8 @@ static void write_dev_flush(struct btrfs_device *device) { struct bio *bio = &device->flush_bio; + device->last_flush_error = BLK_STS_OK; + #ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY /* * When a disk has write caching disabled, we skip submission of a bio @@ -4108,25 +4116,24 @@ static void write_dev_flush(struct btrfs_device *device) /* * If the flush bio has been submitted by write_dev_flush, wait for it. + * Return true for any error, and false otherwise. */ -static blk_status_t wait_dev_flush(struct btrfs_device *device) +static bool wait_dev_flush(struct btrfs_device *device) { struct bio *bio = &device->flush_bio; - if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)) - return BLK_STS_OK; + if (!test_and_clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)) + return false; - clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state); wait_for_completion_io(&device->flush_wait); - return bio->bi_status; -} + if (bio->bi_status) { + device->last_flush_error = bio->bi_status; + btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_FLUSH_ERRS); + return true; + } -static int check_barrier_error(struct btrfs_fs_info *fs_info) -{ - if (!btrfs_check_rw_degradable(fs_info, NULL)) - return -EIO; - return 0; + return false; } /* @@ -4138,7 +4145,6 @@ static int barrier_all_devices(struct btrfs_fs_info *info) struct list_head *head; struct btrfs_device *dev; int errors_wait = 0; - blk_status_t ret; lockdep_assert_held(&info->fs_devices->device_list_mutex); /* send down all the barriers */ @@ -4153,7 +4159,6 @@ static int barrier_all_devices(struct btrfs_fs_info *info) continue; write_dev_flush(dev); - dev->last_flush_error = BLK_STS_OK; } /* wait for all the barriers */ @@ -4168,23 +4173,17 @@ static int barrier_all_devices(struct btrfs_fs_info *info) !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; - ret = wait_dev_flush(dev); - if (ret) { - dev->last_flush_error = ret; - btrfs_dev_stat_inc_and_print(dev, - BTRFS_DEV_STAT_FLUSH_ERRS); + if (wait_dev_flush(dev)) errors_wait++; - } } - if (errors_wait) { - /* - * At some point we need the status of all disks - * to arrive at the volume status. So error checking - * is being pushed to a separate loop. - */ - return check_barrier_error(info); - } + /* + * Checks last_flush_error of disks in order to determine the device + * state. + */ + if (errors_wait && !btrfs_check_rw_degradable(info, NULL)) + return -EIO; + return 0; } @@ -4390,12 +4389,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) root_objectid = gang[i]->root_key.objectid; err = btrfs_orphan_cleanup(gang[i]); if (err) - break; + goto out; btrfs_put_root(gang[i]); } root_objectid++; } - +out: /* release the uncleaned roots due to error */ for (; i < ret; i++) { if (gang[i]) |