diff options
Diffstat (limited to 'fs')
38 files changed, 270 insertions, 110 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index e54f0884802a..79336fa853db 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -45,7 +45,8 @@ static int check_extent_in_eb(struct btrfs_backref_walk_ctx *ctx, int root_count; bool cached; - if (!btrfs_file_extent_compression(eb, fi) && + if (!ctx->ignore_extent_item_pos && + !btrfs_file_extent_compression(eb, fi) && !btrfs_file_extent_encryption(eb, fi) && !btrfs_file_extent_other_encoding(eb, fi)) { u64 data_offset; @@ -552,7 +553,7 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx, count++; else goto next; - if (!ctx->ignore_extent_item_pos) { + if (!ctx->skip_inode_ref_list) { ret = check_extent_in_eb(ctx, &key, eb, fi, &eie); if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP || ret < 0) @@ -564,7 +565,7 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx, eie, (void **)&old, GFP_NOFS); if (ret < 0) break; - if (!ret && !ctx->ignore_extent_item_pos) { + if (!ret && !ctx->skip_inode_ref_list) { while (old->next) old = old->next; old->next = eie; @@ -1606,7 +1607,7 @@ again: goto out; } if (ref->count && ref->parent) { - if (!ctx->ignore_extent_item_pos && !ref->inode_list && + if (!ctx->skip_inode_ref_list && !ref->inode_list && ref->level == 0) { struct btrfs_tree_parent_check check = { 0 }; struct extent_buffer *eb; @@ -1647,7 +1648,7 @@ again: (void **)&eie, GFP_NOFS); if (ret < 0) goto out; - if (!ret && !ctx->ignore_extent_item_pos) { + if (!ret && !ctx->skip_inode_ref_list) { /* * We've recorded that parent, so we must extend * its inode list here. @@ -1743,7 +1744,7 @@ int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx) static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx) { const u64 orig_bytenr = ctx->bytenr; - const bool orig_ignore_extent_item_pos = ctx->ignore_extent_item_pos; + const bool orig_skip_inode_ref_list = ctx->skip_inode_ref_list; bool roots_ulist_allocated = false; struct ulist_iterator uiter; int ret = 0; @@ -1764,7 +1765,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx) roots_ulist_allocated = true; } - ctx->ignore_extent_item_pos = true; + ctx->skip_inode_ref_list = true; ULIST_ITER_INIT(&uiter); while (1) { @@ -1789,7 +1790,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx) ulist_free(ctx->refs); ctx->refs = NULL; ctx->bytenr = orig_bytenr; - ctx->ignore_extent_item_pos = orig_ignore_extent_item_pos; + ctx->skip_inode_ref_list = orig_skip_inode_ref_list; return ret; } @@ -1912,7 +1913,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, goto out_trans; } - walk_ctx.ignore_extent_item_pos = true; + walk_ctx.skip_inode_ref_list = true; walk_ctx.trans = trans; walk_ctx.fs_info = fs_info; walk_ctx.refs = &ctx->refs; diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index ef6bbea3f456..1616e3e3f1e4 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -60,6 +60,12 @@ struct btrfs_backref_walk_ctx { * @extent_item_pos is ignored. */ bool ignore_extent_item_pos; + /* + * If true and bytenr corresponds to a data extent, then the inode list + * (each member describing inode number, file offset and root) is not + * added to each reference added to the @refs ulist. + */ + bool skip_inode_ref_list; /* A valid transaction handle or NULL. */ struct btrfs_trans_handle *trans; /* diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index 3ab707e26fa2..ac18c43fadad 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -124,7 +124,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, } else { num_bytes = 0; } - if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) { + if (qgroup_to_release_ret && + block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) { qgroup_to_release = block_rsv->qgroup_rsv_reserved - block_rsv->qgroup_rsv_size; block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3c983c70028a..2ff2961b1183 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2627,6 +2627,10 @@ static bool check_sibling_keys(struct extent_buffer *left, } if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) { + btrfs_crit(left->fs_info, "left extent buffer:"); + btrfs_print_tree(left, false); + btrfs_crit(left->fs_info, "right extent buffer:"); + btrfs_print_tree(right, false); btrfs_crit(left->fs_info, "bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)", left_last.objectid, left_last.type, @@ -3215,6 +3219,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (check_sibling_keys(left, right)) { ret = -EUCLEAN; + btrfs_abort_transaction(trans, ret); btrfs_tree_unlock(right); free_extent_buffer(right); return ret; @@ -3433,6 +3438,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root if (check_sibling_keys(left, right)) { ret = -EUCLEAN; + btrfs_abort_transaction(trans, ret); goto out; } return __push_leaf_left(trans, path, min_data_size, empty, left, @@ -4478,10 +4484,12 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { struct btrfs_key key; + struct btrfs_key orig_key; struct btrfs_disk_key found_key; int ret; btrfs_item_key_to_cpu(path->nodes[0], &key, 0); + orig_key = key; if (key.offset > 0) { key.offset--; @@ -4498,8 +4506,36 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) btrfs_release_path(path); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) + if (ret <= 0) return ret; + + /* + * Previous key not found. Even if we were at slot 0 of the leaf we had + * before releasing the path and calling btrfs_search_slot(), we now may + * be in a slot pointing to the same original key - this can happen if + * after we released the path, one of more items were moved from a + * sibling leaf into the front of the leaf we had due to an insertion + * (see push_leaf_right()). + * If we hit this case and our slot is > 0 and just decrement the slot + * so that the caller does not process the same key again, which may or + * may not break the caller, depending on its logic. + */ + if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) { + btrfs_item_key(path->nodes[0], &found_key, path->slots[0]); + ret = comp_keys(&found_key, &orig_key); + if (ret == 0) { + if (path->slots[0] > 0) { + path->slots[0]--; + return 0; + } + /* + * At slot 0, same key as before, it means orig_key is + * the lowest, leftmost, key in the tree. We're done. + */ + return 1; + } + } + btrfs_item_key(path->nodes[0], &found_key, 0); ret = comp_keys(&found_key, &key); /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 59ea049fe7ee..fbf9006c6234 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3121,23 +3121,34 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) { int ret; const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE); - bool clear_free_space_tree = false; + bool rebuild_free_space_tree = false; if (btrfs_test_opt(fs_info, CLEAR_CACHE) && btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { - clear_free_space_tree = true; + rebuild_free_space_tree = true; } else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) { btrfs_warn(fs_info, "free space tree is invalid"); - clear_free_space_tree = true; + rebuild_free_space_tree = true; } - if (clear_free_space_tree) { - btrfs_info(fs_info, "clearing free space tree"); - ret = btrfs_clear_free_space_tree(fs_info); + if (rebuild_free_space_tree) { + btrfs_info(fs_info, "rebuilding free space tree"); + ret = btrfs_rebuild_free_space_tree(fs_info); if (ret) { btrfs_warn(fs_info, - "failed to clear free space tree: %d", ret); + "failed to rebuild free space tree: %d", ret); + goto out; + } + } + + if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && + !btrfs_test_opt(fs_info, FREE_SPACE_TREE)) { + btrfs_info(fs_info, "disabling free space tree"); + ret = btrfs_delete_free_space_tree(fs_info); + if (ret) { + btrfs_warn(fs_info, + "failed to disable free space tree: %d", ret); goto out; } } diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 018c711a0bc8..cd4cce9ba443 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -52,13 +52,13 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz u64 start, end, i_size; int ret; + spin_lock(&inode->lock); i_size = new_i_size ?: i_size_read(&inode->vfs_inode); if (btrfs_fs_incompat(fs_info, NO_HOLES)) { inode->disk_i_size = i_size; - return; + goto out_unlock; } - spin_lock(&inode->lock); ret = find_contiguous_extent_bit(&inode->file_extent_tree, 0, &start, &end, EXTENT_DIRTY); if (!ret && start == 0) @@ -66,6 +66,7 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz else i_size = 0; inode->disk_i_size = i_size; +out_unlock: spin_unlock(&inode->lock); } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index d84cef89cdff..cf98a3c05480 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -870,15 +870,16 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, } spin_lock(&ctl->tree_lock); ret = link_free_space(ctl, e); - ctl->total_bitmaps++; - recalculate_thresholds(ctl); - spin_unlock(&ctl->tree_lock); if (ret) { + spin_unlock(&ctl->tree_lock); btrfs_err(fs_info, "Duplicate entries in free space cache, dumping"); kmem_cache_free(btrfs_free_space_cachep, e); goto free_cache; } + ctl->total_bitmaps++; + recalculate_thresholds(ctl); + spin_unlock(&ctl->tree_lock); list_add_tail(&e->list, &bitmaps); } diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 4d155a48ec59..b21da1446f2a 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1252,7 +1252,7 @@ out: return ret; } -int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) +int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info) { struct btrfs_trans_handle *trans; struct btrfs_root *tree_root = fs_info->tree_root; @@ -1298,6 +1298,54 @@ abort: return ret; } +int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info) +{ + struct btrfs_trans_handle *trans; + struct btrfs_key key = { + .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, + .type = BTRFS_ROOT_ITEM_KEY, + .offset = 0, + }; + struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key); + struct rb_node *node; + int ret; + + trans = btrfs_start_transaction(free_space_root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); + set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); + + ret = clear_free_space_tree(trans, free_space_root); + if (ret) + goto abort; + + node = rb_first_cached(&fs_info->block_group_cache_tree); + while (node) { + struct btrfs_block_group *block_group; + + block_group = rb_entry(node, struct btrfs_block_group, + cache_node); + ret = populate_free_space_tree(trans, block_group); + if (ret) + goto abort; + node = rb_next(node); + } + + btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); + btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); + clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); + + ret = btrfs_commit_transaction(trans); + clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); + return ret; +abort: + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); + return ret; +} + static int __add_block_group_free_space(struct btrfs_trans_handle *trans, struct btrfs_block_group *block_group, struct btrfs_path *path) diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h index dc2463e4cfe3..6d5551d0ced8 100644 --- a/fs/btrfs/free-space-tree.h +++ b/fs/btrfs/free-space-tree.h @@ -18,7 +18,8 @@ struct btrfs_caching_control; void set_free_space_tree_thresholds(struct btrfs_block_group *block_group); int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info); -int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info); +int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info); +int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info); int load_free_space_tree(struct btrfs_caching_control *caching_ctl); int add_block_group_free_space(struct btrfs_trans_handle *trans, struct btrfs_block_group *block_group); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 57d070025c7a..19c707bc8801 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3108,6 +3108,9 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) btrfs_rewrite_logical_zoned(ordered_extent); btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr, ordered_extent->disk_num_bytes); + } else if (btrfs_is_data_reloc_root(inode->root)) { + btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr, + ordered_extent->disk_num_bytes); } if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 25833b4eeaf5..2fa36f694daa 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -454,7 +454,9 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info, case BTRFS_EXCLOP_BALANCE_PAUSED: spin_lock(&fs_info->super_lock); ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE || - fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD); + fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD || + fs_info->exclusive_operation == BTRFS_EXCLOP_NONE || + fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED); fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED; spin_unlock(&fs_info->super_lock); break; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index b93c96213304..497b9dbd8a13 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -151,10 +151,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type) pr_cont("shared data backref parent %llu count %u\n", offset, btrfs_shared_data_ref_count(eb, sref)); /* - * offset is supposed to be a tree block which - * must be aligned to nodesize. + * Offset is supposed to be a tree block which must be + * aligned to sectorsize. */ - if (!IS_ALIGNED(offset, eb->fs_info->nodesize)) + if (!IS_ALIGNED(offset, eb->fs_info->sectorsize)) pr_info( "\t\t\t(parent %llu not aligned to sectorsize %u)\n", offset, eb->fs_info->sectorsize); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 09b1988d1791..59a06499c647 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3422,7 +3422,7 @@ int add_data_references(struct reloc_control *rc, btrfs_release_path(path); ctx.bytenr = extent_key->objectid; - ctx.ignore_extent_item_pos = true; + ctx.skip_inode_ref_list = true; ctx.fs_info = rc->extent_root->fs_info; ret = btrfs_find_all_leafs(&ctx); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6cb97efee976..ec18e2210602 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -826,7 +826,11 @@ out: !btrfs_test_opt(info, CLEAR_CACHE)) { btrfs_err(info, "cannot disable free space tree"); ret = -EINVAL; - + } + if (btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE) && + !btrfs_test_opt(info, FREE_SPACE_TREE)) { + btrfs_err(info, "cannot disable free space tree with block-group-tree feature"); + ret = -EINVAL; } if (!ret) ret = btrfs_check_mountopts_zoned(info); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 03f52e4a20aa..841e799dece5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -395,6 +395,7 @@ void btrfs_free_device(struct btrfs_device *device) { WARN_ON(!list_empty(&device->post_commit_list)); rcu_string_free(device->name); + extent_io_tree_release(&device->alloc_state); btrfs_destroy_dev_zone_info(device); kfree(device); } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index a9b32ba6b2ce..39828af4a4e8 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -122,10 +122,9 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones, int i; for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { - u64 bytenr; - - bytenr = ((zones[i].start + zones[i].len) - << SECTOR_SHIFT) - BTRFS_SUPER_INFO_SIZE; + u64 zone_end = (zones[i].start + zones[i].capacity) << SECTOR_SHIFT; + u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) - + BTRFS_SUPER_INFO_SIZE; page[i] = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS); @@ -1168,12 +1167,12 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size) return -ERANGE; /* All the zones are conventional */ - if (find_next_bit(zinfo->seq_zones, begin, end) == end) + if (find_next_bit(zinfo->seq_zones, end, begin) == end) return 0; /* All the zones are sequential and empty */ - if (find_next_zero_bit(zinfo->seq_zones, begin, end) == end && - find_next_zero_bit(zinfo->empty_zones, begin, end) == end) + if (find_next_zero_bit(zinfo->seq_zones, end, begin) == end && + find_next_zero_bit(zinfo->empty_zones, end, begin) == end) return 0; for (pos = start; pos < start + size; pos += zinfo->zone_size) { @@ -1610,11 +1609,11 @@ void btrfs_redirty_list_add(struct btrfs_transaction *trans, !list_empty(&eb->release_list)) return; + memzero_extent_buffer(eb, 0, eb->len); + set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags); set_extent_buffer_dirty(eb); set_extent_bits_nowait(&trans->dirty_pages, eb->start, eb->start + eb->len - 1, EXTENT_DIRTY); - memzero_extent_buffer(eb, 0, eb->len); - set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags); spin_lock(&trans->releasing_ebs_lock); list_add_tail(&eb->release_list, &trans->releasing_ebs); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 32f7c81a7b89..43a4d8603db3 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -246,7 +246,7 @@ cifs_read_super(struct super_block *sb) if (cifs_sb->ctx->rasize) sb->s_bdi->ra_pages = cifs_sb->ctx->rasize / PAGE_SIZE; else - sb->s_bdi->ra_pages = cifs_sb->ctx->rsize / PAGE_SIZE; + sb->s_bdi->ra_pages = 2 * (cifs_sb->ctx->rsize / PAGE_SIZE); sb->s_blocksize = CIFS_MAX_MSGSIZE; sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ @@ -744,6 +744,7 @@ static void cifs_umount_begin(struct super_block *sb) spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); + cifs_close_all_deferred_files(tcon); /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ /* cancel_notify_requests(tcon); */ if (tcon->ses && tcon->ses->server) { @@ -759,6 +760,20 @@ static void cifs_umount_begin(struct super_block *sb) return; } +static int cifs_freeze(struct super_block *sb) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_tcon *tcon; + + if (cifs_sb == NULL) + return 0; + + tcon = cifs_sb_master_tcon(cifs_sb); + + cifs_close_all_deferred_files(tcon); + return 0; +} + #ifdef CONFIG_CIFS_STATS2 static int cifs_show_stats(struct seq_file *s, struct dentry *root) { @@ -797,6 +812,7 @@ static const struct super_operations cifs_super_ops = { as opens */ .show_options = cifs_show_options, .umount_begin = cifs_umount_begin, + .freeze_fs = cifs_freeze, #ifdef CONFIG_CIFS_STATS2 .show_stats = cifs_show_stats, #endif diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index eeeed6fda13b..8e9a672320ab 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2709,6 +2709,13 @@ cifs_match_super(struct super_block *sb, void *data) spin_lock(&cifs_tcp_ses_lock); cifs_sb = CIFS_SB(sb); + + /* We do not want to use a superblock that has been shutdown */ + if (CIFS_MOUNT_SHUTDOWN & cifs_sb->mnt_cifs_flags) { + spin_unlock(&cifs_tcp_ses_lock); + return 0; + } + tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); if (tlink == NULL) { /* can not match superblock if tlink were ever null */ diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index a81758225fcd..a295e4c2d54e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1682,7 +1682,7 @@ smb2_copychunk_range(const unsigned int xid, pcchunk->SourceOffset = cpu_to_le64(src_off); pcchunk->TargetOffset = cpu_to_le64(dest_off); pcchunk->Length = - cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk)); + cpu_to_le32(min_t(u64, len, tcon->max_bytes_chunk)); /* Request server copy to target from src identified by key */ kfree(retbuf); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index e33ca0d33906..9ed61b6f9b21 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1947,6 +1947,9 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, init_copy_chunk_defaults(tcon); if (server->ops->validate_negotiate) rc = server->ops->validate_negotiate(xid, tcon); + if (rc == 0) /* See MS-SMB2 2.2.10 and 3.2.5.5 */ + if (tcon->share_flags & SMB2_SHAREFLAG_ISOLATED_TRANSPORT) + server->nosharesock = true; tcon_exit: free_rsp_buf(resp_buftype, rsp); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 5eed8c237500..a84bf6444bba 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1419,6 +1419,14 @@ static void gfs2_evict_inode(struct inode *inode) if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr) goto out; + /* + * In case of an incomplete mount, gfs2_evict_inode() may be called for + * system files without having an active journal to write to. In that + * case, skip the filesystem evict. + */ + if (!sdp->sd_jdesc) + goto out; + gfs2_holder_mark_uninitialized(&gh); ret = evict_should_delete(inode, &gh); if (ret == SHOULD_DEFER_EVICTION) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bacad0c57810..e63c1d46f189 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -402,7 +402,7 @@ static struct folio *nfs_readdir_folio_get_locked(struct address_space *mapping, struct folio *folio; folio = filemap_grab_folio(mapping, index); - if (!folio) + if (IS_ERR(folio)) return NULL; nfs_readdir_folio_init_and_validate(folio, cookie, change_attr); return folio; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 49cfe2ae6d23..993375f0db67 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -65,7 +65,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, struct fsnotify_event *fsn_event; struct fsnotify_group *group = inode_mark->group; int ret; - int len = 0; + int len = 0, wd; int alloc_len = sizeof(struct inotify_event_info); struct mem_cgroup *old_memcg; @@ -81,6 +81,13 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, fsn_mark); /* + * We can be racing with mark being detached. Don't report event with + * invalid wd. + */ + wd = READ_ONCE(i_mark->wd); + if (wd == -1) + return 0; + /* * Whoever is interested in the event, pays for the allocation. Do not * trigger OOM killer in the target monitoring memcg as it may have * security repercussion. @@ -110,7 +117,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, fsn_event = &event->fse; fsnotify_init_event(fsn_event); event->mask = mask; - event->wd = i_mark->wd; + event->wd = wd; event->sync_cookie = cookie; event->name_len = len; if (len) diff --git a/fs/pipe.c b/fs/pipe.c index ceb17d2dfa19..2d88f73f585a 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -342,7 +342,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) break; if (ret) break; - if (filp->f_flags & O_NONBLOCK) { + if ((filp->f_flags & O_NONBLOCK) || + (iocb->ki_flags & IOCB_NOWAIT)) { ret = -EAGAIN; break; } @@ -547,7 +548,8 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) continue; /* Wait for buffer space to become available. */ - if (filp->f_flags & O_NONBLOCK) { + if ((filp->f_flags & O_NONBLOCK) || + (iocb->ki_flags & IOCB_NOWAIT)) { if (!ret) ret = -EAGAIN; break; diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 1b078bbbf225..9b373a0c7aaf 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -495,10 +495,12 @@ xfs_freesp_init_recs( ASSERT(start >= mp->m_ag_prealloc_blocks); if (start != mp->m_ag_prealloc_blocks) { /* - * Modify first record to pad stripe align of log + * Modify first record to pad stripe align of log and + * bump the record count. */ arec->ar_blockcount = cpu_to_be32(start - mp->m_ag_prealloc_blocks); + be16_add_cpu(&block->bb_numrecs, 1); nrec = arec + 1; /* @@ -509,7 +511,6 @@ xfs_freesp_init_recs( be32_to_cpu(arec->ar_startblock) + be32_to_cpu(arec->ar_blockcount)); arec = nrec; - be16_add_cpu(&block->bb_numrecs, 1); } /* * Change record start to after the internal log @@ -518,15 +519,13 @@ xfs_freesp_init_recs( } /* - * Calculate the record block count and check for the case where - * the log might have consumed all available space in the AG. If - * so, reset the record count to 0 to avoid exposure of an invalid - * record start block. + * Calculate the block count of this record; if it is nonzero, + * increment the record count. */ arec->ar_blockcount = cpu_to_be32(id->agsize - be32_to_cpu(arec->ar_startblock)); - if (!arec->ar_blockcount) - block->bb_numrecs = 0; + if (arec->ar_blockcount) + be16_add_cpu(&block->bb_numrecs, 1); } /* @@ -538,7 +537,7 @@ xfs_bnoroot_init( struct xfs_buf *bp, struct aghdr_init_data *id) { - xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno); + xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno); xfs_freesp_init_recs(mp, bp, id); } @@ -548,7 +547,7 @@ xfs_cntroot_init( struct xfs_buf *bp, struct aghdr_init_data *id) { - xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno); + xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno); xfs_freesp_init_recs(mp, bp, id); } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index b512de0540d5..cd8870a16fd1 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3494,8 +3494,10 @@ xfs_bmap_btalloc_at_eof( if (!caller_pag) args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno)); error = xfs_alloc_vextent_exact_bno(args, ap->blkno); - if (!caller_pag) + if (!caller_pag) { xfs_perag_put(args->pag); + args->pag = NULL; + } if (error) return error; @@ -3505,7 +3507,6 @@ xfs_bmap_btalloc_at_eof( * Exact allocation failed. Reset to try an aligned allocation * according to the original allocation specification. */ - args->pag = NULL; args->alignment = stripe_align; args->minlen = nextminlen; args->minalignslop = 0; diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 87ab9f95a487..69bc89d0fc68 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -42,12 +42,12 @@ xchk_setup_inode_bmap( xfs_ilock(sc->ip, XFS_IOLOCK_EXCL); /* - * We don't want any ephemeral data fork updates sitting around + * We don't want any ephemeral data/cow fork updates sitting around * while we inspect block mappings, so wait for directio to finish * and flush dirty data if we have delalloc reservations. */ if (S_ISREG(VFS_I(sc->ip)->i_mode) && - sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) { + sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) { struct address_space *mapping = VFS_I(sc->ip)->i_mapping; sc->ilock_flags |= XFS_MMAPLOCK_EXCL; diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 9aa79665c608..7a20256be969 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -1164,32 +1164,6 @@ xchk_metadata_inode_forks( return 0; } -/* Pause background reaping of resources. */ -void -xchk_stop_reaping( - struct xfs_scrub *sc) -{ - sc->flags |= XCHK_REAPING_DISABLED; - xfs_blockgc_stop(sc->mp); - xfs_inodegc_stop(sc->mp); -} - -/* Restart background reaping of resources. */ -void -xchk_start_reaping( - struct xfs_scrub *sc) -{ - /* - * Readonly filesystems do not perform inactivation or speculative - * preallocation, so there's no need to restart the workers. - */ - if (!xfs_is_readonly(sc->mp)) { - xfs_inodegc_start(sc->mp); - xfs_blockgc_start(sc->mp); - } - sc->flags &= ~XCHK_REAPING_DISABLED; -} - /* * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub * operation. Callers must not hold any locks that intersect with the CPU diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 18b5f2b62f13..791235cd9b00 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -156,8 +156,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm) } int xchk_metadata_inode_forks(struct xfs_scrub *sc); -void xchk_stop_reaping(struct xfs_scrub *sc); -void xchk_start_reaping(struct xfs_scrub *sc); /* * Setting up a hook to wait for intents to drain is costly -- we have to take diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index faa315be7978..e382a35e98d8 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -150,13 +150,6 @@ xchk_setup_fscounters( if (error) return error; - /* - * Pause background reclaim while we're scrubbing to reduce the - * likelihood of background perturbations to the counters throwing off - * our calculations. - */ - xchk_stop_reaping(sc); - return xchk_trans_alloc(sc, 0); } @@ -454,6 +447,12 @@ xchk_fscounters( xchk_set_corrupt(sc); /* + * XXX: We can't quiesce percpu counter updates, so exit early. + * This can be re-enabled when we gain exclusive freeze functionality. + */ + return 0; + + /* * If ifree exceeds icount by more than the minimum variance then * something's probably wrong with the counters. */ diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 02819bedc5b1..3d98f604765e 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -186,8 +186,6 @@ xchk_teardown( } if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) mnt_drop_write_file(sc->file); - if (sc->flags & XCHK_REAPING_DISABLED) - xchk_start_reaping(sc); if (sc->buf) { if (sc->buf_cleanup) sc->buf_cleanup(sc->buf); diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index e71903474cd7..b38e93830dde 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -106,7 +106,6 @@ struct xfs_scrub { /* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */ #define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */ -#define XCHK_REAPING_DISABLED (1 << 1) /* background block reaping paused */ #define XCHK_FSGATES_DRAIN (1 << 2) /* defer ops draining enabled */ #define XCHK_NEED_DRAIN (1 << 3) /* scrub needs to drain defer ops */ #define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */ diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 68efd6fda61c..b3894daeb86a 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -98,7 +98,6 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS); #define XFS_SCRUB_STATE_STRINGS \ { XCHK_TRY_HARDER, "try_harder" }, \ - { XCHK_REAPING_DISABLED, "reaping_disabled" }, \ { XCHK_FSGATES_DRAIN, "fsgates_drain" }, \ { XCHK_NEED_DRAIN, "need_drain" }, \ { XREP_ALREADY_FIXED, "already_fixed" } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index f032d3a4b727..fbb675563208 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -558,7 +558,9 @@ xfs_getbmap( if (!xfs_iext_next_extent(ifp, &icur, &got)) { xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); - out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST; + if (bmv->bmv_entries > 0) + out[bmv->bmv_entries - 1].bmv_oflags |= + BMV_OF_LAST; if (whichfork != XFS_ATTR_FORK && bno < end && !xfs_getbmap_full(bmv)) { diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 351849fc18ff..0f60e301eb1f 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -435,18 +435,23 @@ xfs_iget_check_free_state( } /* Make all pending inactivation work start immediately. */ -static void +static bool xfs_inodegc_queue_all( struct xfs_mount *mp) { struct xfs_inodegc *gc; int cpu; + bool ret = false; for_each_online_cpu(cpu) { gc = per_cpu_ptr(mp->m_inodegc, cpu); - if (!llist_empty(&gc->list)) + if (!llist_empty(&gc->list)) { mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0); + ret = true; + } } + + return ret; } /* @@ -1856,6 +1861,8 @@ xfs_inodegc_worker( struct xfs_inode *ip, *n; unsigned int nofs_flag; + ASSERT(gc->cpu == smp_processor_id()); + WRITE_ONCE(gc->items, 0); if (!node) @@ -1909,24 +1916,41 @@ xfs_inodegc_flush( /* * Flush all the pending work and then disable the inode inactivation background - * workers and wait for them to stop. + * workers and wait for them to stop. Caller must hold sb->s_umount to + * coordinate changes in the inodegc_enabled state. */ void xfs_inodegc_stop( struct xfs_mount *mp) { + bool rerun; + if (!xfs_clear_inodegc_enabled(mp)) return; + /* + * Drain all pending inodegc work, including inodes that could be + * queued by racing xfs_inodegc_queue or xfs_inodegc_shrinker_scan + * threads that sample the inodegc state just prior to us clearing it. + * The inodegc flag state prevents new threads from queuing more + * inodes, so we queue pending work items and flush the workqueue until + * all inodegc lists are empty. IOWs, we cannot use drain_workqueue + * here because it does not allow other unserialized mechanisms to + * reschedule inodegc work while this draining is in progress. + */ xfs_inodegc_queue_all(mp); - drain_workqueue(mp->m_inodegc_wq); + do { + flush_workqueue(mp->m_inodegc_wq); + rerun = xfs_inodegc_queue_all(mp); + } while (rerun); trace_xfs_inodegc_stop(mp, __return_address); } /* * Enable the inode inactivation background workers and schedule deferred inode - * inactivation work if there is any. + * inactivation work if there is any. Caller must hold sb->s_umount to + * coordinate changes in the inodegc_enabled state. */ void xfs_inodegc_start( @@ -2069,7 +2093,8 @@ xfs_inodegc_queue( queue_delay = 0; trace_xfs_inodegc_queue(mp, __return_address); - mod_delayed_work(mp->m_inodegc_wq, &gc->work, queue_delay); + mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work, + queue_delay); put_cpu_ptr(gc); if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) { @@ -2113,7 +2138,8 @@ xfs_inodegc_cpu_dead( if (xfs_is_inodegc_enabled(mp)) { trace_xfs_inodegc_queue(mp, __return_address); - mod_delayed_work(mp->m_inodegc_wq, &gc->work, 0); + mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work, + 0); } put_cpu_ptr(gc); } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 285885c308bd..18c8f168b153 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1006,8 +1006,9 @@ xfs_buffered_write_iomap_begin( if (eof) imap.br_startoff = end_fsb; /* fake hole until the end */ - /* We never need to allocate blocks for zeroing a hole. */ - if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) { + /* We never need to allocate blocks for zeroing or unsharing a hole. */ + if ((flags & (IOMAP_UNSHARE | IOMAP_ZERO)) && + imap.br_startoff > offset_fsb) { xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff); goto out_unlock; } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index f3269c0626f0..aaaf5ec13492 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -66,6 +66,9 @@ struct xfs_inodegc { /* approximate count of inodes in the list */ unsigned int items; unsigned int shrinker_hits; +#if defined(DEBUG) || defined(XFS_WARN) + unsigned int cpu; +#endif }; /* diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 4d2e87462ac4..7e706255f165 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1095,6 +1095,9 @@ xfs_inodegc_init_percpu( for_each_possible_cpu(cpu) { gc = per_cpu_ptr(mp->m_inodegc, cpu); +#if defined(DEBUG) || defined(XFS_WARN) + gc->cpu = cpu; +#endif init_llist_head(&gc->list); gc->items = 0; INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker); |