Diffstat (limited to 'fs/btrfs/block-group.c')
 fs/btrfs/block-group.c | 154 ++++++++++++++++++++++++++++++-----------
 1 file changed, 113 insertions(+), 41 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 32c415cfbdfe..708d843daa72 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -17,6 +17,21 @@
#include "discard.h"
#include "raid56.h"
#include "zoned.h"
+#include "fs.h"
+#include "accessors.h"
+#include "extent-tree.h"
+
+#ifdef CONFIG_BTRFS_DEBUG
+int btrfs_should_fragment_free_space(struct btrfs_block_group *block_group)
+{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
+
+ return (btrfs_test_opt(fs_info, FRAGMENT_METADATA) &&
+ block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+ (btrfs_test_opt(fs_info, FRAGMENT_DATA) &&
+ block_group->flags & BTRFS_BLOCK_GROUP_DATA);
+}
+#endif
/*
* Return target flags in extended format or 0 if restripe for this chunk_type
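The new debug helper reduces to pairing each FRAGMENT_* mount option with the matching block group type. A minimal userspace sketch of the same test, with hypothetical stand-in constants for the btrfs mount options and block group flags:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-ins for the btrfs mount options and group flags. */
#define OPT_FRAGMENT_METADATA  (1u << 0)
#define OPT_FRAGMENT_DATA      (1u << 1)
#define BG_METADATA            (1u << 0)
#define BG_DATA                (1u << 1)

/* Fragment free space only when the mount option matches the group type. */
static bool should_fragment(uint32_t mount_opts, uint64_t bg_flags)
{
        return ((mount_opts & OPT_FRAGMENT_METADATA) && (bg_flags & BG_METADATA)) ||
               ((mount_opts & OPT_FRAGMENT_DATA) && (bg_flags & BG_DATA));
}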
@@ -284,7 +299,7 @@ struct btrfs_block_group *btrfs_next_block_group(
return cache;
}
-/**
+/*
* Check if we can do a NOCOW write for a given extent.
*
* @fs_info: The filesystem information object.
@@ -325,11 +340,9 @@ struct btrfs_block_group *btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info,
return bg;
}
-/**
+/*
* Decrement the number of NOCOW writers in a block group.
*
- * @bg: The block group.
- *
* This is meant to be called after a previous call to btrfs_inc_nocow_writers(),
* and on the block group returned by that call. Typically this is called after
* creating an ordered extent for a NOCOW write, to prevent races with scrub and
@@ -774,10 +787,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
out:
- /* REVIEW */
if (wait && caching_ctl)
ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
- /* wait_event(caching_ctl->wait, space_cache_v1_done(cache)); */
if (caching_ctl)
btrfs_put_caching_control(caching_ctl);
@@ -1529,6 +1540,30 @@ static inline bool btrfs_should_reclaim(struct btrfs_fs_info *fs_info)
return true;
}
+static bool should_reclaim_block_group(struct btrfs_block_group *bg, u64 bytes_freed)
+{
+ const struct btrfs_space_info *space_info = bg->space_info;
+ const int reclaim_thresh = READ_ONCE(space_info->bg_reclaim_threshold);
+ const u64 new_val = bg->used;
+ const u64 old_val = new_val + bytes_freed;
+ u64 thresh;
+
+ if (reclaim_thresh == 0)
+ return false;
+
+ thresh = mult_perc(bg->length, reclaim_thresh);
+
+ /*
+ * If we were below the threshold before, don't reclaim: we are likely
+ * a brand new block group and we don't want to relocate new block groups.
+ */
+ if (old_val < thresh)
+ return false;
+ if (new_val >= thresh)
+ return false;
+ return true;
+}
+
void btrfs_reclaim_bgs_work(struct work_struct *work)
{
struct btrfs_fs_info *fs_info =
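should_reclaim_block_group() only fires when a free crosses the group downward through the usage threshold. A userspace sketch of that decision, assuming mult_perc() scales by a whole percentage (num * percent / 100), which matches how the converted call sites below read:

#include <stdbool.h>
#include <stdint.h>

/* Sketch of btrfs's mult_perc(): num scaled by a whole percentage. */
static uint64_t mult_perc(uint64_t num, uint32_t percent)
{
        return num * percent / 100;
}

/*
 * Reclaim only when the group crossed the threshold downward: it was at
 * or above the threshold before the free, and fell below it afterwards.
 */
static bool should_reclaim(uint64_t length, uint64_t used,
                           uint64_t bytes_freed, uint32_t thresh_percent)
{
        uint64_t thresh;

        if (thresh_percent == 0)
                return false;
        thresh = mult_perc(length, thresh_percent);
        if (used + bytes_freed < thresh)        /* was already below: new group */
                return false;
        if (used >= thresh)                     /* still above: not reclaimable */
                return false;
        return true;
}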
@@ -1596,6 +1631,40 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
up_write(&space_info->groups_sem);
goto next;
}
+ if (bg->used == 0) {
+ /*
+ * It is possible that we trigger relocation on a block
+ * group as its extents are deleted and it first goes
+ * below the threshold, then shortly after goes empty.
+ *
+ * In this case, relocating it does delete it, but it has
+ * some overhead in relocation-specific metadata: it looks
+ * for the non-existent extents and runs some extra
+ * transactions, all of which we can avoid by using one of
+ * the other mechanisms for dealing with empty block groups.
+ */
+ if (!btrfs_test_opt(fs_info, DISCARD_ASYNC))
+ btrfs_mark_bg_unused(bg);
+ spin_unlock(&bg->lock);
+ up_write(&space_info->groups_sem);
+ goto next;
+ }
+ /*
+ * The block group might no longer meet the reclaim condition by
+ * the time we get around to reclaiming it, so skip overly full
+ * block groups instead of reclaiming them.
+ *
+ * Since the decision also depends on the amount being freed, pass
+ * in a fake giant value to skip that extra check: it is only
+ * meaningful when a group is first added to the list.
+ */
+ if (!should_reclaim_block_group(bg, bg->length)) {
+ spin_unlock(&bg->lock);
+ up_write(&space_info->groups_sem);
+ goto next;
+ }
spin_unlock(&bg->lock);
/* Get out fast, in case we're unmounting the filesystem */
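Passing bg->length as the bytes_freed argument effectively disables the "was below the threshold before" branch: the threshold is at most bg->length, so used + bg->length can never be below it, and only the "still below the threshold now" test remains. A short check of that invariant, appended to the should_reclaim() sketch above:

#include <assert.h>

int main(void)
{
        const uint64_t length = 1024ULL * 1024 * 1024;  /* 1 GiB block group */
        const uint32_t percent = 75;
        const uint64_t thresh = mult_perc(length, percent);

        /* thresh can never exceed length, so used + length >= thresh always. */
        assert(thresh <= length);

        /* With bytes_freed == length, only the "still below now" test matters. */
        assert(!should_reclaim(length, thresh, length, percent));     /* at thresh: keep */
        assert(should_reclaim(length, thresh - 1, length, percent));  /* below: reclaim */
        return 0;
}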
@@ -1742,8 +1811,8 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
write_sequnlock(&fs_info->profiles_lock);
}
-/**
- * Map a physical disk address to a list of logical addresses
+/*
+ * Map a physical disk address to a list of logical addresses.
*
* @fs_info: the filesystem
* @chunk_start: logical address of block group
@@ -2003,6 +2072,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
cache->length = key->offset;
cache->used = btrfs_stack_block_group_used(bgi);
+ cache->commit_used = cache->used;
cache->flags = btrfs_stack_block_group_flags(bgi);
cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);
@@ -2483,7 +2553,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
- cache->needs_free_space = 1;
+ set_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &cache->runtime_flags);
ret = btrfs_load_block_group_zone_info(cache, true);
if (ret) {
@@ -2694,6 +2764,25 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_block_group_item bgi;
struct btrfs_key key;
+ u64 old_commit_used;
+ u64 used;
+
+ /*
+ * Block group item updates can be triggered outside of the commit
+ * transaction critical section, thus we need a consistent view of the
+ * used bytes. We cannot read cache->used directly outside of the spin
+ * lock, as it may change at any time.
+ */
+ spin_lock(&cache->lock);
+ old_commit_used = cache->commit_used;
+ used = cache->used;
+ /* No change in used bytes, can safely skip it. */
+ if (cache->commit_used == used) {
+ spin_unlock(&cache->lock);
+ return 0;
+ }
+ cache->commit_used = used;
+ spin_unlock(&cache->lock);
key.objectid = cache->start;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
@@ -2708,7 +2797,7 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
- btrfs_set_stack_block_group_used(&bgi, cache->used);
+ btrfs_set_stack_block_group_used(&bgi, used);
btrfs_set_stack_block_group_chunk_objectid(&bgi,
cache->global_root_id);
btrfs_set_stack_block_group_flags(&bgi, cache->flags);
@@ -2716,6 +2805,12 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
fail:
btrfs_release_path(path);
+ /* We didn't update the block group item, need to revert @commit_used. */
+ if (ret < 0) {
+ spin_lock(&cache->lock);
+ cache->commit_used = old_commit_used;
+ spin_unlock(&cache->lock);
+ }
return ret;
}
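The new commit_used field caches the value last persisted to the block group item: an unchanged group costs no tree modification at commit time, and a failed write rolls the cache back so the next commit retries. A generic sketch of the same snapshot/skip/revert pattern, with hypothetical types and a pthread mutex plus a stub write standing in for the btrfs spinlock and btree update:

#include <pthread.h>
#include <stdint.h>

struct group {
        pthread_mutex_t lock;
        uint64_t used;          /* live counter, updated concurrently */
        uint64_t commit_used;   /* value last persisted to disk */
};

/* Stub standing in for the btree item update; pretend it succeeded. */
static int write_item(uint64_t used)
{
        (void)used;
        return 0;
}

static int update_group_item(struct group *g)
{
        uint64_t old_commit_used, used;
        int ret;

        /* Snapshot under the lock: the live counter may change under us. */
        pthread_mutex_lock(&g->lock);
        old_commit_used = g->commit_used;
        used = g->used;
        if (used == g->commit_used) {   /* nothing changed: skip the write */
                pthread_mutex_unlock(&g->lock);
                return 0;
        }
        g->commit_used = used;
        pthread_mutex_unlock(&g->lock);

        ret = write_item(used);
        if (ret < 0) {
                /* Revert so a later commit retries the failed write. */
                pthread_mutex_lock(&g->lock);
                g->commit_used = old_commit_used;
                pthread_mutex_unlock(&g->lock);
        }
        return ret;
}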
@@ -3213,31 +3308,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
return ret;
}
-static inline bool should_reclaim_block_group(struct btrfs_block_group *bg,
- u64 bytes_freed)
-{
- const struct btrfs_space_info *space_info = bg->space_info;
- const int reclaim_thresh = READ_ONCE(space_info->bg_reclaim_threshold);
- const u64 new_val = bg->used;
- const u64 old_val = new_val + bytes_freed;
- u64 thresh;
-
- if (reclaim_thresh == 0)
- return false;
-
- thresh = div_factor_fine(bg->length, reclaim_thresh);
-
- /*
- * If we were below the threshold before don't reclaim, we are likely a
- * brand new block group and we don't want to relocate new block groups.
- */
- if (old_val < thresh)
- return false;
- if (new_val >= thresh)
- return false;
- return true;
-}
-
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, bool alloc)
{
@@ -3349,8 +3419,9 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
return ret;
}
-/**
- * btrfs_add_reserved_bytes - update the block_group and space info counters
+/*
+ * Update the block_group and space info counters.
+ *
* @cache: The cache we are manipulating
* @ram_bytes: The number of bytes of file content; this is the same as
* @num_bytes except on the compression path.
@@ -3393,8 +3464,9 @@ int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
return ret;
}
-/**
- * btrfs_free_reserved_bytes - update the block_group and space info counters
+/*
+ * Update the block_group and space info counters.
+ *
* @cache: The cache we are manipulating
* @num_bytes: The number of bytes in question
* @delalloc: The blocks are allocated for the delalloc write
@@ -3451,13 +3523,13 @@ static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
*/
if (force == CHUNK_ALLOC_LIMITED) {
thresh = btrfs_super_total_bytes(fs_info->super_copy);
- thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
+ thresh = max_t(u64, SZ_64M, mult_perc(thresh, 1));
if (sinfo->total_bytes - bytes_used < thresh)
return 1;
}
- if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
+ if (bytes_used + SZ_2M < mult_perc(sinfo->total_bytes, 80))
return 0;
return 1;
}
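The div_factor()/div_factor_fine() to mult_perc() conversions keep the arithmetic intact while making the units explicit: assuming the historical definitions (num * factor / 10 and num * factor / 100 respectively), div_factor(x, 8) is 80% and div_factor_fine(x, 1) is 1%, hence the 8 -> 80 rewrite above. A quick equivalence check under those assumptions:

#include <assert.h>
#include <stdint.h>

/* Sketches of the old helpers: tenths and hundredths respectively. */
static uint64_t div_factor(uint64_t num, int factor)      { return num * factor / 10; }
static uint64_t div_factor_fine(uint64_t num, int factor) { return num * factor / 100; }
static uint64_t mult_perc(uint64_t num, uint32_t percent) { return num * percent / 100; }

int main(void)
{
        const uint64_t total = 10ULL * 1024 * 1024 * 1024;      /* 10 GiB */

        /* div_factor()'s tenths become whole percentages: 8/10 == 80%. */
        assert(div_factor(total, 8) == mult_perc(total, 80));
        /* div_factor_fine() was already in hundredths: 1/100 == 1%. */
        assert(div_factor_fine(total, 1) == mult_perc(total, 1));
        return 0;
}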