summaryrefslogtreecommitdiff
path: root/fs/btrfs/volumes.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-28 22:26:57 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-28 22:26:57 +0300
commit547635c6ac47c7556d6954935b189defe90422f7 (patch)
tree5545e515b7916df8a08aca89ec95401a54e00289 /fs/btrfs/volumes.c
parentf678c890c684373a387b0d73cd4d51edbf329c27 (diff)
parentc02d35d89b317994bd713ba82e160c5e7f22d9c8 (diff)
downloadlinux-547635c6ac47c7556d6954935b189defe90422f7.tar.xz
Merge tag 'for-6.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "No new features, the bulk of the changes are fixes, refactoring and cleanups. The notable fix is the scrub performance restoration after rewrite in 6.4, though still only partial. Fixes: - scrub performance drop due to rewrite in 6.4 partially restored: - do IO grouping by blk_plug/blk_unplug again - avoid unnecessary tree searches when processing stripes, in extent and checksum trees - the drop is noticeable on fast PCIe devices, -66% and restored to -33% of the original - backports to 6.4 planned - handle more corner cases of transaction commit during orphan cleanup or delayed ref processing - use correct fsid/metadata_uuid when validating super block - copy directory permissions and time when creating a stub subvolume Core: - debugging feature integrity checker deprecated, to be removed in 6.7 - in zoned mode, zones are activated just before the write, making error handling easier, now the overcommit mechanism can be enabled again which improves performance by avoiding more frequent flushing - v0 extent handling completely removed, deprecated long time ago - error handling improvements - tests: - extent buffer bitmap tests - pinned extent splitting tests - cleanups and refactoring: - compression writeback - extent buffer bitmap - space flushing, ENOSPC handling" * tag 'for-6.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (110 commits) btrfs: zoned: skip splitting and logical rewriting on pre-alloc write btrfs: tests: test invalid splitting when skipping pinned drop extent_map btrfs: tests: add a test for btrfs_add_extent_mapping btrfs: tests: add extent_map tests for dropping with odd layouts btrfs: scrub: move write back of repaired sectors to scrub_stripe_read_repair_worker() btrfs: scrub: don't go ordered workqueue for dev-replace btrfs: scrub: fix grouping of read IO btrfs: scrub: avoid unnecessary csum tree search preparing stripes btrfs: scrub: avoid unnecessary extent tree search preparing 
stripes btrfs: copy dir permission and time when creating a stub subvolume btrfs: remove pointless empty list check when reading delayed dir indexes btrfs: drop redundant check to use fs_devices::metadata_uuid btrfs: compare the correct fsid/metadata_uuid in btrfs_validate_super btrfs: use the correct superblock to compare fsid in btrfs_validate_super btrfs: simplify memcpy either of metadata_uuid or fsid btrfs: add a helper to read the superblock metadata_uuid btrfs: remove v0 extent handling btrfs: output extra debug info if we failed to find an inline backref btrfs: move the !zoned assert into run_delalloc_cow btrfs: consolidate the error handling in run_delalloc_nocow ...
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r--fs/btrfs/volumes.c94
1 files changed, 63 insertions, 31 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f6718999d183..9621455edebc 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -681,6 +681,14 @@ error_free_page:
return -EINVAL;
}
+u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb)
+{
+ bool has_metadata_uuid = (btrfs_super_incompat_flags(sb) &
+ BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
+
+ return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
+}
+
/*
* Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices
* being created with a disk that has already completed its fsid change. Such
@@ -833,15 +841,8 @@ static noinline struct btrfs_device *device_list_add(const char *path,
found_transid > fs_devices->latest_generation) {
memcpy(fs_devices->fsid, disk_super->fsid,
BTRFS_FSID_SIZE);
-
- if (has_metadata_uuid)
- memcpy(fs_devices->metadata_uuid,
- disk_super->metadata_uuid,
- BTRFS_FSID_SIZE);
- else
- memcpy(fs_devices->metadata_uuid,
- disk_super->fsid, BTRFS_FSID_SIZE);
-
+ memcpy(fs_devices->metadata_uuid,
+ btrfs_sb_fsid_ptr(disk_super), BTRFS_FSID_SIZE);
fs_devices->fsid_change = false;
}
}
@@ -851,8 +852,9 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (fs_devices->opened) {
btrfs_err(NULL,
- "device %s belongs to fsid %pU, and the fs is already mounted",
- path, fs_devices->fsid);
+"device %s belongs to fsid %pU, and the fs is already mounted, scanned by %s (%d)",
+ path, fs_devices->fsid, current->comm,
+ task_pid_nr(current));
mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-EBUSY);
}
@@ -1424,9 +1426,9 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
lockdep_assert_held(&device->fs_info->chunk_mutex);
- if (!find_first_extent_bit(&device->alloc_state, *start,
- &physical_start, &physical_end,
- CHUNK_ALLOCATED, NULL)) {
+ if (find_first_extent_bit(&device->alloc_state, *start,
+ &physical_start, &physical_end,
+ CHUNK_ALLOCATED, NULL)) {
if (in_range(physical_start, *start, len) ||
in_range(*start, physical_start,
@@ -1438,18 +1440,18 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
return false;
}
-static u64 dev_extent_search_start(struct btrfs_device *device, u64 start)
+static u64 dev_extent_search_start(struct btrfs_device *device)
{
switch (device->fs_devices->chunk_alloc_policy) {
case BTRFS_CHUNK_ALLOC_REGULAR:
- return max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED);
+ return BTRFS_DEVICE_RANGE_RESERVED;
case BTRFS_CHUNK_ALLOC_ZONED:
/*
* We don't care about the starting region like regular
* allocator, because we anyway use/reserve the first two zones
* for superblock logging.
*/
- return ALIGN(start, device->zone_info->zone_size);
+ return 0;
default:
BUG();
}
@@ -1581,15 +1583,15 @@ static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
* correct usable device space, as device extent freed in current transaction
* is not reported as available.
*/
-static int find_free_dev_extent_start(struct btrfs_device *device,
- u64 num_bytes, u64 search_start, u64 *start,
- u64 *len)
+static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
+ u64 *start, u64 *len)
{
struct btrfs_fs_info *fs_info = device->fs_info;
struct btrfs_root *root = fs_info->dev_root;
struct btrfs_key key;
struct btrfs_dev_extent *dev_extent;
struct btrfs_path *path;
+ u64 search_start;
u64 hole_size;
u64 max_hole_start;
u64 max_hole_size;
@@ -1599,7 +1601,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
int slot;
struct extent_buffer *l;
- search_start = dev_extent_search_start(device, search_start);
+ search_start = dev_extent_search_start(device);
WARN_ON(device->zone_info &&
!IS_ALIGNED(num_bytes, device->zone_info->zone_size));
@@ -1725,13 +1727,6 @@ out:
return ret;
}
-int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
- u64 *start, u64 *len)
-{
- /* FIXME use last free of some kind */
- return find_free_dev_extent_start(device, num_bytes, 0, start, len);
-}
-
static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device,
u64 start, u64 *dev_extent_len)
@@ -6217,6 +6212,45 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *
stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
}
+/*
+ * Map one logical range to one or more physical ranges.
+ *
+ * @length: (Mandatory) mapped length of this run.
+ * One logical range can be split into different segments
+ * due to factors like zones and RAID0/5/6/10 stripe
+ * boundaries.
+ *
+ * @bioc_ret: (Mandatory) returned btrfs_io_context structure.
+ * which has one or more physical ranges (btrfs_io_stripe)
+ * recorded inside.
+ * Caller should call btrfs_put_bioc() to free it after use.
+ *
+ * @smap: (Optional) single physical range optimization.
+ * If the map request can be fulfilled by one single
+ * physical range, and this parameter is not NULL,
+ * then @bioc_ret would be NULL, and @smap would be
+ * updated.
+ *
+ * @mirror_num_ret: (Mandatory) returned mirror number if the original
+ * value is 0.
+ *
+ * Mirror number 0 means to choose any live mirrors.
+ *
+ * For non-RAID56 profiles, non-zero mirror_num means
+ * the Nth mirror. (e.g. mirror_num 1 means the first
+ * copy).
+ *
+ * For RAID56 profile, mirror 2 means rebuild from P and
+ * the remaining data stripes.
+ *
+ * For RAID6 profile, mirror > 2 means mark another
+ * data/P stripe error and rebuild from the remaining
+ * stripes.
+ *
+ * @need_raid_map: (Used only for integrity checker) whether the map wants
+ * a full stripe map (including all data and P/Q stripes)
+ * for RAID56. Should always be 1 except integrity checker.
+ */
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret,
@@ -6391,9 +6425,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* I/O context structure.
*/
if (smap && num_alloc_stripes == 1 &&
- !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1) &&
- (op == BTRFS_MAP_READ || !dev_replace_is_ongoing ||
- !dev_replace->tgtdev)) {
+ !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) {
set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
if (mirror_num_ret)
*mirror_num_ret = mirror_num;