summaryrefslogtreecommitdiff
path: root/fs/btrfs/send.c
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2022-11-01 19:15:53 +0300
committerDavid Sterba <dsterba@suse.com>2022-12-05 20:00:50 +0300
commitadf0241868bd46c7be8012ef99cb88c7f47c16ce (patch)
treef1657367c9b04a45be7e1ab58b3e2f223ebc76b5 /fs/btrfs/send.c
parentf73853c7168aef0e071c160979af05a148691e61 (diff)
downloadlinux-adf0241868bd46c7be8012ef99cb88c7f47c16ce.tar.xz
btrfs: send: skip resolution of our own backref when finding clone source
When doing backref walking to determine a source range to clone from, it is worthless to collect and resolve our own data backref, as we can't obviously use it as a clone source and it represents the range we want to clone into. Collecting the backref implies doing the extra work to resolve it, doing the search for a file extent item in a subvolume tree, etc. Skipping the data backref is valid as long as we only have the send root as the single clone root, otherwise the leaf with the file extent item may be accessible from another clone root due to shared subtrees created by snapshots, and therefore we have to collect the backref and resolve it. So add a callback to the backref walking code to guide it to skip data backrefs. This change is part of a patchset comprised of the following patches: 01/17 btrfs: fix inode list leak during backref walking at resolve_indirect_refs() 02/17 btrfs: fix inode list leak during backref walking at find_parent_nodes() 03/17 btrfs: fix ulist leaks in error paths of qgroup self tests 04/17 btrfs: remove pointless and double ulist frees in error paths of qgroup tests 05/17 btrfs: send: avoid unnecessary path allocations when finding extent clone 06/17 btrfs: send: update comment at find_extent_clone() 07/17 btrfs: send: drop unnecessary backref context field initializations 08/17 btrfs: send: avoid unnecessary backref lookups when finding clone source 09/17 btrfs: send: optimize clone detection to increase extent sharing 10/17 btrfs: use a single argument for extent offset in backref walking functions 11/17 btrfs: use a structure to pass arguments to backref walking functions 12/17 btrfs: reuse roots ulist on each leaf iteration for iterate_extent_inodes() 13/17 btrfs: constify ulist parameter of ulist_next() 14/17 btrfs: send: cache leaf to roots mapping during backref walking 15/17 btrfs: send: skip unnecessary backref iterations 16/17 btrfs: send: avoid double extent tree search when finding clone source 17/17 btrfs: send: skip resolution of our own backref when finding clone source The following test was run on non-debug kernel (Debian's default kernel config) before and after applying the patchset: $ cat test-send-many-shared-extents.sh #!/bin/bash DEV=/dev/sdh MNT=/mnt/sdh umount $DEV &> /dev/null mkfs.btrfs -f $DEV mount $DEV $MNT num_files=50000 num_clones_per_file=50 for ((i = 1; i <= $num_files; i++)); do xfs_io -f -c "pwrite 0 64K" $MNT/file_$i > /dev/null echo -ne "\r$i files created..." done echo btrfs subvolume snapshot -r $MNT $MNT/snap1 cloned=0 for ((i = 1; i <= $num_clones_per_file; i++)); do for ((j = 1; j <= $num_files; j++)); do cp --reflink=always $MNT/file_$j $MNT/file_${j}_clone_${i} cloned=$((cloned + 1)) echo -ne "\r$cloned / $((num_files * num_clones_per_file)) clone operations" done done echo btrfs subvolume snapshot -r $MNT $MNT/snap2 # Unmount and mount again to clear all cached metadata (and data). umount $DEV mount $DEV $MNT start=$(date +%s%N) btrfs send $MNT/snap2 > /dev/null end=$(date +%s%N) dur=$(( (end - start) / 1000000000 )) echo -e "\nFull send took $dur seconds" # Unmount and mount again to clear all cached metadata (and data). umount $DEV mount $DEV $MNT start=$(date +%s%N) btrfs send -p $MNT/snap1 $MNT/snap2 > /dev/null end=$(date +%s%N) dur=$(( (end - start) / 1000000000 )) echo -e "\nIncremental send took $dur seconds" umount $MNT Before applying the patchset: (...) Full send took 1108 seconds (...) Incremental send took 1135 seconds After applying the whole patchset: (...) Full send took 268 seconds (-75.8%) (...) Incremental send took 316 seconds (-72.2%) Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/send.c')
-rw-r--r--fs/btrfs/send.c33
1 files changed, 33 insertions, 0 deletions
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 516b80637bfb..383bc8a5cb6c 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1284,6 +1284,10 @@ struct backref_ctx {
/* The bytenr the file extent item we are processing refers to. */
u64 bytenr;
+ /* The owner (root id) of the data backref for the current extent. */
+ u64 backref_owner;
+ /* The offset of the data backref for the current extent. */
+ u64 backref_offset;
};
static int __clone_root_cmp_bsearch(const void *key, const void *elt)
@@ -1558,6 +1562,18 @@ static int check_extent_item(u64 bytenr, const struct btrfs_extent_item *ei,
return 0;
}
+static bool skip_self_data_ref(u64 root, u64 ino, u64 offset, void *ctx)
+{
+ const struct backref_ctx *bctx = ctx;
+
+ if (ino == bctx->cur_objectid &&
+ root == bctx->backref_owner &&
+ offset == bctx->backref_offset)
+ return true;
+
+ return false;
+}
+
/*
* Given an inode, offset and extent item, it finds a good clone for a clone
* instruction. Returns -ENOENT when none could be found. The function makes
@@ -1624,6 +1640,12 @@ static int find_extent_clone(struct send_ctx *sctx,
backref_ctx.cur_objectid = ino;
backref_ctx.cur_offset = data_offset;
backref_ctx.bytenr = disk_byte;
+ /*
+ * Use the header owner and not the send root's id, because in case of a
+ * snapshot we can have shared subtrees.
+ */
+ backref_ctx.backref_owner = btrfs_header_owner(eb);
+ backref_ctx.backref_offset = data_offset - btrfs_file_extent_offset(eb, fi);
/*
* The last extent of a file may be too large due to page alignment.
@@ -1648,6 +1670,17 @@ static int find_extent_clone(struct send_ctx *sctx,
backref_walk_ctx.check_extent_item = check_extent_item;
backref_walk_ctx.user_ctx = &backref_ctx;
+ /*
+ * If have a single clone root, then it's the send root and we can tell
+ * the backref walking code to skip our own backref and not resolve it,
+ * since we can not use it for cloning - the source and destination
+ * ranges can't overlap and in case the leaf is shared through a subtree
+ * due to snapshots, we can't use those other roots since they are not
+ * in the list of clone roots.
+ */
+ if (sctx->clone_roots_cnt == 1)
+ backref_walk_ctx.skip_data_ref = skip_self_data_ref;
+
ret = iterate_extent_inodes(&backref_walk_ctx, true, iterate_backrefs,
&backref_ctx);
if (ret < 0)