summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/file.c13
-rw-r--r--fs/btrfs/tree-log.c93
2 files changed, 79 insertions, 27 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 4a536387e992..18c88f514a0d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2103,19 +2103,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
atomic_inc(&root->log_batch);
/*
- * If the inode needs a full sync, make sure we use a full range to
- * avoid log tree corruption, due to hole detection racing with ordered
- * extent completion for adjacent ranges, and assertion failures during
- * hole detection. Do this while holding the inode lock, to avoid races
- * with other tasks.
- */
- if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags)) {
- start = 0;
- end = LLONG_MAX;
- }
-
- /*
* Before we acquired the inode's lock, someone may have dirtied more
* pages in the target range. We need to make sure that writeback for
* any such pages does not start while we are logging the inode, because
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 5cf04d3f067d..0eebd6a02d41 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -96,8 +96,8 @@ enum {
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_inode *inode,
int inode_only,
- const loff_t start,
- const loff_t end,
+ u64 start,
+ u64 end,
struct btrfs_log_ctx *ctx);
static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -4534,13 +4534,15 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
static int btrfs_log_holes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *inode,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ const u64 start,
+ const u64 end)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key key;
const u64 ino = btrfs_ino(inode);
const u64 i_size = i_size_read(&inode->vfs_inode);
- u64 prev_extent_end = 0;
+ u64 prev_extent_end = start;
int ret;
if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0)
@@ -4548,14 +4550,21 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans,
key.objectid = ino;
key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = 0;
+ key.offset = start;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
return ret;
+ if (ret > 0 && path->slots[0] > 0) {
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
+ if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY)
+ path->slots[0]--;
+ }
+
while (true) {
struct extent_buffer *leaf = path->nodes[0];
+ u64 extent_end;
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
ret = btrfs_next_leaf(root, path);
@@ -4572,9 +4581,18 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans,
if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)
break;
+ extent_end = btrfs_file_extent_end(path);
+ if (extent_end <= start)
+ goto next_slot;
+
/* We have a hole, log it. */
if (prev_extent_end < key.offset) {
- const u64 hole_len = key.offset - prev_extent_end;
+ u64 hole_len;
+
+ if (key.offset >= end)
+ hole_len = end - prev_extent_end;
+ else
+ hole_len = key.offset - prev_extent_end;
/*
* Release the path to avoid deadlocks with other code
@@ -4604,16 +4622,20 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
}
- prev_extent_end = btrfs_file_extent_end(path);
+ prev_extent_end = min(extent_end, end);
+ if (extent_end >= end)
+ break;
+next_slot:
path->slots[0]++;
cond_resched();
}
- if (prev_extent_end < i_size) {
+ if (prev_extent_end < end && prev_extent_end < i_size) {
u64 hole_len;
btrfs_release_path(path);
- hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize);
+ hole_len = min(ALIGN(i_size, fs_info->sectorsize), end);
+ hole_len -= prev_extent_end;
ret = btrfs_insert_file_extent(trans, root->log_root,
ino, prev_extent_end, 0, 0,
hole_len, 0, hole_len,
@@ -4950,6 +4972,8 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans,
const u64 logged_isize,
const bool recursive_logging,
const int inode_only,
+ const u64 start,
+ const u64 end,
struct btrfs_log_ctx *ctx,
bool *need_log_inode_item)
{
@@ -4958,6 +4982,21 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans,
int ins_nr = 0;
int ret;
+ /*
+ * We must make sure we don't copy extent items that are entirely out of
+ * the range [start, end - 1]. This is not just an optimization to avoid
+ * copying but also needed to avoid a corruption where we end up with
+ * file extent items in the log tree that have overlapping ranges - this
+ * can happen if we race with ordered extent completion for ranges that
+ * are outside our target range. For example we copy an extent item and
+ * when we move to the next leaf, that extent was trimmed and a new one
+ * covering a subrange of it, but with a higher key, was inserted - we
+ * would then copy this other extent too, resulting in a log tree with
+ * 2 extent items that represent overlapping ranges.
+ *
+ * We can copy the entire extents at the range bondaries however, even
+ * if they cover an area outside the target range. That's ok.
+ */
while (1) {
ret = btrfs_search_forward(root, min_key, path, trans->transid);
if (ret < 0)
@@ -5025,6 +5064,29 @@ again:
goto next_slot;
}
+ if (min_key->type == BTRFS_EXTENT_DATA_KEY) {
+ const u64 extent_end = btrfs_file_extent_end(path);
+
+ if (extent_end <= start) {
+ if (ins_nr > 0) {
+ ret = copy_items(trans, inode, dst_path,
+ path, ins_start_slot,
+ ins_nr, inode_only,
+ logged_isize);
+ if (ret < 0)
+ return ret;
+ ins_nr = 0;
+ }
+ goto next_slot;
+ }
+ if (extent_end >= end) {
+ ins_nr++;
+ if (ins_nr == 1)
+ ins_start_slot = path->slots[0];
+ break;
+ }
+ }
+
if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
ins_nr++;
goto next_slot;
@@ -5090,8 +5152,8 @@ next_key:
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_inode *inode,
int inode_only,
- const loff_t start,
- const loff_t end,
+ u64 start,
+ u64 end,
struct btrfs_log_ctx *ctx)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -5119,6 +5181,9 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
return -ENOMEM;
}
+ start = ALIGN_DOWN(start, fs_info->sectorsize);
+ end = ALIGN(end, fs_info->sectorsize);
+
min_key.objectid = ino;
min_key.type = BTRFS_INODE_ITEM_KEY;
min_key.offset = 0;
@@ -5234,8 +5299,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
err = copy_inode_items_to_log(trans, inode, &min_key, &max_key,
path, dst_path, logged_isize,
- recursive_logging, inode_only, ctx,
- &need_log_inode_item);
+ recursive_logging, inode_only,
+ start, end, ctx, &need_log_inode_item);
if (err)
goto out_unlock;
@@ -5248,7 +5313,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
btrfs_release_path(path);
btrfs_release_path(dst_path);
- err = btrfs_log_holes(trans, root, inode, path);
+ err = btrfs_log_holes(trans, root, inode, path, start, end);
if (err)
goto out_unlock;
}