summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/backref.c17
-rw-r--r--fs/btrfs/extent-tree.c51
-rw-r--r--fs/btrfs/extent_io.c19
-rw-r--r--fs/btrfs/free-space-cache.c16
-rw-r--r--fs/btrfs/ordered-data.c14
-rw-r--r--fs/btrfs/volumes.c1
6 files changed, 107 insertions, 11 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 9de772ee0031..614aaa1969bd 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -880,6 +880,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
* indirect refs to their parent bytenr.
* When roots are found, they're added to the roots list
*
+ * NOTE: This can return values > 0
+ *
* FIXME some caching might speed things up
*/
static int find_parent_nodes(struct btrfs_trans_handle *trans,
@@ -1198,6 +1200,19 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
return ret;
}
+/**
+ * btrfs_check_shared - tell us whether an extent is shared
+ *
+ * @trans: optional trans handle
+ *
+ * btrfs_check_shared uses the backref walking code but will short
+ * circuit as soon as it finds a root or inode that doesn't match the
+ * one passed in. This provides a significant performance benefit for
+ * callers (such as fiemap) which want to know whether the extent is
+ * shared but do not need a ref count.
+ *
+ * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
+ */
int btrfs_check_shared(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 root_objectid,
u64 inum, u64 bytenr)
@@ -1226,11 +1241,13 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
roots, NULL, root_objectid, inum);
if (ret == BACKREF_FOUND_SHARED) {
+ /* this is the only condition under which we return 1 */
ret = 1;
break;
}
if (ret < 0 && ret != -ENOENT)
break;
+ ret = 0;
node = ulist_next(tmp, &uiter);
if (!node)
break;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0ec8e228b89f..0ec3acd14cbf 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3180,8 +3180,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
fail:
btrfs_release_path(path);
- if (ret)
- btrfs_abort_transaction(trans, root, ret);
return ret;
}
@@ -3487,8 +3485,30 @@ again:
ret = 0;
}
}
- if (!ret)
+ if (!ret) {
ret = write_one_cache_group(trans, root, path, cache);
+ /*
+ * Our block group might still be attached to the list
+ * of new block groups in the transaction handle of some
+ * other task (struct btrfs_trans_handle->new_bgs). This
+ * means its block group item isn't yet in the extent
+ * tree. If this happens ignore the error, as we will
+ * try again later in the critical section of the
+ * transaction commit.
+ */
+ if (ret == -ENOENT) {
+ ret = 0;
+ spin_lock(&cur_trans->dirty_bgs_lock);
+ if (list_empty(&cache->dirty_list)) {
+ list_add_tail(&cache->dirty_list,
+ &cur_trans->dirty_bgs);
+ btrfs_get_block_group(cache);
+ }
+ spin_unlock(&cur_trans->dirty_bgs_lock);
+ } else if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ }
+ }
/* if its not on the io list, we need to put the block group */
if (should_put)
@@ -3597,8 +3617,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
ret = 0;
}
}
- if (!ret)
+ if (!ret) {
ret = write_one_cache_group(trans, root, path, cache);
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
+ }
/* if its not on the io list, we need to put the block group */
if (should_put)
@@ -8806,6 +8829,24 @@ again:
goto again;
}
+ /*
+ * if we are changing raid levels, try to allocate a corresponding
+ * block group with the new raid level.
+ */
+ alloc_flags = update_block_group_flags(root, cache->flags);
+ if (alloc_flags != cache->flags) {
+ ret = do_chunk_alloc(trans, root, alloc_flags,
+ CHUNK_ALLOC_FORCE);
+ /*
+ * ENOSPC is allowed here, we may have enough space
+ * already allocated at the new raid level to
+ * carry on
+ */
+ if (ret == -ENOSPC)
+ ret = 0;
+ if (ret < 0)
+ goto out;
+ }
ret = set_block_group_ro(cache, 0);
if (!ret)
@@ -8819,7 +8860,9 @@ again:
out:
if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
alloc_flags = update_block_group_flags(root, cache->flags);
+ lock_chunks(root->fs_info->chunk_root);
check_system_chunk(trans, root, alloc_flags);
+ unlock_chunks(root->fs_info->chunk_root);
}
mutex_unlock(&root->fs_info->ro_block_group_mutex);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 43af5a61ad25..c32d226bfecc 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4772,6 +4772,25 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
start >> PAGE_CACHE_SHIFT);
if (eb && atomic_inc_not_zero(&eb->refs)) {
rcu_read_unlock();
+ /*
+ * Lock our eb's refs_lock to avoid races with
+ * free_extent_buffer. When we get our eb it might be flagged
+ * with EXTENT_BUFFER_STALE and another task running
+ * free_extent_buffer might have seen that flag set,
+ * eb->refs == 2, that the buffer isn't under IO (dirty and
+ * writeback flags not set) and it's still in the tree (flag
+ * EXTENT_BUFFER_TREE_REF set), therefore being in the process
+ * of decrementing the extent buffer's reference count twice.
+ * So here we could race and increment the eb's reference count,
+ * clear its stale flag, mark it as dirty and drop our reference
+ * before the other task finishes executing free_extent_buffer,
+ * which would later result in an attempt to free an extent
+ * buffer that is dirty.
+ */
+ if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
+ spin_lock(&eb->refs_lock);
+ spin_unlock(&eb->refs_lock);
+ }
mark_extent_buffer_accessed(eb, NULL);
return eb;
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 41c510b7cc11..9dbe5b548fa6 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -86,7 +86,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
mapping_set_gfp_mask(inode->i_mapping,
mapping_gfp_mask(inode->i_mapping) &
- ~(GFP_NOFS & ~__GFP_HIGHMEM));
+ ~(__GFP_FS | __GFP_HIGHMEM));
return inode;
}
@@ -3466,6 +3466,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
int ret;
struct btrfs_io_ctl io_ctl;
+ bool release_metadata = true;
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
return 0;
@@ -3473,11 +3474,20 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
memset(&io_ctl, 0, sizeof(io_ctl));
ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
trans, path, 0);
- if (!ret)
+ if (!ret) {
+ /*
+ * At this point writepages() didn't error out, so our metadata
+ * reservation is released when the writeback finishes, at
+ * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
+ * with or without an error.
+ */
+ release_metadata = false;
ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
+ }
if (ret) {
- btrfs_delalloc_release_metadata(inode, inode->i_size);
+ if (release_metadata)
+ btrfs_delalloc_release_metadata(inode, inode->i_size);
#ifdef DEBUG
btrfs_err(root->fs_info,
"failed to write free ino cache for root %llu",
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 157cc54fc634..760c4a5e096b 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -722,6 +722,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{
int ret = 0;
+ int ret_wb = 0;
u64 end;
u64 orig_end;
struct btrfs_ordered_extent *ordered;
@@ -741,9 +742,14 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
if (ret)
return ret;
- ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
- if (ret)
- return ret;
+ /*
+ * If we have a writeback error don't return immediately. Wait first
+ * for any ordered extents that haven't completed yet. This is to make
+ * sure no one can dirty the same page ranges and call writepages()
+ * before the ordered extents complete - to avoid failures (-EEXIST)
+ * when adding the new ordered extents to the ordered tree.
+ */
+ ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
end = orig_end;
while (1) {
@@ -767,7 +773,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
break;
end--;
}
- return ret;
+ return ret_wb ? ret_wb : ret;
}
/*
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 96aebf3bcd5b..174f5e1e00ab 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4625,6 +4625,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
{
u64 chunk_offset;
+ ASSERT(mutex_is_locked(&extent_root->fs_info->chunk_mutex));
chunk_offset = find_next_chunk(extent_root->fs_info);
return __btrfs_alloc_chunk(trans, extent_root, chunk_offset, type);
}