summaryrefslogtreecommitdiff
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c822
1 files changed, 526 insertions, 296 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f7efc26aa82a..8ab33caf016f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -94,7 +94,7 @@ enum {
};
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_inode *inode,
+ struct btrfs_inode *inode,
int inode_only,
struct btrfs_log_ctx *ctx);
static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
@@ -207,7 +207,7 @@ again:
}
atomic_inc(&root->log_writers);
- if (ctx && !ctx->logging_new_name) {
+ if (!ctx->logging_new_name) {
int index = root->log_transid % 2;
list_add_tail(&ctx->list, &root->log_ctxs[index]);
ctx->log_transid = root->log_transid;
@@ -368,25 +368,11 @@ static int process_one_buffer(struct btrfs_root *log,
return ret;
}
-/*
- * Item overwrite used by replay and tree logging. eb, slot and key all refer
- * to the src data we are copying out.
- *
- * root is the tree we are copying into, and path is a scratch
- * path for use in this function (it should be released on entry and
- * will be released on exit).
- *
- * If the key is already in the destination tree the existing item is
- * overwritten. If the existing item isn't big enough, it is extended.
- * If it is too large, it is truncated.
- *
- * If the key isn't in the destination yet, a new item is inserted.
- */
-static noinline int overwrite_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *eb, int slot,
- struct btrfs_key *key)
+static int do_overwrite_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct extent_buffer *eb, int slot,
+ struct btrfs_key *key)
{
int ret;
u32 item_size;
@@ -403,10 +389,22 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
item_size = btrfs_item_size_nr(eb, slot);
src_ptr = btrfs_item_ptr_offset(eb, slot);
- /* look for the key in the destination tree */
- ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
- if (ret < 0)
- return ret;
+ /* Our caller must have done a search for the key for us. */
+ ASSERT(path->nodes[0] != NULL);
+
+ /*
+ * And the slot must point to the exact key or the slot where the key
+ * should be at (the first item with a key greater than 'key')
+ */
+ if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
+ struct btrfs_key found_key;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+ ret = btrfs_comp_cpu_keys(&found_key, key);
+ ASSERT(ret >= 0);
+ } else {
+ ret = 1;
+ }
if (ret == 0) {
char *src_copy;
@@ -585,6 +583,36 @@ no_copy:
}
/*
+ * Item overwrite used by replay and tree logging. eb, slot and key all refer
+ * to the src data we are copying out.
+ *
+ * root is the tree we are copying into, and path is a scratch
+ * path for use in this function (it should be released on entry and
+ * will be released on exit).
+ *
+ * If the key is already in the destination tree the existing item is
+ * overwritten. If the existing item isn't big enough, it is extended.
+ * If it is too large, it is truncated.
+ *
+ * If the key isn't in the destination yet, a new item is inserted.
+ */
+static int overwrite_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct extent_buffer *eb, int slot,
+ struct btrfs_key *key)
+{
+ int ret;
+
+ /* Look for the key in the destination tree. */
+ ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ return do_overwrite_item(trans, root, path, eb, slot, key);
+}
+
+/*
* simple helper to read an inode off the disk from a given root
* This can only be called for subvolume roots and not for the log
*/
@@ -761,7 +789,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ins.objectid, ins.offset, 0);
btrfs_init_data_ref(&ref,
root->root_key.objectid,
- key->objectid, offset);
+ key->objectid, offset, 0, false);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret)
goto out;
@@ -893,11 +921,11 @@ out:
* item
*/
static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_inode *dir,
struct btrfs_dir_item *di)
{
+ struct btrfs_root *root = dir->root;
struct inode *inode;
char *name;
int name_len;
@@ -926,7 +954,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- ret = btrfs_unlink_inode(trans, root, dir, BTRFS_I(inode), name,
+ ret = btrfs_unlink_inode(trans, dir, BTRFS_I(inode), name,
name_len);
if (ret)
goto out;
@@ -939,9 +967,11 @@ out:
}
/*
- * helper function to see if a given name and sequence number found
- * in an inode back reference are already in a directory and correctly
- * point to this inode
+ * See if a given name and sequence number found in an inode back reference are
+ * already in a directory and correctly point to this inode.
+ *
+ * Returns: < 0 on error, 0 if the directory entry does not exists and 1 if it
+ * exists.
*/
static noinline int inode_in_dir(struct btrfs_root *root,
struct btrfs_path *path,
@@ -950,29 +980,34 @@ static noinline int inode_in_dir(struct btrfs_root *root,
{
struct btrfs_dir_item *di;
struct btrfs_key location;
- int match = 0;
+ int ret = 0;
di = btrfs_lookup_dir_index_item(NULL, root, path, dirid,
index, name, name_len, 0);
- if (di && !IS_ERR(di)) {
+ if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
+ goto out;
+ } else if (di) {
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
if (location.objectid != objectid)
goto out;
- } else
+ } else {
goto out;
- btrfs_release_path(path);
+ }
+ btrfs_release_path(path);
di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
- if (di && !IS_ERR(di)) {
- btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
- if (location.objectid != objectid)
- goto out;
- } else
+ if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
goto out;
- match = 1;
+ } else if (di) {
+ btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
+ if (location.objectid == objectid)
+ ret = 1;
+ }
out:
btrfs_release_path(path);
- return match;
+ return ret;
}
/*
@@ -1084,7 +1119,7 @@ again:
inc_nlink(&inode->vfs_inode);
btrfs_release_path(path);
- ret = btrfs_unlink_inode(trans, root, dir, inode,
+ ret = btrfs_unlink_inode(trans, dir, inode,
victim_name, victim_name_len);
kfree(victim_name);
if (ret)
@@ -1155,7 +1190,7 @@ again:
inc_nlink(&inode->vfs_inode);
btrfs_release_path(path);
- ret = btrfs_unlink_inode(trans, root,
+ ret = btrfs_unlink_inode(trans,
BTRFS_I(victim_parent),
inode,
victim_name,
@@ -1182,8 +1217,10 @@ next:
/* look for a conflicting sequence number */
di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
ref_index, name, namelen, 0);
- if (di && !IS_ERR(di)) {
- ret = drop_one_dir_item(trans, root, path, dir, di);
+ if (IS_ERR(di)) {
+ return PTR_ERR(di);
+ } else if (di) {
+ ret = drop_one_dir_item(trans, path, dir, di);
if (ret)
return ret;
}
@@ -1192,8 +1229,10 @@ next:
/* look for a conflicting name */
di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
name, namelen, 0);
- if (di && !IS_ERR(di)) {
- ret = drop_one_dir_item(trans, root, path, dir, di);
+ if (IS_ERR(di)) {
+ return PTR_ERR(di);
+ } else if (di) {
+ ret = drop_one_dir_item(trans, path, dir, di);
if (ret)
return ret;
}
@@ -1313,7 +1352,7 @@ again:
kfree(name);
goto out;
}
- ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+ ret = btrfs_unlink_inode(trans, BTRFS_I(dir),
inode, name, namelen);
kfree(name);
iput(dir);
@@ -1374,10 +1413,11 @@ out:
return ret;
}
-static int add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+static int add_link(struct btrfs_trans_handle *trans,
struct inode *dir, struct inode *inode, const char *name,
int namelen, u64 ref_index)
{
+ struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_dir_item *dir_item;
struct btrfs_key key;
struct btrfs_path *path;
@@ -1411,7 +1451,7 @@ static int add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root,
ret = -ENOENT;
goto out;
}
- ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), BTRFS_I(other_inode),
+ ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(other_inode),
name, namelen);
if (ret)
goto out;
@@ -1517,10 +1557,12 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- /* if we already have a perfect match, we're done */
- if (!inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)),
- btrfs_ino(BTRFS_I(inode)), ref_index,
- name, namelen)) {
+ ret = inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)),
+ btrfs_ino(BTRFS_I(inode)), ref_index,
+ name, namelen);
+ if (ret < 0) {
+ goto out;
+ } else if (ret == 0) {
/*
* look for a conflicting back reference in the
* metadata. if we find one we have to unlink that name
@@ -1555,7 +1597,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
ret = btrfs_inode_ref_exists(inode, dir, key->type,
name, namelen);
if (ret > 0) {
- ret = btrfs_unlink_inode(trans, root,
+ ret = btrfs_unlink_inode(trans,
BTRFS_I(dir),
BTRFS_I(inode),
name, namelen);
@@ -1571,7 +1613,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
goto out;
/* insert our name */
- ret = add_link(trans, root, dir, inode, name, namelen,
+ ret = add_link(trans, dir, inode, name, namelen,
ref_index);
if (ret)
goto out;
@@ -1580,6 +1622,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
if (ret)
goto out;
}
+ /* Else, ret == 1, we already have a perfect match, we're done. */
ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
kfree(name);
@@ -1936,8 +1979,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
struct btrfs_key log_key;
struct inode *dir;
u8 log_type;
- int exists;
- int ret = 0;
+ bool exists;
+ int ret;
bool update_size = (key->type == BTRFS_DIR_INDEX_KEY);
bool name_added = false;
@@ -1957,12 +2000,12 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
name_len);
btrfs_dir_item_key_to_cpu(eb, di, &log_key);
- exists = btrfs_lookup_inode(trans, root, path, &log_key, 0);
- if (exists == 0)
- exists = 1;
- else
- exists = 0;
+ ret = btrfs_lookup_inode(trans, root, path, &log_key, 0);
btrfs_release_path(path);
+ if (ret < 0)
+ goto out;
+ exists = (ret == 0);
+ ret = 0;
if (key->type == BTRFS_DIR_ITEM_KEY) {
dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
@@ -1977,7 +2020,11 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
ret = -EINVAL;
goto out;
}
- if (IS_ERR_OR_NULL(dst_di)) {
+
+ if (IS_ERR(dst_di)) {
+ ret = PTR_ERR(dst_di);
+ goto out;
+ } else if (!dst_di) {
/* we need a sequence number to insert, so we only
* do inserts for the BTRFS_DIR_INDEX_KEY types
*/
@@ -2003,7 +2050,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
if (!exists)
goto out;
- ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), dst_di);
+ ret = drop_one_dir_item(trans, path, BTRFS_I(dir), dst_di);
if (ret)
goto out;
@@ -2233,13 +2280,13 @@ out:
* to is unlinked
*/
static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct btrfs_root *log,
struct btrfs_path *path,
struct btrfs_path *log_path,
struct inode *dir,
struct btrfs_key *dir_key)
{
+ struct btrfs_root *root = BTRFS_I(dir)->root;
int ret;
struct extent_buffer *eb;
int slot;
@@ -2281,7 +2328,7 @@ again:
dir_key->offset,
name, name_len, 0);
}
- if (!log_di || log_di == ERR_PTR(-ENOENT)) {
+ if (!log_di) {
btrfs_dir_item_key_to_cpu(eb, di, &location);
btrfs_release_path(path);
btrfs_release_path(log_path);
@@ -2300,7 +2347,7 @@ again:
}
inc_nlink(inode);
- ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+ ret = btrfs_unlink_inode(trans, BTRFS_I(dir),
BTRFS_I(inode), name, name_len);
if (!ret)
ret = btrfs_run_delayed_items(trans);
@@ -2482,7 +2529,9 @@ again:
else {
ret = find_dir_range(log, path, dirid, key_type,
&range_start, &range_end);
- if (ret != 0)
+ if (ret < 0)
+ goto out;
+ else if (ret > 0)
break;
}
@@ -2511,7 +2560,7 @@ again:
if (found_key.offset > range_end)
break;
- ret = check_item_in_log(trans, root, log, path,
+ ret = check_item_in_log(trans, log, path,
log_path, dir,
&found_key);
if (ret)
@@ -3019,9 +3068,6 @@ static void wait_for_writer(struct btrfs_root *root)
static inline void btrfs_remove_log_ctx(struct btrfs_root *root,
struct btrfs_log_ctx *ctx)
{
- if (!ctx)
- return;
-
mutex_lock(&root->log_mutex);
list_del_init(&ctx->list);
mutex_unlock(&root->log_mutex);
@@ -3310,7 +3356,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* writing the super here would result in transid mismatches. If there
* is an error here just bail.
*/
- if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+ if (BTRFS_FS_ERROR(fs_info)) {
ret = -EIO;
btrfs_set_log_full_commit(trans);
btrfs_abort_transaction(trans, ret);
@@ -3434,6 +3480,9 @@ static bool inode_logged(struct btrfs_trans_handle *trans,
if (inode->logged_trans == trans->transid)
return true;
+ if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state))
+ return false;
+
/*
* The inode's logged_trans is always 0 when we load it (because it is
* not persisted in the inode item or elsewhere). So if it is 0, the
@@ -3472,10 +3521,10 @@ static bool inode_logged(struct btrfs_trans_handle *trans,
* This optimizations allows us to avoid relogging the entire inode
* or the entire directory.
*/
-int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
- struct btrfs_inode *dir, u64 index)
+void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const char *name, int name_len,
+ struct btrfs_inode *dir, u64 index)
{
struct btrfs_root *log;
struct btrfs_dir_item *di;
@@ -3485,11 +3534,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
u64 dir_ino = btrfs_ino(dir);
if (!inode_logged(trans, dir))
- return 0;
+ return;
ret = join_running_log_trans(root);
if (ret)
- return 0;
+ return;
mutex_lock(&dir->log_mutex);
@@ -3537,49 +3586,36 @@ fail:
btrfs_free_path(path);
out_unlock:
mutex_unlock(&dir->log_mutex);
- if (err == -ENOSPC) {
+ if (err < 0)
btrfs_set_log_full_commit(trans);
- err = 0;
- } else if (err < 0 && err != -ENOENT) {
- /* ENOENT can be returned if the entry hasn't been fsynced yet */
- btrfs_abort_transaction(trans, err);
- }
-
btrfs_end_log_trans(root);
-
- return err;
}
/* see comments for btrfs_del_dir_entries_in_log */
-int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
- struct btrfs_inode *inode, u64 dirid)
+void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const char *name, int name_len,
+ struct btrfs_inode *inode, u64 dirid)
{
struct btrfs_root *log;
u64 index;
int ret;
if (!inode_logged(trans, inode))
- return 0;
+ return;
ret = join_running_log_trans(root);
if (ret)
- return 0;
+ return;
log = root->log_root;
mutex_lock(&inode->log_mutex);
ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
dirid, &index);
mutex_unlock(&inode->log_mutex);
- if (ret == -ENOSPC) {
+ if (ret < 0 && ret != -ENOENT)
btrfs_set_log_full_commit(trans);
- ret = 0;
- } else if (ret < 0 && ret != -ENOENT)
- btrfs_abort_transaction(trans, ret);
btrfs_end_log_trans(root);
-
- return ret;
}
/*
@@ -3615,31 +3651,231 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
return 0;
}
+static int flush_dir_items_batch(struct btrfs_trans_handle *trans,
+ struct btrfs_root *log,
+ struct extent_buffer *src,
+ struct btrfs_path *dst_path,
+ int start_slot,
+ int count)
+{
+ char *ins_data = NULL;
+ struct btrfs_item_batch batch;
+ struct extent_buffer *dst;
+ unsigned long src_offset;
+ unsigned long dst_offset;
+ struct btrfs_key key;
+ u32 item_size;
+ int ret;
+ int i;
+
+ ASSERT(count > 0);
+ batch.nr = count;
+
+ if (count == 1) {
+ btrfs_item_key_to_cpu(src, &key, start_slot);
+ item_size = btrfs_item_size_nr(src, start_slot);
+ batch.keys = &key;
+ batch.data_sizes = &item_size;
+ batch.total_data_size = item_size;
+ } else {
+ struct btrfs_key *ins_keys;
+ u32 *ins_sizes;
+
+ ins_data = kmalloc(count * sizeof(u32) +
+ count * sizeof(struct btrfs_key), GFP_NOFS);
+ if (!ins_data)
+ return -ENOMEM;
+
+ ins_sizes = (u32 *)ins_data;
+ ins_keys = (struct btrfs_key *)(ins_data + count * sizeof(u32));
+ batch.keys = ins_keys;
+ batch.data_sizes = ins_sizes;
+ batch.total_data_size = 0;
+
+ for (i = 0; i < count; i++) {
+ const int slot = start_slot + i;
+
+ btrfs_item_key_to_cpu(src, &ins_keys[i], slot);
+ ins_sizes[i] = btrfs_item_size_nr(src, slot);
+ batch.total_data_size += ins_sizes[i];
+ }
+ }
+
+ ret = btrfs_insert_empty_items(trans, log, dst_path, &batch);
+ if (ret)
+ goto out;
+
+ dst = dst_path->nodes[0];
+ /*
+ * Copy all the items in bulk, in a single copy operation. Item data is
+ * organized such that it's placed at the end of a leaf and from right
+ * to left. For example, the data for the second item ends at an offset
+ * that matches the offset where the data for the first item starts, the
+ * data for the third item ends at an offset that matches the offset
+ * where the data of the second items starts, and so on.
+ * Therefore our source and destination start offsets for copy match the
+ * offsets of the last items (highest slots).
+ */
+ dst_offset = btrfs_item_ptr_offset(dst, dst_path->slots[0] + count - 1);
+ src_offset = btrfs_item_ptr_offset(src, start_slot + count - 1);
+ copy_extent_buffer(dst, src, dst_offset, src_offset, batch.total_data_size);
+ btrfs_release_path(dst_path);
+out:
+ kfree(ins_data);
+
+ return ret;
+}
+
+static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct btrfs_path *path,
+ struct btrfs_path *dst_path,
+ int key_type,
+ struct btrfs_log_ctx *ctx)
+{
+ struct btrfs_root *log = inode->root->log_root;
+ struct extent_buffer *src = path->nodes[0];
+ const int nritems = btrfs_header_nritems(src);
+ const u64 ino = btrfs_ino(inode);
+ const bool inode_logged_before = inode_logged(trans, inode);
+ u64 last_logged_key_offset;
+ bool last_found = false;
+ int batch_start = 0;
+ int batch_size = 0;
+ int i;
+
+ if (key_type == BTRFS_DIR_ITEM_KEY)
+ last_logged_key_offset = inode->last_dir_item_offset;
+ else
+ last_logged_key_offset = inode->last_dir_index_offset;
+
+ for (i = path->slots[0]; i < nritems; i++) {
+ struct btrfs_key key;
+ int ret;
+
+ btrfs_item_key_to_cpu(src, &key, i);
+
+ if (key.objectid != ino || key.type != key_type) {
+ last_found = true;
+ break;
+ }
+
+ ctx->last_dir_item_offset = key.offset;
+ /*
+ * We must make sure that when we log a directory entry, the
+ * corresponding inode, after log replay, has a matching link
+ * count. For example:
+ *
+ * touch foo
+ * mkdir mydir
+ * sync
+ * ln foo mydir/bar
+ * xfs_io -c "fsync" mydir
+ * <crash>
+ * <mount fs and log replay>
+ *
+ * Would result in a fsync log that when replayed, our file inode
+ * would have a link count of 1, but we get two directory entries
+ * pointing to the same inode. After removing one of the names,
+ * it would not be possible to remove the other name, which
+ * resulted always in stale file handle errors, and would not be
+ * possible to rmdir the parent directory, since its i_size could
+ * never be decremented to the value BTRFS_EMPTY_DIR_SIZE,
+ * resulting in -ENOTEMPTY errors.
+ */
+ if (!ctx->log_new_dentries) {
+ struct btrfs_dir_item *di;
+ struct btrfs_key di_key;
+
+ di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
+ btrfs_dir_item_key_to_cpu(src, di, &di_key);
+ if ((btrfs_dir_transid(src, di) == trans->transid ||
+ btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
+ di_key.type != BTRFS_ROOT_ITEM_KEY)
+ ctx->log_new_dentries = true;
+ }
+
+ if (!inode_logged_before)
+ goto add_to_batch;
+
+ /*
+ * If we were logged before and have logged dir items, we can skip
+ * checking if any item with a key offset larger than the last one
+ * we logged is in the log tree, saving time and avoiding adding
+ * contention on the log tree.
+ */
+ if (key.offset > last_logged_key_offset)
+ goto add_to_batch;
+ /*
+ * Check if the key was already logged before. If not we can add
+ * it to a batch for bulk insertion.
+ */
+ ret = btrfs_search_slot(NULL, log, &key, dst_path, 0, 0);
+ if (ret < 0) {
+ return ret;
+ } else if (ret > 0) {
+ btrfs_release_path(dst_path);
+ goto add_to_batch;
+ }
+
+ /*
+ * Item exists in the log. Overwrite the item in the log if it
+ * has different content or do nothing if it has exactly the same
+ * content. And then flush the current batch if any - do it after
+ * overwriting the current item, or we would deadlock otherwise,
+ * since we are holding a path for the existing item.
+ */
+ ret = do_overwrite_item(trans, log, dst_path, src, i, &key);
+ if (ret < 0)
+ return ret;
+
+ if (batch_size > 0) {
+ ret = flush_dir_items_batch(trans, log, src, dst_path,
+ batch_start, batch_size);
+ if (ret < 0)
+ return ret;
+ batch_size = 0;
+ }
+ continue;
+add_to_batch:
+ if (batch_size == 0)
+ batch_start = i;
+ batch_size++;
+ }
+
+ if (batch_size > 0) {
+ int ret;
+
+ ret = flush_dir_items_batch(trans, log, src, dst_path,
+ batch_start, batch_size);
+ if (ret < 0)
+ return ret;
+ }
+
+ return last_found ? 1 : 0;
+}
+
/*
* log all the items included in the current transaction for a given
* directory. This also creates the range items in the log tree required
* to replay anything deleted before the fsync
*/
static noinline int log_dir_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_inode *inode,
+ struct btrfs_inode *inode,
struct btrfs_path *path,
struct btrfs_path *dst_path, int key_type,
struct btrfs_log_ctx *ctx,
u64 min_offset, u64 *last_offset_ret)
{
struct btrfs_key min_key;
+ struct btrfs_root *root = inode->root;
struct btrfs_root *log = root->log_root;
- struct extent_buffer *src;
int err = 0;
int ret;
- int i;
- int nritems;
u64 first_offset = min_offset;
u64 last_offset = (u64)-1;
u64 ino = btrfs_ino(inode);
- log = root->log_root;
-
min_key.objectid = ino;
min_key.type = key_type;
min_key.offset = min_offset;
@@ -3713,62 +3949,14 @@ search:
* from our directory
*/
while (1) {
- struct btrfs_key tmp;
- src = path->nodes[0];
- nritems = btrfs_header_nritems(src);
- for (i = path->slots[0]; i < nritems; i++) {
- struct btrfs_dir_item *di;
-
- btrfs_item_key_to_cpu(src, &min_key, i);
-
- if (min_key.objectid != ino || min_key.type != key_type)
- goto done;
-
- if (need_resched()) {
- btrfs_release_path(path);
- cond_resched();
- goto search;
- }
-
- ret = overwrite_item(trans, log, dst_path, src, i,
- &min_key);
- if (ret) {
+ ret = process_dir_items_leaf(trans, inode, path, dst_path,
+ key_type, ctx);
+ if (ret != 0) {
+ if (ret < 0)
err = ret;
- goto done;
- }
-
- /*
- * We must make sure that when we log a directory entry,
- * the corresponding inode, after log replay, has a
- * matching link count. For example:
- *
- * touch foo
- * mkdir mydir
- * sync
- * ln foo mydir/bar
- * xfs_io -c "fsync" mydir
- * <crash>
- * <mount fs and log replay>
- *
- * Would result in a fsync log that when replayed, our
- * file inode would have a link count of 1, but we get
- * two directory entries pointing to the same inode.
- * After removing one of the names, it would not be
- * possible to remove the other name, which resulted
- * always in stale file handle errors, and would not
- * be possible to rmdir the parent directory, since
- * its i_size could never decrement to the value
- * BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors.
- */
- di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
- btrfs_dir_item_key_to_cpu(src, di, &tmp);
- if (ctx &&
- (btrfs_dir_transid(src, di) == trans->transid ||
- btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
- tmp.type != BTRFS_ROOT_ITEM_KEY)
- ctx->log_new_dentries = true;
+ goto done;
}
- path->slots[0] = nritems;
+ path->slots[0] = btrfs_header_nritems(path->nodes[0]);
/*
* look ahead to the next item and see if it is also
@@ -3782,21 +3970,26 @@ search:
err = ret;
goto done;
}
- btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
- if (tmp.objectid != ino || tmp.type != key_type) {
+ btrfs_item_key_to_cpu(path->nodes[0], &min_key, path->slots[0]);
+ if (min_key.objectid != ino || min_key.type != key_type) {
last_offset = (u64)-1;
goto done;
}
if (btrfs_header_generation(path->nodes[0]) != trans->transid) {
ret = overwrite_item(trans, log, dst_path,
path->nodes[0], path->slots[0],
- &tmp);
+ &min_key);
if (ret)
err = ret;
else
- last_offset = tmp.offset;
+ last_offset = min_key.offset;
goto done;
}
+ if (need_resched()) {
+ btrfs_release_path(path);
+ cond_resched();
+ goto search;
+ }
}
done:
btrfs_release_path(path);
@@ -3829,7 +4022,7 @@ done:
* key logged by this transaction.
*/
static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_inode *inode,
+ struct btrfs_inode *inode,
struct btrfs_path *path,
struct btrfs_path *dst_path,
struct btrfs_log_ctx *ctx)
@@ -3839,11 +4032,33 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
int ret;
int key_type = BTRFS_DIR_ITEM_KEY;
+ /*
+ * If this is the first time we are being logged in the current
+ * transaction, or we were logged before but the inode was evicted and
+ * reloaded later, in which case its logged_trans is 0, reset the values
+ * of the last logged key offsets. Note that we don't use the helper
+ * function inode_logged() here - that is because the function returns
+ * true after an inode eviction, assuming the worst case as it can not
+ * know for sure if the inode was logged before. So we can not skip key
+ * searches in the case the inode was evicted, because it may not have
+ * been logged in this transaction and may have been logged in a past
+ * transaction, so we need to reset the last dir item and index offsets
+ * to (u64)-1.
+ */
+ if (inode->logged_trans != trans->transid) {
+ inode->last_dir_item_offset = (u64)-1;
+ inode->last_dir_index_offset = (u64)-1;
+ }
again:
min_key = 0;
max_key = 0;
+ if (key_type == BTRFS_DIR_ITEM_KEY)
+ ctx->last_dir_item_offset = inode->last_dir_item_offset;
+ else
+ ctx->last_dir_item_offset = inode->last_dir_index_offset;
+
while (1) {
- ret = log_dir_items(trans, root, inode, path, dst_path, key_type,
+ ret = log_dir_items(trans, inode, path, dst_path, key_type,
ctx, min_key, &max_key);
if (ret)
return ret;
@@ -3853,8 +4068,11 @@ again:
}
if (key_type == BTRFS_DIR_ITEM_KEY) {
+ inode->last_dir_item_offset = ctx->last_dir_item_offset;
key_type = BTRFS_DIR_INDEX_KEY;
goto again;
+ } else {
+ inode->last_dir_index_offset = ctx->last_dir_item_offset;
}
return 0;
}
@@ -3865,17 +4083,21 @@ again:
* This cannot be run for file data extents because it does not
* free the extents they point to.
*/
-static int drop_objectid_items(struct btrfs_trans_handle *trans,
+static int drop_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *log,
struct btrfs_path *path,
- u64 objectid, int max_key_type)
+ struct btrfs_inode *inode,
+ int max_key_type)
{
int ret;
struct btrfs_key key;
struct btrfs_key found_key;
int start_slot;
- key.objectid = objectid;
+ if (!inode_logged(trans, inode))
+ return 0;
+
+ key.objectid = btrfs_ino(inode);
key.type = max_key_type;
key.offset = (u64)-1;
@@ -3892,7 +4114,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0]);
- if (found_key.objectid != objectid)
+ if (found_key.objectid != key.objectid)
break;
found_key.offset = 0;
@@ -3917,6 +4139,21 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
return ret;
}
+static int truncate_inode_items(struct btrfs_trans_handle *trans,
+ struct btrfs_root *log_root,
+ struct btrfs_inode *inode,
+ u64 new_size, u32 min_type)
+{
+ int ret;
+
+ do {
+ ret = btrfs_truncate_inode_items(trans, log_root, inode,
+ new_size, min_type, NULL);
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
+
static void fill_inode_item(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf,
struct btrfs_inode_item *item,
@@ -4089,6 +4326,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
int ret;
struct btrfs_key *ins_keys;
u32 *ins_sizes;
+ struct btrfs_item_batch batch;
char *ins_data;
int i;
struct list_head ordered_sums;
@@ -4103,13 +4341,17 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
ins_sizes = (u32 *)ins_data;
ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
+ batch.keys = ins_keys;
+ batch.data_sizes = ins_sizes;
+ batch.total_data_size = 0;
+ batch.nr = nr;
for (i = 0; i < nr; i++) {
ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot);
+ batch.total_data_size += ins_sizes[i];
btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot);
}
- ret = btrfs_insert_empty_items(trans, log, dst_path,
- ins_keys, ins_sizes, nr);
+ ret = btrfs_insert_empty_items(trans, log, dst_path, &batch);
if (ret) {
kfree(ins_data);
return ret;
@@ -4321,13 +4563,13 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
}
static int log_one_extent(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode, struct btrfs_root *root,
+ struct btrfs_inode *inode,
const struct extent_map *em,
struct btrfs_path *path,
struct btrfs_log_ctx *ctx)
{
struct btrfs_drop_extents_args drop_args = { 0 };
- struct btrfs_root *log = root->log_root;
+ struct btrfs_root *log = inode->root->log_root;
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
struct btrfs_map_token token;
@@ -4340,14 +4582,25 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
if (ret)
return ret;
- drop_args.path = path;
- drop_args.start = em->start;
- drop_args.end = em->start + em->len;
- drop_args.replace_extent = true;
- drop_args.extent_item_size = sizeof(*fi);
- ret = btrfs_drop_extents(trans, log, inode, &drop_args);
- if (ret)
- return ret;
+ /*
+ * If this is the first time we are logging the inode in the current
+ * transaction, we can avoid btrfs_drop_extents(), which is expensive
+ * because it does a deletion search, which always acquires write locks
+ * for extent buffers at levels 2, 1 and 0. This not only wastes time
+ * but also adds significant contention in a log tree, since log trees
+ * are small, with a root at level 2 or 3 at most, due to their short
+ * life span.
+ */
+ if (inode_logged(trans, inode)) {
+ drop_args.path = path;
+ drop_args.start = em->start;
+ drop_args.end = em->start + em->len;
+ drop_args.replace_extent = true;
+ drop_args.extent_item_size = sizeof(*fi);
+ ret = btrfs_drop_extents(trans, log, inode, &drop_args);
+ if (ret)
+ return ret;
+ }
if (!drop_args.extent_inserted) {
key.objectid = btrfs_ino(inode);
@@ -4505,13 +4758,9 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
* Avoid logging extent items logged in past fsync calls
* and leading to duplicate keys in the log tree.
*/
- do {
- ret = btrfs_truncate_inode_items(trans,
- root->log_root,
- inode, truncate_offset,
- BTRFS_EXTENT_DATA_KEY,
- NULL);
- } while (ret == -EAGAIN);
+ ret = truncate_inode_items(trans, root->log_root, inode,
+ truncate_offset,
+ BTRFS_EXTENT_DATA_KEY);
if (ret)
goto out;
dropped_extents = true;
@@ -4538,7 +4787,6 @@ out:
}
static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct btrfs_inode *inode,
struct btrfs_path *path,
struct btrfs_log_ctx *ctx)
@@ -4603,7 +4851,7 @@ process:
write_unlock(&tree->lock);
- ret = log_one_extent(trans, inode, root, em, path, ctx);
+ ret = log_one_extent(trans, inode, em, path, ctx);
write_lock(&tree->lock);
clear_em_logging(tree, em);
free_extent_map(em);
@@ -4692,11 +4940,11 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode,
* with a journal, ext3/4, xfs, f2fs, etc).
*/
static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct btrfs_inode *inode,
struct btrfs_path *path,
struct btrfs_path *dst_path)
{
+ struct btrfs_root *root = inode->root;
int ret;
struct btrfs_key key;
const u64 ino = btrfs_ino(inode);
@@ -4770,10 +5018,10 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
* truncate operation that changes the inode's size.
*/
static int btrfs_log_holes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct btrfs_inode *inode,
struct btrfs_path *path)
{
+ struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key key;
const u64 ino = btrfs_ino(inode);
@@ -5050,7 +5298,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
} else {
- ret = btrfs_log_inode(trans, root,
+ ret = btrfs_log_inode(trans,
BTRFS_I(inode),
LOG_OTHER_INODE_ALL,
ctx);
@@ -5110,8 +5358,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
* well because during a rename we pin the log and update the
* log with the new name before we unpin it.
*/
- ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
- LOG_OTHER_INODE, ctx);
+ ret = btrfs_log_inode(trans, BTRFS_I(inode), LOG_OTHER_INODE, ctx);
if (ret) {
btrfs_add_delayed_iput(inode);
continue;
@@ -5222,7 +5469,7 @@ again:
&other_ino, &other_parent);
if (ret < 0) {
return ret;
- } else if (ret > 0 && ctx &&
+ } else if (ret > 0 &&
other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
if (ins_nr > 0) {
ins_nr++;
@@ -5322,7 +5569,7 @@ next_key:
* This handles both files and directories.
*/
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_inode *inode,
+ struct btrfs_inode *inode,
int inode_only,
struct btrfs_log_ctx *ctx)
{
@@ -5330,7 +5577,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_path *dst_path;
struct btrfs_key min_key;
struct btrfs_key max_key;
- struct btrfs_root *log = root->log_root;
+ struct btrfs_root *log = inode->root->log_root;
int err = 0;
int ret = 0;
bool fast_search = false;
@@ -5372,22 +5619,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
* Only run delayed items if we are a directory. We want to make sure
* all directory indexes hit the fs/subvolume tree so we can find them
* and figure out which index ranges have to be logged.
- *
- * Otherwise commit the delayed inode only if the full sync flag is set,
- * as we want to make sure an up to date version is in the subvolume
- * tree so copy_inode_items_to_log() / copy_items() can find it and copy
- * it to the log tree. For a non full sync, we always log the inode item
- * based on the in-memory struct btrfs_inode which is always up to date.
*/
- if (S_ISDIR(inode->vfs_inode.i_mode))
- ret = btrfs_commit_inode_delayed_items(trans, inode);
- else if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
- ret = btrfs_commit_inode_delayed_inode(inode);
-
- if (ret) {
- btrfs_free_path(path);
- btrfs_free_path(dst_path);
- return ret;
+ if (S_ISDIR(inode->vfs_inode.i_mode)) {
+ err = btrfs_commit_inode_delayed_items(trans, inode);
+ if (err)
+ goto out;
}
if (inode_only == LOG_OTHER_INODE || inode_only == LOG_OTHER_INODE_ALL) {
@@ -5426,9 +5662,9 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
clear_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags);
if (inode_only == LOG_INODE_EXISTS)
max_key_type = BTRFS_XATTR_ITEM_KEY;
- ret = drop_objectid_items(trans, log, path, ino, max_key_type);
+ ret = drop_inode_items(trans, log, path, inode, max_key_type);
} else {
- if (inode_only == LOG_INODE_EXISTS) {
+ if (inode_only == LOG_INODE_EXISTS && inode_logged(trans, inode)) {
/*
* Make sure the new inode item we write to the log has
* the same isize as the current one (if it exists).
@@ -5450,19 +5686,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
&inode->runtime_flags)) {
if (inode_only == LOG_INODE_EXISTS) {
max_key.type = BTRFS_XATTR_ITEM_KEY;
- ret = drop_objectid_items(trans, log, path, ino,
- max_key.type);
+ ret = drop_inode_items(trans, log, path, inode,
+ max_key.type);
} else {
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&inode->runtime_flags);
clear_bit(BTRFS_INODE_COPY_EVERYTHING,
&inode->runtime_flags);
- while(1) {
- ret = btrfs_truncate_inode_items(trans,
- log, inode, 0, 0, NULL);
- if (ret != -EAGAIN)
- break;
- }
+ if (inode_logged(trans, inode))
+ ret = truncate_inode_items(trans, log,
+ inode, 0, 0);
}
} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
&inode->runtime_flags) ||
@@ -5470,8 +5703,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
if (inode_only == LOG_INODE_ALL)
fast_search = true;
max_key.type = BTRFS_XATTR_ITEM_KEY;
- ret = drop_objectid_items(trans, log, path, ino,
- max_key.type);
+ ret = drop_inode_items(trans, log, path, inode,
+ max_key.type);
} else {
if (inode_only == LOG_INODE_ALL)
fast_search = true;
@@ -5494,14 +5727,14 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
btrfs_release_path(dst_path);
- err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
+ err = btrfs_log_all_xattrs(trans, inode, path, dst_path);
if (err)
goto out_unlock;
xattrs_logged = true;
if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
btrfs_release_path(path);
btrfs_release_path(dst_path);
- err = btrfs_log_holes(trans, root, inode, path);
+ err = btrfs_log_holes(trans, inode, path);
if (err)
goto out_unlock;
}
@@ -5521,16 +5754,14 @@ log_extents:
* BTRFS_INODE_COPY_EVERYTHING set.
*/
if (!xattrs_logged && inode->logged_trans < trans->transid) {
- err = btrfs_log_all_xattrs(trans, root, inode, path,
- dst_path);
+ err = btrfs_log_all_xattrs(trans, inode, path, dst_path);
if (err)
goto out_unlock;
btrfs_release_path(path);
}
}
if (fast_search) {
- ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
- ctx);
+ ret = btrfs_log_changed_extents(trans, inode, dst_path, ctx);
if (ret) {
err = ret;
goto out_unlock;
@@ -5545,59 +5776,52 @@ log_extents:
}
if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->vfs_inode.i_mode)) {
- ret = log_directory_changes(trans, root, inode, path, dst_path,
- ctx);
+ ret = log_directory_changes(trans, inode, path, dst_path, ctx);
if (ret) {
err = ret;
goto out_unlock;
}
}
+ spin_lock(&inode->lock);
+ inode->logged_trans = trans->transid;
/*
- * If we are logging that an ancestor inode exists as part of logging a
- * new name from a link or rename operation, don't mark the inode as
- * logged - otherwise if an explicit fsync is made against an ancestor,
- * the fsync considers the inode in the log and doesn't sync the log,
- * resulting in the ancestor missing after a power failure unless the
- * log was synced as part of an fsync against any other unrelated inode.
- * So keep it simple for this case and just don't flag the ancestors as
- * logged.
+ * Don't update last_log_commit if we logged that an inode exists.
+ * We do this for three reasons:
+ *
+ * 1) We might have had buffered writes to this inode that were
+ * flushed and had their ordered extents completed in this
+ * transaction, but we did not previously log the inode with
+ * LOG_INODE_ALL. Later the inode was evicted and after that
+ * it was loaded again and this LOG_INODE_EXISTS log operation
+ * happened. We must make sure that if an explicit fsync against
+ * the inode is performed later, it logs the new extents, an
+ * updated inode item, etc, and syncs the log. The same logic
+ * applies to direct IO writes instead of buffered writes.
+ *
+ * 2) When we log the inode with LOG_INODE_EXISTS, its inode item
+ * is logged with an i_size of 0 or whatever value was logged
+ * before. If later the i_size of the inode is increased by a
+ * truncate operation, the log is synced through an fsync of
+ * some other inode and then finally an explicit fsync against
+ * this inode is made, we must make sure this fsync logs the
+ * inode with the new i_size, the hole between old i_size and
+ * the new i_size, and syncs the log.
+ *
+ * 3) If we are logging that an ancestor inode exists as part of
+ * logging a new name from a link or rename operation, don't update
+ * its last_log_commit - otherwise if an explicit fsync is made
+ * against an ancestor, the fsync considers the inode in the log
+ * and doesn't sync the log, resulting in the ancestor missing after
+ * a power failure unless the log was synced as part of an fsync
+ * against any other unrelated inode.
*/
- if (!ctx ||
- !(S_ISDIR(inode->vfs_inode.i_mode) && ctx->logging_new_name &&
- &inode->vfs_inode != ctx->inode)) {
- spin_lock(&inode->lock);
- inode->logged_trans = trans->transid;
- /*
- * Don't update last_log_commit if we logged that an inode exists.
- * We do this for two reasons:
- *
- * 1) We might have had buffered writes to this inode that were
- * flushed and had their ordered extents completed in this
- * transaction, but we did not previously log the inode with
- * LOG_INODE_ALL. Later the inode was evicted and after that
- * it was loaded again and this LOG_INODE_EXISTS log operation
- * happened. We must make sure that if an explicit fsync against
- * the inode is performed later, it logs the new extents, an
- * updated inode item, etc, and syncs the log. The same logic
- * applies to direct IO writes instead of buffered writes.
- *
- * 2) When we log the inode with LOG_INODE_EXISTS, its inode item
- * is logged with an i_size of 0 or whatever value was logged
- * before. If later the i_size of the inode is increased by a
- * truncate operation, the log is synced through an fsync of
- * some other inode and then finally an explicit fsync against
- * this inode is made, we must make sure this fsync logs the
- * inode with the new i_size, the hole between old i_size and
- * the new i_size, and syncs the log.
- */
- if (inode_only != LOG_INODE_EXISTS)
- inode->last_log_commit = inode->last_sub_trans;
- spin_unlock(&inode->lock);
- }
+ if (inode_only != LOG_INODE_EXISTS)
+ inode->last_log_commit = inode->last_sub_trans;
+ spin_unlock(&inode->lock);
out_unlock:
mutex_unlock(&inode->log_mutex);
-
+out:
btrfs_free_path(path);
btrfs_free_path(dst_path);
return err;
@@ -5697,6 +5921,14 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
struct btrfs_dir_list *dir_elem;
int ret = 0;
+ /*
+ * If we are logging a new name, as part of a link or rename operation,
+ * don't bother logging new dentries, as we just want to log the names
+ * of an inode and that any new parents exist.
+ */
+ if (ctx->logging_new_name)
+ return 0;
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -5773,7 +6005,7 @@ process_leaf:
ctx->log_new_dentries = false;
if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK)
log_mode = LOG_INODE_ALL;
- ret = btrfs_log_inode(trans, root, BTRFS_I(di_inode),
+ ret = btrfs_log_inode(trans, BTRFS_I(di_inode),
log_mode, ctx);
btrfs_add_delayed_iput(di_inode);
if (ret)
@@ -5917,11 +6149,10 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
continue;
}
- if (ctx)
- ctx->log_new_dentries = false;
- ret = btrfs_log_inode(trans, root, BTRFS_I(dir_inode),
+ ctx->log_new_dentries = false;
+ ret = btrfs_log_inode(trans, BTRFS_I(dir_inode),
LOG_INODE_ALL, ctx);
- if (!ret && ctx && ctx->log_new_dentries)
+ if (!ret && ctx->log_new_dentries)
ret = log_new_dir_dentries(trans, root,
BTRFS_I(dir_inode), ctx);
btrfs_add_delayed_iput(dir_inode);
@@ -5967,7 +6198,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
if (BTRFS_I(inode)->generation >= trans->transid &&
need_log_inode(trans, BTRFS_I(inode)))
- ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
+ ret = btrfs_log_inode(trans, BTRFS_I(inode),
LOG_INODE_EXISTS, ctx);
btrfs_add_delayed_iput(inode);
if (ret)
@@ -6022,7 +6253,7 @@ static int log_new_ancestors_fast(struct btrfs_trans_handle *trans,
if (inode->generation >= trans->transid &&
need_log_inode(trans, inode)) {
- ret = btrfs_log_inode(trans, root, inode,
+ ret = btrfs_log_inode(trans, inode,
LOG_INODE_EXISTS, ctx);
if (ret)
break;
@@ -6165,7 +6396,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (ret)
goto end_no_trans;
- ret = btrfs_log_inode(trans, root, inode, inode_only, ctx);
+ ret = btrfs_log_inode(trans, inode, inode_only, ctx);
if (ret)
goto end_trans;
@@ -6182,7 +6413,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
goto end_trans;
}
- if (S_ISDIR(inode->vfs_inode.i_mode) && ctx && ctx->log_new_dentries)
+ if (S_ISDIR(inode->vfs_inode.i_mode) && ctx->log_new_dentries)
log_dentries = true;
/*
@@ -6308,8 +6539,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
ret = walk_log_tree(trans, log_root_tree, &wc);
if (ret) {
- btrfs_handle_fs_error(fs_info, ret,
- "Failed to pin buffers while recovering log root tree.");
+ btrfs_abort_transaction(trans, ret);
goto error;
}
@@ -6322,8 +6552,7 @@ again:
ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
if (ret < 0) {
- btrfs_handle_fs_error(fs_info, ret,
- "Couldn't find tree log root.");
+ btrfs_abort_transaction(trans, ret);
goto error;
}
if (ret > 0) {
@@ -6340,8 +6569,7 @@ again:
log = btrfs_read_tree_root(log_root_tree, &found_key);
if (IS_ERR(log)) {
ret = PTR_ERR(log);
- btrfs_handle_fs_error(fs_info, ret,
- "Couldn't read tree log root.");
+ btrfs_abort_transaction(trans, ret);
goto error;
}
@@ -6369,8 +6597,7 @@ again:
if (!ret)
goto next;
- btrfs_handle_fs_error(fs_info, ret,
- "Couldn't read target root for tree log recovery.");
+ btrfs_abort_transaction(trans, ret);
goto error;
}
@@ -6378,14 +6605,15 @@ again:
ret = btrfs_record_root_in_trans(trans, wc.replay_dest);
if (ret)
/* The loop needs to continue due to the root refs */
- btrfs_handle_fs_error(fs_info, ret,
- "failed to record the log root in transaction");
+ btrfs_abort_transaction(trans, ret);
else
ret = walk_log_tree(trans, log, &wc);
if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
ret = fixup_inode_link_counts(trans, wc.replay_dest,
path);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
}
if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
@@ -6402,6 +6630,8 @@ again:
* could only happen during mount.
*/
ret = btrfs_init_root_free_objectid(root);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
}
wc.replay_dest->log_root = NULL;