diff options
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- | fs/btrfs/transaction.c | 162 |
1 file changed, 63 insertions, 99 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1c3a1189c0bd..03de89b45f27 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -162,7 +162,17 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans) struct btrfs_root *root, *tmp; struct btrfs_caching_control *caching_ctl, *next; + /* + * At this point no one can be using this transaction to modify any tree + * and no one can start another transaction to modify any tree either. + */ + ASSERT(cur_trans->state == TRANS_STATE_COMMIT_DOING); + down_write(&fs_info->commit_root_sem); + + if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) + fs_info->last_reloc_trans = trans->transid; + list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits, dirty_list) { list_del_init(&root->dirty_list); @@ -413,7 +423,6 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, if ((test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && root->last_trans < trans->transid) || force) { - WARN_ON(root == fs_info->extent_root); WARN_ON(!force && root->commit_root != root->node); /* @@ -628,7 +637,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, reloc_reserved = true; } - ret = btrfs_block_rsv_add(root, rsv, num_bytes, flush); + ret = btrfs_block_rsv_add(fs_info, rsv, num_bytes, flush); if (ret) goto reserve_fail; if (delayed_refs_bytes) { @@ -692,7 +701,6 @@ again: h->transid = cur_trans->transid; h->transaction = cur_trans; - h->root = root; refcount_set(&h->use_count, 1); h->fs_info = root->fs_info; @@ -1236,6 +1244,12 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans) struct extent_buffer *eb; int ret; + /* + * At this point no one can be using this transaction to modify any tree + * and no one can start another transaction to modify any tree either. 
+ */ + ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING); + eb = btrfs_lock_root_node(fs_info->tree_root); ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb, BTRFS_NESTING_COW); @@ -1267,9 +1281,8 @@ again: root = list_entry(next, struct btrfs_root, dirty_list); clear_bit(BTRFS_ROOT_DIRTY, &root->state); - if (root != fs_info->extent_root) - list_add_tail(&root->dirty_list, - &trans->transaction->switch_commits); + list_add_tail(&root->dirty_list, + &trans->transaction->switch_commits); ret = update_cowonly_root(trans, root); if (ret) return ret; @@ -1299,9 +1312,6 @@ again: if (!list_empty(&fs_info->dirty_cowonly_roots)) goto again; - list_add_tail(&fs_info->extent_root->dirty_list, - &trans->transaction->switch_commits); - /* Update dev-replace pointer once everything is committed */ fs_info->dev_replace.committed_cursor_left = fs_info->dev_replace.cursor_left_last_write_of_item; @@ -1327,7 +1337,8 @@ void btrfs_add_dead_root(struct btrfs_root *root) } /* - * update all the cowonly tree roots on disk + * Update each subvolume root and its relocation root, if it exists, in the tree + * of tree roots. Also free log roots if they exist. */ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) { @@ -1336,6 +1347,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) int i; int ret; + /* + * At this point no one can be using this transaction to modify any tree + * and no one can start another transaction to modify any tree either. + */ + ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING); + spin_lock(&fs_info->fs_roots_radix_lock); while (1) { ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, @@ -1348,6 +1365,14 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) struct btrfs_root *root = gang[i]; int ret2; + /* + * At this point we can neither have tasks logging inodes + * from a root nor trying to commit a log tree. 
+ */ + ASSERT(atomic_read(&root->log_writers) == 0); + ASSERT(atomic_read(&root->log_commit[0]) == 0); + ASSERT(atomic_read(&root->log_commit[1]) == 0); + radix_tree_tag_clear(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); @@ -1472,12 +1497,6 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, return ret; } - /* - * We are going to commit transaction, see btrfs_commit_transaction() - * comment for reason locking tree_log_mutex - */ - mutex_lock(&fs_info->tree_log_mutex); - ret = commit_fs_roots(trans); if (ret) goto out; @@ -1513,8 +1532,6 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, "Error while writing out transaction for qgroup"); out: - mutex_unlock(&fs_info->tree_log_mutex); - /* * Force parent root to be updated, as we recorded it before so its * last_trans == cur_transid. @@ -1578,7 +1595,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_reloc_pre_snapshot(pending, &to_reserve); if (to_reserve > 0) { - pending->error = btrfs_block_rsv_add(root, + pending->error = btrfs_block_rsv_add(fs_info, &pending->block_rsv, to_reserve, BTRFS_RESERVE_NO_FLUSH); @@ -1861,50 +1878,14 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info) return ret; } -/* - * commit transactions asynchronously. once btrfs_commit_transaction_async - * returns, any subsequent transaction will not be allowed to join. - */ -struct btrfs_async_commit { - struct btrfs_trans_handle *newtrans; - struct work_struct work; -}; - -static void do_async_commit(struct work_struct *work) -{ - struct btrfs_async_commit *ac = - container_of(work, struct btrfs_async_commit, work); - - /* - * We've got freeze protection passed with the transaction. - * Tell lockdep about it. 
- */ - if (ac->newtrans->type & __TRANS_FREEZABLE) - __sb_writers_acquired(ac->newtrans->fs_info->sb, SB_FREEZE_FS); - - current->journal_info = ac->newtrans; - - btrfs_commit_transaction(ac->newtrans); - kfree(ac); -} - -int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans) +void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_async_commit *ac; struct btrfs_transaction *cur_trans; - ac = kmalloc(sizeof(*ac), GFP_NOFS); - if (!ac) - return -ENOMEM; - - INIT_WORK(&ac->work, do_async_commit); - ac->newtrans = btrfs_join_transaction(trans->root); - if (IS_ERR(ac->newtrans)) { - int err = PTR_ERR(ac->newtrans); - kfree(ac); - return err; - } + /* Kick the transaction kthread. */ + set_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags); + wake_up_process(fs_info->transaction_kthread); /* take transaction reference */ cur_trans = trans->transaction; @@ -1913,28 +1894,15 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans) btrfs_end_transaction(trans); /* - * Tell lockdep we've released the freeze rwsem, since the - * async commit thread will be the one to unlock it. 
- */ - if (ac->newtrans->type & __TRANS_FREEZABLE) - __sb_writers_release(fs_info->sb, SB_FREEZE_FS); - - schedule_work(&ac->work); - /* * Wait for the current transaction commit to start and block * subsequent transaction joins */ wait_event(fs_info->transaction_blocked_wait, cur_trans->state >= TRANS_STATE_COMMIT_START || TRANS_ABORTED(cur_trans)); - if (current->journal_info == trans) - current->journal_info = NULL; - btrfs_put_transaction(cur_trans); - return 0; } - static void cleanup_transaction(struct btrfs_trans_handle *trans, int err) { struct btrfs_fs_info *fs_info = trans->fs_info; @@ -1986,7 +1954,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err) btrfs_put_transaction(cur_trans); btrfs_put_transaction(cur_trans); - trace_btrfs_transaction_commit(trans->root); + trace_btrfs_transaction_commit(fs_info); if (current->journal_info == trans) current->journal_info = NULL; @@ -2200,6 +2168,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) wait_event(cur_trans->writer_wait, atomic_read(&cur_trans->num_writers) == 1); + /* + * We've started the commit, clear the flag in case we were triggered to + * do an async commit but somebody else started before the transaction + * kthread could do the work. + */ + clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags); + if (TRANS_ABORTED(cur_trans)) { ret = cur_trans->aborted; goto scrub_continue; @@ -2246,24 +2221,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) WARN_ON(cur_trans != trans->transaction); - /* btrfs_commit_tree_roots is responsible for getting the - * various roots consistent with each other. Every pointer - * in the tree of tree roots has to point to the most up to date - * root for every subvolume and other tree. So, we have to keep - * the tree logging code from jumping in and changing any - * of the trees. 
- * - * At this point in the commit, there can't be any tree-log - * writers, but a little lower down we drop the trans mutex - * and let new people in. By holding the tree_log_mutex - * from now until after the super is written, we avoid races - * with the tree-log code. - */ - mutex_lock(&fs_info->tree_log_mutex); - ret = commit_fs_roots(trans); if (ret) - goto unlock_tree_log; + goto unlock_reloc; /* * Since the transaction is done, we can apply the pending changes @@ -2282,11 +2242,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) */ ret = btrfs_qgroup_account_extents(trans); if (ret < 0) - goto unlock_tree_log; + goto unlock_reloc; ret = commit_cowonly_roots(trans); if (ret) - goto unlock_tree_log; + goto unlock_reloc; /* * The tasks which save the space cache and inode cache may also @@ -2294,7 +2254,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) */ if (TRANS_ABORTED(cur_trans)) { ret = cur_trans->aborted; - goto unlock_tree_log; + goto unlock_reloc; } cur_trans = fs_info->running_transaction; @@ -2327,6 +2287,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) btrfs_trans_release_chunk_metadata(trans); + /* + * Before changing the transaction state to TRANS_STATE_UNBLOCKED and + * setting fs_info->running_transaction to NULL, lock tree_log_mutex to + * make sure that before we commit our superblock, no other task can + * start a new transaction and commit a log tree before we commit our + * superblock. Anyone trying to commit a log tree locks this mutex before + * writing its superblock. 
+ */ + mutex_lock(&fs_info->tree_log_mutex); + spin_lock(&fs_info->trans_lock); cur_trans->state = TRANS_STATE_UNBLOCKED; fs_info->running_transaction = NULL; @@ -2339,10 +2309,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (ret) { btrfs_handle_fs_error(fs_info, ret, "Error while writing out transaction"); - /* - * reloc_mutex has been unlocked, tree_log_mutex is still held - * but we can't jump to unlock_tree_log causing double unlock - */ mutex_unlock(&fs_info->tree_log_mutex); goto scrub_continue; } @@ -2393,7 +2359,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (trans->type & __TRANS_FREEZABLE) sb_end_intwrite(fs_info->sb); - trace_btrfs_transaction_commit(trans->root); + trace_btrfs_transaction_commit(fs_info); btrfs_scrub_continue(fs_info); @@ -2404,8 +2370,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) return ret; -unlock_tree_log: - mutex_unlock(&fs_info->tree_log_mutex); unlock_reloc: mutex_unlock(&fs_info->reloc_mutex); scrub_continue: |