diff options
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r-- | fs/jbd2/transaction.c | 101 |
1 files changed, 49 insertions, 52 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 8e2f8275a253..fcb9175016a5 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -107,7 +107,6 @@ static void jbd2_get_transaction(journal_t *journal, transaction->t_start_time = ktime_get(); transaction->t_tid = journal->j_transaction_sequence++; transaction->t_expires = jiffies + journal->j_commit_interval; - spin_lock_init(&transaction->t_handle_lock); atomic_set(&transaction->t_updates, 0); atomic_set(&transaction->t_outstanding_credits, jbd2_descriptor_blocks_per_trans(journal) + @@ -139,26 +138,22 @@ static void jbd2_get_transaction(journal_t *journal, /* * Update transaction's maximum wait time, if debugging is enabled. * - * In order for t_max_wait to be reliable, it must be protected by a - * lock. But doing so will mean that start_this_handle() can not be - * run in parallel on SMP systems, which limits our scalability. So - * unless debugging is enabled, we no longer update t_max_wait, which - * means that maximum wait time reported by the jbd2_run_stats - * tracepoint will always be zero. + * t_max_wait is carefully updated here with use of atomic compare exchange. + * Note that there could be multiplre threads trying to do this simultaneously + * hence using cmpxchg to avoid any use of locks in this case. + * With this t_max_wait can be updated w/o enabling jbd2_journal_enable_debug. */ static inline void update_t_max_wait(transaction_t *transaction, unsigned long ts) { -#ifdef CONFIG_JBD2_DEBUG - if (jbd2_journal_enable_debug && - time_after(transaction->t_start, ts)) { - ts = jbd2_time_diff(ts, transaction->t_start); - spin_lock(&transaction->t_handle_lock); - if (ts > transaction->t_max_wait) - transaction->t_max_wait = ts; - spin_unlock(&transaction->t_handle_lock); + unsigned long oldts, newts; + + if (time_after(transaction->t_start, ts)) { + newts = jbd2_time_diff(ts, transaction->t_start); + oldts = READ_ONCE(transaction->t_max_wait); + while (oldts < newts) + oldts = cmpxchg(&transaction->t_max_wait, oldts, newts); } -#endif } /* @@ -690,7 +685,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records) DIV_ROUND_UP( handle->h_revoke_credits_requested, journal->j_revoke_records_per_block); - spin_lock(&transaction->t_handle_lock); wanted = atomic_add_return(nblocks, &transaction->t_outstanding_credits); @@ -698,7 +692,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records) jbd_debug(3, "denied handle %p %d blocks: " "transaction too large\n", handle, nblocks); atomic_sub(nblocks, &transaction->t_outstanding_credits); - goto unlock; + goto error_out; } trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev, @@ -714,8 +708,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records) result = 0; jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); -unlock: - spin_unlock(&transaction->t_handle_lock); error_out: read_unlock(&journal->j_state_lock); return result; @@ -842,27 +834,35 @@ EXPORT_SYMBOL(jbd2_journal_restart); */ void jbd2_journal_wait_updates(journal_t *journal) { - transaction_t *commit_transaction = journal->j_running_transaction; + DEFINE_WAIT(wait); - if (!commit_transaction) - return; + while (1) { + /* + * Note that the running transaction can get freed under us if + * this transaction is getting committed in + * jbd2_journal_commit_transaction() -> + * jbd2_journal_free_transaction(). This can only happen when we + * release j_state_lock -> schedule() -> acquire j_state_lock. + * Hence we should everytime retrieve new j_running_transaction + * value (after j_state_lock release acquire cycle), else it may + * lead to use-after-free of old freed transaction. + */ + transaction_t *transaction = journal->j_running_transaction; - spin_lock(&commit_transaction->t_handle_lock); - while (atomic_read(&commit_transaction->t_updates)) { - DEFINE_WAIT(wait); + if (!transaction) + break; prepare_to_wait(&journal->j_wait_updates, &wait, - TASK_UNINTERRUPTIBLE); - if (atomic_read(&commit_transaction->t_updates)) { - spin_unlock(&commit_transaction->t_handle_lock); - write_unlock(&journal->j_state_lock); - schedule(); - write_lock(&journal->j_state_lock); - spin_lock(&commit_transaction->t_handle_lock); + TASK_UNINTERRUPTIBLE); + if (!atomic_read(&transaction->t_updates)) { + finish_wait(&journal->j_wait_updates, &wait); + break; } + write_unlock(&journal->j_state_lock); + schedule(); finish_wait(&journal->j_wait_updates, &wait); + write_lock(&journal->j_state_lock); } - spin_unlock(&commit_transaction->t_handle_lock); } /** @@ -877,8 +877,6 @@ void jbd2_journal_wait_updates(journal_t *journal) */ void jbd2_journal_lock_updates(journal_t *journal) { - DEFINE_WAIT(wait); - jbd2_might_wait_for_commit(journal); write_lock(&journal->j_state_lock); @@ -2219,14 +2217,14 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) } /* - * jbd2_journal_invalidatepage + * jbd2_journal_invalidate_folio * * This code is tricky. It has a number of cases to deal with. * * There are two invariants which this code relies on: * - * i_size must be updated on disk before we start calling invalidatepage on the - * data. + * i_size must be updated on disk before we start calling invalidate_folio + * on the data. * * This is done in ext3 by defining an ext3_setattr method which * updates i_size before truncate gets going. By maintaining this @@ -2428,9 +2426,9 @@ zap_buffer_unlocked: } /** - * jbd2_journal_invalidatepage() + * jbd2_journal_invalidate_folio() * @journal: journal to use for flush... - * @page: page to flush + * @folio: folio to flush * @offset: start of the range to invalidate * @length: length of the range to invalidate * @@ -2439,30 +2437,29 @@ zap_buffer_unlocked: * the page is straddling i_size. Caller then has to wait for current commit * and try again. */ -int jbd2_journal_invalidatepage(journal_t *journal, - struct page *page, - unsigned int offset, - unsigned int length) +int jbd2_journal_invalidate_folio(journal_t *journal, struct folio *folio, + size_t offset, size_t length) { struct buffer_head *head, *bh, *next; unsigned int stop = offset + length; unsigned int curr_off = 0; - int partial_page = (offset || length < PAGE_SIZE); + int partial_page = (offset || length < folio_size(folio)); int may_free = 1; int ret = 0; - if (!PageLocked(page)) + if (!folio_test_locked(folio)) BUG(); - if (!page_has_buffers(page)) + head = folio_buffers(folio); + if (!head) return 0; - BUG_ON(stop > PAGE_SIZE || stop < length); + BUG_ON(stop > folio_size(folio) || stop < length); /* We will potentially be playing with lists other than just the * data lists (especially for journaled data mode), so be * cautious in our locking. */ - head = bh = page_buffers(page); + bh = head; do { unsigned int next_off = curr_off + bh->b_size; next = bh->b_this_page; @@ -2485,8 +2482,8 @@ int jbd2_journal_invalidatepage(journal_t *journal, } while (bh != head); if (!partial_page) { - if (may_free && try_to_free_buffers(page)) - J_ASSERT(!page_has_buffers(page)); + if (may_free && try_to_free_buffers(&folio->page)) + J_ASSERT(!folio_buffers(folio)); } return 0; } |