diff options
Diffstat (limited to 'fs/xfs/xfs_log_cil.c')
-rw-r--r-- | fs/xfs/xfs_log_cil.c | 138 |
1 files changed, 104 insertions, 34 deletions
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index b0ef071b3cb5..b128aaa9b870 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -656,6 +656,8 @@ xlog_cil_push_work( struct xfs_log_vec lvhdr = { NULL }; xfs_lsn_t commit_lsn; xfs_lsn_t push_seq; + struct bio bio; + DECLARE_COMPLETION_ONSTACK(bdev_flush); new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_NOFS); new_ctx->ticket = xlog_cil_ticket_alloc(log); @@ -668,9 +670,14 @@ xlog_cil_push_work( ASSERT(push_seq <= ctx->sequence); /* - * Wake up any background push waiters now this context is being pushed. + * As we are about to switch to a new, empty CIL context, we no longer + * need to throttle tasks on CIL space overruns. Wake any waiters that + * the hard push throttle may have caught so they can start committing + * to the new context. The ctx->xc_push_lock provides the serialisation + * necessary for safely using the lockless waitqueue_active() check in + * this context. */ - if (ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) + if (waitqueue_active(&cil->xc_push_wait)) wake_up_all(&cil->xc_push_wait); /* @@ -719,10 +726,19 @@ xlog_cil_push_work( spin_unlock(&cil->xc_push_lock); /* - * pull all the log vectors off the items in the CIL, and - * remove the items from the CIL. We don't need the CIL lock - * here because it's only needed on the transaction commit - * side which is currently locked out by the flush lock. + * The CIL is stable at this point - nothing new will be added to it + * because we hold the flush lock exclusively. Hence we can now issue + * a cache flush to ensure all the completed metadata in the journal we + * are about to overwrite is on stable storage. + */ + xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev, + &bdev_flush); + + /* + * Pull all the log vectors off the items in the CIL, and remove the + * items from the CIL. We don't need the CIL lock here because it's only + * needed on the transaction commit side which is currently locked out + * by the flush lock. */ lv = NULL; num_iovecs = 0; @@ -772,7 +788,7 @@ xlog_cil_push_work( * that higher sequences will wait for us to write out a commit record * before they do. * - * xfs_log_force_lsn requires us to mirror the new sequence into the cil + * xfs_log_force_seq requires us to mirror the new sequence into the cil * structure atomically with the addition of this sequence to the * committing list. This also ensures that we can do unlocked checks * against the current sequence in log forces without risking @@ -806,7 +822,14 @@ xlog_cil_push_work( lvhdr.lv_iovecp = &lhdr; lvhdr.lv_next = ctx->lv_chain; - error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0, true); + /* + * Before we format and submit the first iclog, we have to ensure that + * the metadata writeback ordering cache flush is complete. + */ + wait_for_completion(&bdev_flush); + + error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, + XLOG_START_TRANS); if (error) goto out_abort_free_ticket; @@ -850,15 +873,21 @@ restart: xfs_log_ticket_ungrant(log, tic); - spin_lock(&commit_iclog->ic_callback_lock); + /* + * Once we attach the ctx to the iclog, a shutdown can process the + * iclog, run the callbacks and free the ctx. The only thing preventing + * this potential UAF situation here is that we are holding the + * icloglock. Hence we cannot access the ctx once we have attached the + * callbacks and dropped the icloglock. + */ + spin_lock(&log->l_icloglock); if (commit_iclog->ic_state == XLOG_STATE_IOERROR) { - spin_unlock(&commit_iclog->ic_callback_lock); + spin_unlock(&log->l_icloglock); goto out_abort; } ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE || commit_iclog->ic_state == XLOG_STATE_WANT_SYNC); list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks); - spin_unlock(&commit_iclog->ic_callback_lock); /* * now the checkpoint commit is complete and we've attached the @@ -870,8 +899,50 @@ restart: wake_up_all(&cil->xc_commit_wait); spin_unlock(&cil->xc_push_lock); - /* release the hounds! */ - xfs_log_release_iclog(commit_iclog); + /* + * If the checkpoint spans multiple iclogs, wait for all previous iclogs + * to complete before we submit the commit_iclog. We can't use state + * checks for this - ACTIVE can be either a past completed iclog or a + * future iclog being filled, while WANT_SYNC through SYNC_DONE can be a + * past or future iclog awaiting IO or ordered IO completion to be run. + * In the latter case, if it's a future iclog and we wait on it, the we + * will hang because it won't get processed through to ic_force_wait + * wakeup until this commit_iclog is written to disk. Hence we use the + * iclog header lsn and compare it to the commit lsn to determine if we + * need to wait on iclogs or not. + * + * NOTE: It is not safe to reference the ctx after this check as we drop + * the icloglock if we have to wait for completion of other iclogs. + */ + if (ctx->start_lsn != commit_lsn) { + xfs_lsn_t plsn; + + plsn = be64_to_cpu(commit_iclog->ic_prev->ic_header.h_lsn); + if (plsn && XFS_LSN_CMP(plsn, commit_lsn) < 0) { + /* + * Waiting on ic_force_wait orders the completion of + * iclogs older than ic_prev. Hence we only need to wait + * on the most recent older iclog here. + */ + xlog_wait_on_iclog(commit_iclog->ic_prev); + spin_lock(&log->l_icloglock); + } + + /* + * We need to issue a pre-flush so that the ordering for this + * checkpoint is correctly preserved down to stable storage. + */ + commit_iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; + } + + /* + * The commit iclog must be written to stable storage to guarantee + * journal IO vs metadata writeback IO is correctly ordered on stable + * storage. + */ + commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA; + xlog_state_release_iclog(log, commit_iclog); + spin_unlock(&log->l_icloglock); return; out_skip: @@ -907,7 +978,7 @@ xlog_cil_push_background( ASSERT(!list_empty(&cil->xc_cil)); /* - * don't do a background push if we haven't used up all the + * Don't do a background push if we haven't used up all the * space available yet. */ if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) { @@ -931,9 +1002,16 @@ xlog_cil_push_background( /* * If we are well over the space limit, throttle the work that is being - * done until the push work on this context has begun. + * done until the push work on this context has begun. Enforce the hard + * throttle on all transaction commits once it has been activated, even + * if the committing transactions have resulted in the space usage + * dipping back down under the hard limit. + * + * The ctx->xc_push_lock provides the serialisation necessary for safely + * using the lockless waitqueue_active() check in this context. */ - if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) { + if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log) || + waitqueue_active(&cil->xc_push_wait)) { trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket); ASSERT(cil->xc_ctx->space_used < log->l_logsize); xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock); @@ -1008,16 +1086,14 @@ xlog_cil_empty( * allowed again. */ void -xfs_log_commit_cil( - struct xfs_mount *mp, +xlog_cil_commit( + struct xlog *log, struct xfs_trans *tp, - xfs_lsn_t *commit_lsn, + xfs_csn_t *commit_seq, bool regrant) { - struct xlog *log = mp->m_log; struct xfs_cil *cil = log->l_cilp; struct xfs_log_item *lip, *next; - xfs_lsn_t xc_commit_lsn; /* * Do all necessary memory allocation before we lock the CIL. @@ -1031,10 +1107,6 @@ xfs_log_commit_cil( xlog_cil_insert_items(log, tp); - xc_commit_lsn = cil->xc_ctx->sequence; - if (commit_lsn) - *commit_lsn = xc_commit_lsn; - if (regrant && !XLOG_FORCED_SHUTDOWN(log)) xfs_log_ticket_regrant(log, tp->t_ticket); else @@ -1057,8 +1129,10 @@ xfs_log_commit_cil( list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { xfs_trans_del_item(lip); if (lip->li_ops->iop_committing) - lip->li_ops->iop_committing(lip, xc_commit_lsn); + lip->li_ops->iop_committing(lip, cil->xc_ctx->sequence); } + if (commit_seq) + *commit_seq = cil->xc_ctx->sequence; /* xlog_cil_push_background() releases cil->xc_ctx_lock */ xlog_cil_push_background(log); @@ -1075,9 +1149,9 @@ xfs_log_commit_cil( * iclog flush is necessary following this call. */ xfs_lsn_t -xlog_cil_force_lsn( +xlog_cil_force_seq( struct xlog *log, - xfs_lsn_t sequence) + xfs_csn_t sequence) { struct xfs_cil *cil = log->l_cilp; struct xfs_cil_ctx *ctx; @@ -1173,21 +1247,17 @@ bool xfs_log_item_in_current_chkpt( struct xfs_log_item *lip) { - struct xfs_cil_ctx *ctx; + struct xfs_cil_ctx *ctx = lip->li_mountp->m_log->l_cilp->xc_ctx; if (list_empty(&lip->li_cil)) return false; - ctx = lip->li_mountp->m_log->l_cilp->xc_ctx; - /* * li_seq is written on the first commit of a log item to record the * first checkpoint it is written to. Hence if it is different to the * current sequence, we're in a new checkpoint. */ - if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0) - return false; - return true; + return lip->li_seq == ctx->sequence; } /* |