Diffstat (limited to 'fs/xfs/xfs_log_cil.c')
 fs/xfs/xfs_log_cil.c | 477
 1 file changed, 278 insertions(+), 199 deletions(-)
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 31e4ea2d19ac..23d6ceb5e97b 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -68,6 +68,7 @@ xlog_cil_init(
ctx->sequence = 1;
ctx->cil = cil;
cil->xc_ctx = ctx;
+ cil->xc_current_sequence = ctx->sequence;
cil->xc_log = log;
log->l_cilp = cil;
@@ -145,102 +146,6 @@ xlog_cil_init_post_recovery(
}
/*
- * Insert the log item into the CIL and calculate the difference in space
- * consumed by the item. Add the space to the checkpoint ticket and calculate
- * if the change requires additional log metadata. If it does, take that space
- * as well. Remove the amount of space we addded to the checkpoint ticket from
- * the current transaction ticket so that the accounting works out correctly.
- *
- * If this is the first time the item is being placed into the CIL in this
- * context, pin it so it can't be written to disk until the CIL is flushed to
- * the iclog and the iclog written to disk.
- */
-static void
-xlog_cil_insert(
- struct log *log,
- struct xlog_ticket *ticket,
- struct xfs_log_item *item,
- struct xfs_log_vec *lv)
-{
- struct xfs_cil *cil = log->l_cilp;
- struct xfs_log_vec *old = lv->lv_item->li_lv;
- struct xfs_cil_ctx *ctx = cil->xc_ctx;
- int len;
- int diff_iovecs;
- int iclog_space;
-
- if (old) {
- /* existing lv on log item, space used is a delta */
- ASSERT(!list_empty(&item->li_cil));
- ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
-
- len = lv->lv_buf_len - old->lv_buf_len;
- diff_iovecs = lv->lv_niovecs - old->lv_niovecs;
- kmem_free(old->lv_buf);
- kmem_free(old);
- } else {
- /* new lv, must pin the log item */
- ASSERT(!lv->lv_item->li_lv);
- ASSERT(list_empty(&item->li_cil));
-
- len = lv->lv_buf_len;
- diff_iovecs = lv->lv_niovecs;
- IOP_PIN(lv->lv_item);
-
- }
- len += diff_iovecs * sizeof(xlog_op_header_t);
-
- /* attach new log vector to log item */
- lv->lv_item->li_lv = lv;
-
- spin_lock(&cil->xc_cil_lock);
- list_move_tail(&item->li_cil, &cil->xc_cil);
- ctx->nvecs += diff_iovecs;
-
- /*
- * If this is the first time the item is being committed to the CIL,
- * store the sequence number on the log item so we can tell
- * in future commits whether this is the first checkpoint the item is
- * being committed into.
- */
- if (!item->li_seq)
- item->li_seq = ctx->sequence;
-
- /*
- * Now transfer enough transaction reservation to the context ticket
- * for the checkpoint. The context ticket is special - the unit
- * reservation has to grow as well as the current reservation as we
- * steal from tickets so we can correctly determine the space used
- * during the transaction commit.
- */
- if (ctx->ticket->t_curr_res == 0) {
- /* first commit in checkpoint, steal the header reservation */
- ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
- ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
- ticket->t_curr_res -= ctx->ticket->t_unit_res;
- }
-
- /* do we need space for more log record headers? */
- iclog_space = log->l_iclog_size - log->l_iclog_hsize;
- if (len > 0 && (ctx->space_used / iclog_space !=
- (ctx->space_used + len) / iclog_space)) {
- int hdrs;
-
- hdrs = (len + iclog_space - 1) / iclog_space;
- /* need to take into account split region headers, too */
- hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
- ctx->ticket->t_unit_res += hdrs;
- ctx->ticket->t_curr_res += hdrs;
- ticket->t_curr_res -= hdrs;
- ASSERT(ticket->t_curr_res >= len);
- }
- ticket->t_curr_res -= len;
- ctx->space_used += len;
-
- spin_unlock(&cil->xc_cil_lock);
-}
-
-/*
* Format log items into flat buffers
*
* For delayed logging, we need to hold a formatted buffer containing all the
@@ -269,15 +174,10 @@ xlog_cil_insert(
static void
xlog_cil_format_items(
struct log *log,
- struct xfs_log_vec *log_vector,
- struct xlog_ticket *ticket,
- xfs_lsn_t *start_lsn)
+ struct xfs_log_vec *log_vector)
{
struct xfs_log_vec *lv;
- if (start_lsn)
- *start_lsn = log->l_cilp->xc_ctx->sequence;
-
ASSERT(log_vector);
for (lv = log_vector; lv; lv = lv->lv_next) {
void *ptr;
@@ -290,7 +190,7 @@ xlog_cil_format_items(
len += lv->lv_iovecp[index].i_len;
lv->lv_buf_len = len;
- lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
+ lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
ptr = lv->lv_buf;
for (index = 0; index < lv->lv_niovecs; index++) {
@@ -301,97 +201,153 @@ xlog_cil_format_items(
ptr += vec->i_len;
}
ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
-
- xlog_cil_insert(log, ticket, lv->lv_item, lv);
}
}
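
The loop above is the whole of the formatting step: every iovec of a log
vector is copied into one contiguous buffer so the CIL can keep the change
after the transaction's items are unlocked. A minimal userspace sketch of
that flattening, with invented names (struct iovec_s, flatten()) standing in
for the kernel types:

#include <assert.h>
#include <stdlib.h>
#include <string.h>

struct iovec_s {
	void	*i_addr;
	int	i_len;
};

/* Copy every iovec into one contiguous buffer and repoint it there. */
static char *
flatten(struct iovec_s *vecs, int nvecs, int *buf_len)
{
	char	*buf, *ptr;
	int	len = 0;
	int	i;

	for (i = 0; i < nvecs; i++)
		len += vecs[i].i_len;

	buf = malloc(len);	/* kmem_alloc(..., KM_SLEEP|KM_NOFS) above */
	if (!buf)
		return NULL;
	ptr = buf;
	for (i = 0; i < nvecs; i++) {
		memcpy(ptr, vecs[i].i_addr, vecs[i].i_len);
		vecs[i].i_addr = ptr;	/* the iovec now points at the copy */
		ptr += vecs[i].i_len;
	}
	assert(ptr == buf + len);
	*buf_len = len;
	return buf;
}

int main(void)
{
	char a[] = "log", b[] = "item";
	struct iovec_s v[2] = { { a, 3 }, { b, 4 } };
	int len;
	char *buf = flatten(v, 2, &len);	/* len == 7 */

	free(buf);
	return 0;
}
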
-static void
-xlog_cil_free_logvec(
- struct xfs_log_vec *log_vector)
+/*
+ * Prepare the log item for insertion into the CIL. Calculate the difference in
+ * log space and vectors it will consume, and if it is a new item, pin it as
+ * well.
+ */
+STATIC void
+xfs_cil_prepare_item(
+ struct log *log,
+ struct xfs_log_vec *lv,
+ int *len,
+ int *diff_iovecs)
{
- struct xfs_log_vec *lv;
+ struct xfs_log_vec *old = lv->lv_item->li_lv;
+
+ if (old) {
+ /* existing lv on log item, space used is a delta */
+ ASSERT(!list_empty(&lv->lv_item->li_cil));
+ ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
+
+ *len += lv->lv_buf_len - old->lv_buf_len;
+ *diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
+ kmem_free(old->lv_buf);
+ kmem_free(old);
+ } else {
+ /* new lv, must pin the log item */
+ ASSERT(!lv->lv_item->li_lv);
+ ASSERT(list_empty(&lv->lv_item->li_cil));
+
+ *len += lv->lv_buf_len;
+ *diff_iovecs += lv->lv_niovecs;
+ IOP_PIN(lv->lv_item);
- for (lv = log_vector; lv; ) {
- struct xfs_log_vec *next = lv->lv_next;
- kmem_free(lv->lv_buf);
- kmem_free(lv);
- lv = next;
}
+
+ /* attach new log vector to log item */
+ lv->lv_item->li_lv = lv;
+
+ /*
+ * If this is the first time the item is being committed to the
+ * CIL, store the sequence number on the log item so we can
+ * tell in future commits whether this is the first checkpoint
+ * the item is being committed into.
+ */
+ if (!lv->lv_item->li_seq)
+ lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
}
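
Relogging is what the delta accounting above is for: an item that is already
in the CIL only charges the checkpoint for its growth, not its full size. A
worked example with made-up sizes:

#include <stdio.h>

int main(void)
{
	int len = 0, diff_iovecs = 0;

	/* Hypothetical item already in the CIL from an earlier commit... */
	int old_buf_len = 512, old_niovecs = 2;
	/* ...now relogged with a slightly larger formatted buffer. */
	int new_buf_len = 768, new_niovecs = 3;

	len += new_buf_len - old_buf_len;		/* 256, the growth */
	diff_iovecs += new_niovecs - old_niovecs;	/* 1 extra iovec */

	/* Each extra iovec also costs an op header (accounted below). */
	printf("len=%d diff_iovecs=%d\n", len, diff_iovecs);
	return 0;
}
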
/*
- * Commit a transaction with the given vector to the Committed Item List.
- *
- * To do this, we need to format the item, pin it in memory if required and
- * account for the space used by the transaction. Once we have done that we
- * need to release the unused reservation for the transaction, attach the
- * transaction to the checkpoint context so we carry the busy extents through
- * to checkpoint completion, and then unlock all the items in the transaction.
- *
- * For more specific information about the order of operations in
- * xfs_log_commit_cil() please refer to the comments in
- * xfs_trans_commit_iclog().
- *
- * Called with the context lock already held in read mode to lock out
- * background commit, returns without it held once background commits are
- * allowed again.
+ * Insert the log items into the CIL and calculate the difference in space
+ * consumed by each item. Add the space to the checkpoint ticket and calculate
+ * if the change requires additional log metadata. If it does, take that space
+ * as well. Remove the amount of space we added to the checkpoint ticket from
+ * the current transaction ticket so that the accounting works out correctly.
*/
-int
-xfs_log_commit_cil(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
+static void
+xlog_cil_insert_items(
+ struct log *log,
struct xfs_log_vec *log_vector,
- xfs_lsn_t *commit_lsn,
- int flags)
+ struct xlog_ticket *ticket)
{
- struct log *log = mp->m_log;
- int log_flags = 0;
- int push = 0;
-
- if (flags & XFS_TRANS_RELEASE_LOG_RES)
- log_flags = XFS_LOG_REL_PERM_RESERV;
+ struct xfs_cil *cil = log->l_cilp;
+ struct xfs_cil_ctx *ctx = cil->xc_ctx;
+ struct xfs_log_vec *lv;
+ int len = 0;
+ int diff_iovecs = 0;
+ int iclog_space;
- if (XLOG_FORCED_SHUTDOWN(log)) {
- xlog_cil_free_logvec(log_vector);
- return XFS_ERROR(EIO);
- }
+ ASSERT(log_vector);
- /* lock out background commit */
- down_read(&log->l_cilp->xc_ctx_lock);
- xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn);
+ /*
+ * Do all the accounting aggregation and switching of log vectors
+ * around in a separate loop to the insertion of items into the CIL.
+ * Then we can do a separate loop to update the CIL within a single
+ * lock/unlock pair. This reduces the number of round trips on the CIL
+ * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
+ * hold time for the transaction commit.
+ *
+ * If this is the first time the item is being placed into the CIL in
+ * this context, pin it so it can't be written to disk until the CIL is
+ * flushed to the iclog and the iclog written to disk.
+ *
+ * We can do this safely because the context can't checkpoint until we
+ * are done so it doesn't matter exactly how we update the CIL.
+ */
+ for (lv = log_vector; lv; lv = lv->lv_next)
+ xfs_cil_prepare_item(log, lv, &len, &diff_iovecs);
- /* check we didn't blow the reservation */
- if (tp->t_ticket->t_curr_res < 0)
- xlog_print_tic_res(log->l_mp, tp->t_ticket);
+ /* account for space used by new iovec headers */
+ len += diff_iovecs * sizeof(xlog_op_header_t);
- /* attach the transaction to the CIL if it has any busy extents */
- if (!list_empty(&tp->t_busy)) {
- spin_lock(&log->l_cilp->xc_cil_lock);
- list_splice_init(&tp->t_busy,
- &log->l_cilp->xc_ctx->busy_extents);
- spin_unlock(&log->l_cilp->xc_cil_lock);
- }
+ spin_lock(&cil->xc_cil_lock);
- tp->t_commit_lsn = *commit_lsn;
- xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
- xfs_trans_unreserve_and_mod_sb(tp);
+ /* move the items to the tail of the CIL */
+ for (lv = log_vector; lv; lv = lv->lv_next)
+ list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
- /* check for background commit before unlock */
- if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
- push = 1;
- up_read(&log->l_cilp->xc_ctx_lock);
+ ctx->nvecs += diff_iovecs;
/*
- * We need to push CIL every so often so we don't cache more than we
- * can fit in the log. The limit really is that a checkpoint can't be
- * more than half the log (the current checkpoint is not allowed to
- * overwrite the previous checkpoint), but commit latency and memory
- * usage limit this to a smaller size in most cases.
+ * Now transfer enough transaction reservation to the context ticket
+ * for the checkpoint. The context ticket is special - the unit
+ * reservation has to grow as well as the current reservation as we
+ * steal from tickets so we can correctly determine the space used
+ * during the transaction commit.
*/
- if (push)
- xlog_cil_push(log, 0);
- return 0;
+ if (ctx->ticket->t_curr_res == 0) {
+ /* first commit in checkpoint, steal the header reservation */
+ ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
+ ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
+ ticket->t_curr_res -= ctx->ticket->t_unit_res;
+ }
+
+ /* do we need space for more log record headers? */
+ iclog_space = log->l_iclog_size - log->l_iclog_hsize;
+ if (len > 0 && (ctx->space_used / iclog_space !=
+ (ctx->space_used + len) / iclog_space)) {
+ int hdrs;
+
+ hdrs = (len + iclog_space - 1) / iclog_space;
+ /* need to take into account split region headers, too */
+ hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
+ ctx->ticket->t_unit_res += hdrs;
+ ctx->ticket->t_curr_res += hdrs;
+ ticket->t_curr_res -= hdrs;
+ ASSERT(ticket->t_curr_res >= len);
+ }
+ ticket->t_curr_res -= len;
+ ctx->space_used += len;
+
+ spin_unlock(&cil->xc_cil_lock);
+}
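
The record-header check is easiest to follow with numbers. Assuming,
hypothetically, 32 KB iclogs with a 512-byte record header and a 12-byte op
header (the real values come from the log geometry and
sizeof(struct xlog_op_header)), a commit that carries the checkpoint across
an iclog boundary steals extra header reservation:

#include <stdio.h>

int main(void)
{
	int iclog_size = 32768, iclog_hsize = 512;	/* assumed geometry */
	int op_hdr_size = 12;		/* stand-in for the op header size */
	int space_used = 30000;		/* already in this checkpoint */
	int len = 5000;			/* this commit's contribution */
	int iclog_space = iclog_size - iclog_hsize;	/* 32256 usable */

	if (len > 0 && (space_used / iclog_space !=
	    (space_used + len) / iclog_space)) {
		/* 30000/32256 == 0 but 35000/32256 == 1: boundary crossed */
		int hdrs = (len + iclog_space - 1) / iclog_space;	/* 1 */

		hdrs *= iclog_hsize + op_hdr_size;
		printf("steal %d bytes for headers\n", hdrs);	/* 524 */
	}
	return 0;
}
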
+
+static void
+xlog_cil_free_logvec(
+ struct xfs_log_vec *log_vector)
+{
+ struct xfs_log_vec *lv;
+
+ for (lv = log_vector; lv; ) {
+ struct xfs_log_vec *next = lv->lv_next;
+ kmem_free(lv->lv_buf);
+ kmem_free(lv);
+ lv = next;
+ }
}
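
Note the shape of xlog_cil_free_logvec(): the chain's next pointer is saved
before each node is freed. The same pattern, standalone:

#include <stdlib.h>

struct vec {
	struct vec	*next;
	void		*buf;
};

static void free_chain(struct vec *v)
{
	while (v) {
		struct vec *next = v->next;	/* grab before freeing v */

		free(v->buf);
		free(v);
		v = next;
	}
}

int main(void)
{
	struct vec *v = calloc(1, sizeof(*v));	/* one-node chain */

	free_chain(v);
	return 0;
}
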
/*
@@ -427,13 +383,23 @@ xlog_cil_committed(
}
/*
- * Push the Committed Item List to the log. If the push_now flag is not set,
- * then it is a background flush and so we can chose to ignore it.
+ * Push the Committed Item List to the log. If @push_seq is zero, then it
+ * is a background flush and so we can choose to ignore it. Otherwise, if the
+ * current sequence is the same as @push_seq we need to do a flush. If
+ * @push_seq is less than the current sequence, then it has already been
+ * flushed and we don't need to do anything - the caller will wait for it to
+ * complete if necessary.
+ *
+ * @push_seq is a value rather than a flag because that allows us to do an
+ * unlocked check of the sequence number for a match. Hence we can allow log
+ * forces to run racily and not issue pushes for the same sequence twice. If we
+ * get a race between multiple pushes for the same sequence they will block on
+ * the first one and then abort, hence avoiding needless pushes.
*/
-int
+STATIC int
xlog_cil_push(
struct log *log,
- int push_now)
+ xfs_lsn_t push_seq)
{
struct xfs_cil *cil = log->l_cilp;
struct xfs_log_vec *lv;
@@ -453,12 +419,20 @@ xlog_cil_push(
if (!cil)
return 0;
+ ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
+
new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
new_ctx->ticket = xlog_cil_ticket_alloc(log);
- /* lock out transaction commit, but don't block on background push */
+ /*
+ * Lock out transaction commit, but don't block for background pushes
+ * unless we are well over the CIL space limit. See the definition of
+ * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
+ * used here.
+ */
if (!down_write_trylock(&cil->xc_ctx_lock)) {
- if (!push_now)
+ if (!push_seq &&
+ cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
goto out_free_ticket;
down_write(&cil->xc_ctx_lock);
}
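
The trylock dance implements a two-level policy: background pushes are
best-effort until the CIL grows well past the space limit, at which point
even they must block. A userspace model of just that policy (the pthread
calls are real; the limit is an invented number):

#include <pthread.h>

static pthread_rwlock_t ctx_lock = PTHREAD_RWLOCK_INITIALIZER;

static int push(long push_seq, long space_used)
{
	const long hard_limit = 3L * 1024 * 1024;	/* hypothetical */

	if (pthread_rwlock_trywrlock(&ctx_lock) != 0) {
		/*
		 * A committer holds the lock. A background push
		 * (push_seq == 0) gives up unless the CIL is well over
		 * the space limit; a log force always waits.
		 */
		if (!push_seq && space_used < hard_limit)
			return 0;
		pthread_rwlock_wrlock(&ctx_lock);
	}
	/* ...swap in the new context and push the old one... */
	pthread_rwlock_unlock(&ctx_lock);
	return 1;
}

int main(void)
{
	return !push(1, 0);	/* a log force for sequence 1 */
}
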
@@ -469,7 +443,11 @@ xlog_cil_push(
goto out_skip;
/* check for spurious background flush */
- if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+ if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+ goto out_skip;
+
+ /* check for a previously pushed sequence */
+ if (push_seq && push_seq < cil->xc_ctx->sequence)
goto out_skip;
/*
@@ -515,6 +493,13 @@ xlog_cil_push(
cil->xc_ctx = new_ctx;
/*
+ * Mirror the new sequence into the CIL structure so that we can do
+ * unlocked checks against the current sequence in log forces without
+ * risking dereferencing a freed context pointer.
+ */
+ cil->xc_current_sequence = new_ctx->sequence;
+
+ /*
* The switch is now done, so we can drop the context lock and move out
* of a shared context. We can't just go straight to the commit record,
* though - we need to synchronise with previous and future commits so
@@ -626,6 +611,105 @@ out_abort:
}
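
The xc_current_sequence mirror introduced above exists so that
xlog_cil_force_lsn() can test the sequence without holding xc_ctx_lock and
without touching a context that may already be freed; the push publishes the
new sequence while it still holds the lock. A sketch of the idea with a C11
atomic (all names illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_long current_sequence = 1;	/* mirrors ctx->sequence */

/* Unlocked check: only push if the wanted sequence is still current. */
static bool need_push(long sequence)
{
	return sequence == atomic_load(&current_sequence);
}

/* The push switches contexts, then publishes the new sequence. */
static void push_switch_context(void)
{
	atomic_fetch_add(&current_sequence, 1);
}

int main(void)
{
	printf("%d\n", need_push(1));	/* 1: sequence 1 not pushed yet */
	push_switch_context();
	printf("%d\n", need_push(1));	/* 0: already pushed, just wait */
	return 0;
}
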
/*
+ * Commit a transaction with the given vector to the Committed Item List.
+ *
+ * To do this, we need to format the item, pin it in memory if required and
+ * account for the space used by the transaction. Once we have done that we
+ * need to release the unused reservation for the transaction, attach the
+ * transaction to the checkpoint context so we carry the busy extents through
+ * to checkpoint completion, and then unlock all the items in the transaction.
+ *
+ * For more specific information about the order of operations in
+ * xfs_log_commit_cil() please refer to the comments in
+ * xfs_trans_commit_iclog().
+ *
+ * Called with the context lock already held in read mode to lock out
+ * background commit, returns without it held once background commits are
+ * allowed again.
+ */
+int
+xfs_log_commit_cil(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_log_vec *log_vector,
+ xfs_lsn_t *commit_lsn,
+ int flags)
+{
+ struct log *log = mp->m_log;
+ int log_flags = 0;
+ int push = 0;
+
+ if (flags & XFS_TRANS_RELEASE_LOG_RES)
+ log_flags = XFS_LOG_REL_PERM_RESERV;
+
+ if (XLOG_FORCED_SHUTDOWN(log)) {
+ xlog_cil_free_logvec(log_vector);
+ return XFS_ERROR(EIO);
+ }
+
+ /*
+ * do all the hard work of formatting items (including memory
+ * allocation) outside the CIL context lock. This prevents stalling CIL
+ * pushes when we are low on memory and a transaction commit spends a
+ * lot of time in memory reclaim.
+ */
+ xlog_cil_format_items(log, log_vector);
+
+ /* lock out background commit */
+ down_read(&log->l_cilp->xc_ctx_lock);
+ if (commit_lsn)
+ *commit_lsn = log->l_cilp->xc_ctx->sequence;
+
+ xlog_cil_insert_items(log, log_vector, tp->t_ticket);
+
+ /* check we didn't blow the reservation */
+ if (tp->t_ticket->t_curr_res < 0)
+ xlog_print_tic_res(log->l_mp, tp->t_ticket);
+
+ /* attach the transaction to the CIL if it has any busy extents */
+ if (!list_empty(&tp->t_busy)) {
+ spin_lock(&log->l_cilp->xc_cil_lock);
+ list_splice_init(&tp->t_busy,
+ &log->l_cilp->xc_ctx->busy_extents);
+ spin_unlock(&log->l_cilp->xc_cil_lock);
+ }
+
+ tp->t_commit_lsn = *commit_lsn;
+ xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
+ xfs_trans_unreserve_and_mod_sb(tp);
+
+ /*
+ * Once all the items of the transaction have been copied to the CIL,
+ * the items can be unlocked and freed.
+ *
+ * This needs to be done before we drop the CIL context lock because we
+ * have to update state in the log items and unlock them before they go
+ * to disk. If we don't, then the CIL checkpoint can race with us and
+ * we can run checkpoint completion before we've updated and unlocked
+ * the log items. This affects (at least) processing of stale buffers,
+ * inodes and EFIs.
+ */
+ xfs_trans_free_items(tp, *commit_lsn, 0);
+
+ /* check for background commit before unlock */
+ if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
+ push = 1;
+
+ up_read(&log->l_cilp->xc_ctx_lock);
+
+ /*
+ * We need to push CIL every so often so we don't cache more than we
+ * can fit in the log. The limit really is that a checkpoint can't be
+ * more than half the log (the current checkpoint is not allowed to
+ * overwrite the previous checkpoint), but commit latency and memory
+ * usage limit this to a smaller size in most cases.
+ */
+ if (push)
+ xlog_cil_push(log, 0);
+ return 0;
+}
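
The ordering that xfs_log_commit_cil() is careful about - format and allocate
before taking the context lock, unlock the transaction's items before
dropping it, push only after dropping it - can be modelled with a plain
rwlock. Everything below except the pthread API is an illustrative stub:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t ctx_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Stubs standing in for the real work; all names are illustrative. */
static void format_items(void)     { puts("format items (unlocked)"); }
static void insert_items(void)     { puts("insert into CIL (read lock)"); }
static void free_trans_items(void) { puts("unlock/free items (read lock)"); }
static int  over_space_limit(void) { return 0; }
static void push_cil(void)         { puts("background push"); }

int main(void)
{
	/* 1: format (and allocate memory) before taking the lock, so a
	 *    commit stuck in reclaim cannot stall CIL pushes. */
	format_items();

	/* 2: read lock; many commits proceed concurrently, only the
	 *    push takes this lock in write mode. */
	pthread_rwlock_rdlock(&ctx_lock);
	insert_items();
	free_trans_items();	/* before checkpoint completion can run */
	pthread_rwlock_unlock(&ctx_lock);

	/* 3: push only after dropping the lock, since the push needs
	 *    it in write mode. */
	if (over_space_limit())
		push_cil();
	return 0;
}
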
+
+/*
* Conditionally push the CIL based on the sequence passed in.
*
* We only need to push if we haven't already pushed the sequence
@@ -639,39 +723,34 @@ out_abort:
* commit lsn is there. It'll be empty, so this is broken for now.
*/
xfs_lsn_t
-xlog_cil_push_lsn(
+xlog_cil_force_lsn(
struct log *log,
- xfs_lsn_t push_seq)
+ xfs_lsn_t sequence)
{
struct xfs_cil *cil = log->l_cilp;
struct xfs_cil_ctx *ctx;
xfs_lsn_t commit_lsn = NULLCOMMITLSN;
-restart:
- down_write(&cil->xc_ctx_lock);
- ASSERT(push_seq <= cil->xc_ctx->sequence);
-
- /* check to see if we need to force out the current context */
- if (push_seq == cil->xc_ctx->sequence) {
- up_write(&cil->xc_ctx_lock);
- xlog_cil_push(log, 1);
- goto restart;
- }
+ ASSERT(sequence <= cil->xc_current_sequence);
+
+ /*
+ * check to see if we need to force out the current context.
+ * xlog_cil_push() handles racing pushes for the same sequence,
+ * so no need to deal with it here.
+ */
+ if (sequence == cil->xc_current_sequence)
+ xlog_cil_push(log, sequence);
/*
* See if we can find a previous sequence still committing.
- * We can drop the flush lock as soon as we have the cil lock
- * because we are now only comparing contexts protected by
- * the cil lock.
- *
* We need to wait for all previous sequence commits to complete
* before allowing the force of push_seq to go ahead. Hence block
* on commits for those as well.
*/
+restart:
spin_lock(&cil->xc_cil_lock);
- up_write(&cil->xc_ctx_lock);
list_for_each_entry(ctx, &cil->xc_committing, committing) {
- if (ctx->sequence > push_seq)
+ if (ctx->sequence > sequence)
continue;
if (!ctx->commit_lsn) {
/*
@@ -681,7 +760,7 @@ restart:
sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
goto restart;
}
- if (ctx->sequence != push_seq)
+ if (ctx->sequence != sequence)
continue;
/* found it! */
commit_lsn = ctx->commit_lsn;