summaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs
diff options
context:
space:
mode:
authorDave Chinner <david@fromorbit.com>2023-04-14 00:09:09 +0300
committerDave Chinner <dchinner@redhat.com>2023-04-14 00:09:09 +0300
commitb634abac59acc0e4397c5cf420278b32a6e0b69e (patch)
tree95193bc32cf9a2d8b595f256d568d6e48e0222d3 /fs/xfs/libxfs
parent793f5c2cca10d0f33d38563c142ee00942c3e21e (diff)
parent88accf17226733088923635b580779a3c86b6f23 (diff)
downloadlinux-b634abac59acc0e4397c5cf420278b32a6e0b69e.tar.xz
Merge tag 'scrub-drain-intents-6.4_2023-04-11' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into guilt/xfs-for-next
xfs: drain deferred work items when scrubbing [v24.5] The design doc for XFS online fsck contains a long discussion of the eventual consistency models in use for XFS metadata. In that chapter, we note that it is possible for scrub to collide with a chain of deferred space metadata updates, and proposes a lightweight solution: The use of a pending-intents counter so that scrub can wait for the system to drain all chains. This patchset implements that scrub drain. The first patch implements the basic mechanism, and the subsequent patches reduce the runtime overhead by converting the implementation to use sloppy counters and introducing jump labels to avoid walking into scrub hooks when it isn't running. This last paradigm repeats elsewhere in this megaseries. v23.1: make intent items take an active ref to the perag structure and document why we bump and drop the intent counts when we do Signed-off-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/libxfs')
-rw-r--r--fs/xfs/libxfs/xfs_ag.c4
-rw-r--r--fs/xfs/libxfs/xfs_ag.h8
-rw-r--r--fs/xfs/libxfs/xfs_defer.c6
3 files changed, 16 insertions, 2 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 2d8910046ed9..1b078bbbf225 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -260,6 +260,7 @@ xfs_free_perag(
spin_unlock(&mp->m_perag_lock);
ASSERT(pag);
XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
+ xfs_defer_drain_free(&pag->pag_intents_drain);
cancel_delayed_work_sync(&pag->pag_blockgc_work);
xfs_buf_hash_destroy(pag);
@@ -385,6 +386,7 @@ xfs_initialize_perag(
spin_lock_init(&pag->pag_state_lock);
INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
+ xfs_defer_drain_init(&pag->pag_intents_drain);
init_waitqueue_head(&pag->pagb_wait);
init_waitqueue_head(&pag->pag_active_wq);
pag->pagb_count = 0;
@@ -421,6 +423,7 @@ xfs_initialize_perag(
return 0;
out_remove_pag:
+ xfs_defer_drain_free(&pag->pag_intents_drain);
radix_tree_delete(&mp->m_perag_tree, index);
out_free_pag:
kmem_free(pag);
@@ -431,6 +434,7 @@ out_unwind_new_pags:
if (!pag)
break;
xfs_buf_hash_destroy(pag);
+ xfs_defer_drain_free(&pag->pag_intents_drain);
kmem_free(pag);
}
return error;
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 8092eaba977d..2e0aef87d633 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -101,6 +101,14 @@ struct xfs_perag {
/* background prealloc block trimming */
struct delayed_work pag_blockgc_work;
+ /*
+ * We use xfs_drain to track the number of deferred log intent items
+ * that have been queued (but not yet processed) so that waiters (e.g.
+ * scrub) will not lock resources when other threads are in the middle
+ * of processing a chain of intent items only to find momentary
+ * inconsistencies.
+ */
+ struct xfs_defer_drain pag_intents_drain;
#endif /* __KERNEL__ */
};
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 5a321b783398..bcfb6a4203cd 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -397,6 +397,7 @@ xfs_defer_cancel_list(
list_for_each_safe(pwi, n, &dfp->dfp_work) {
list_del(pwi);
dfp->dfp_count--;
+ trace_xfs_defer_cancel_item(mp, dfp, pwi);
ops->cancel_item(pwi);
}
ASSERT(dfp->dfp_count == 0);
@@ -476,6 +477,7 @@ xfs_defer_finish_one(
list_for_each_safe(li, n, &dfp->dfp_work) {
list_del(li);
dfp->dfp_count--;
+ trace_xfs_defer_finish_item(tp->t_mountp, dfp, li);
error = ops->finish_item(tp, dfp->dfp_done, li, &state);
if (error == -EAGAIN) {
int ret;
@@ -623,7 +625,7 @@ xfs_defer_add(
struct list_head *li)
{
struct xfs_defer_pending *dfp = NULL;
- const struct xfs_defer_op_type *ops;
+ const struct xfs_defer_op_type *ops = defer_op_types[type];
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);
@@ -636,7 +638,6 @@ xfs_defer_add(
if (!list_empty(&tp->t_dfops)) {
dfp = list_last_entry(&tp->t_dfops,
struct xfs_defer_pending, dfp_list);
- ops = defer_op_types[dfp->dfp_type];
if (dfp->dfp_type != type ||
(ops->max_items && dfp->dfp_count >= ops->max_items))
dfp = NULL;
@@ -653,6 +654,7 @@ xfs_defer_add(
}
list_add_tail(li, &dfp->dfp_work);
+ trace_xfs_defer_add_item(tp->t_mountp, dfp, li);
dfp->dfp_count++;
}