summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Kconfig5
-rw-r--r--fs/xfs/Makefile2
-rw-r--r--fs/xfs/libxfs/xfs_ag.c4
-rw-r--r--fs/xfs/libxfs/xfs_ag.h8
-rw-r--r--fs/xfs/libxfs/xfs_defer.c6
-rw-r--r--fs/xfs/scrub/agheader.c9
-rw-r--r--fs/xfs/scrub/alloc.c3
-rw-r--r--fs/xfs/scrub/bmap.c3
-rw-r--r--fs/xfs/scrub/btree.c1
-rw-r--r--fs/xfs/scrub/common.c137
-rw-r--r--fs/xfs/scrub/common.h15
-rw-r--r--fs/xfs/scrub/dabtree.c1
-rw-r--r--fs/xfs/scrub/fscounters.c7
-rw-r--r--fs/xfs/scrub/health.c2
-rw-r--r--fs/xfs/scrub/ialloc.c2
-rw-r--r--fs/xfs/scrub/inode.c3
-rw-r--r--fs/xfs/scrub/quota.c3
-rw-r--r--fs/xfs/scrub/refcount.c9
-rw-r--r--fs/xfs/scrub/repair.c3
-rw-r--r--fs/xfs/scrub/rmap.c3
-rw-r--r--fs/xfs/scrub/scrub.c63
-rw-r--r--fs/xfs/scrub/scrub.h12
-rw-r--r--fs/xfs/scrub/trace.h69
-rw-r--r--fs/xfs/xfs_bmap_item.c12
-rw-r--r--fs/xfs/xfs_drain.c166
-rw-r--r--fs/xfs/xfs_drain.h87
-rw-r--r--fs/xfs/xfs_extfree_item.c4
-rw-r--r--fs/xfs/xfs_linux.h1
-rw-r--r--fs/xfs/xfs_refcount_item.c4
-rw-r--r--fs/xfs/xfs_rmap_item.c4
-rw-r--r--fs/xfs/xfs_trace.h71
31 files changed, 680 insertions, 39 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 9fac5ea8d0e4..05bc865142b8 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -93,10 +93,15 @@ config XFS_RT
If unsure, say N.
+config XFS_DRAIN_INTENTS
+ bool
+ select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL
+
config XFS_ONLINE_SCRUB
bool "XFS online metadata check support"
default n
depends on XFS_FS
+ select XFS_DRAIN_INTENTS
help
If you say Y here you will be able to check metadata on a
mounted XFS filesystem. This feature is intended to reduce
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 92d88dc3c9f7..3bdbc838c4d1 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -136,6 +136,8 @@ ifeq ($(CONFIG_MEMORY_FAILURE),y)
xfs-$(CONFIG_FS_DAX) += xfs_notify_failure.o
endif
+xfs-$(CONFIG_XFS_DRAIN_INTENTS) += xfs_drain.o
+
# online scrub/repair
ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 2d8910046ed9..1b078bbbf225 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -260,6 +260,7 @@ xfs_free_perag(
spin_unlock(&mp->m_perag_lock);
ASSERT(pag);
XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
+ xfs_defer_drain_free(&pag->pag_intents_drain);
cancel_delayed_work_sync(&pag->pag_blockgc_work);
xfs_buf_hash_destroy(pag);
@@ -385,6 +386,7 @@ xfs_initialize_perag(
spin_lock_init(&pag->pag_state_lock);
INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
+ xfs_defer_drain_init(&pag->pag_intents_drain);
init_waitqueue_head(&pag->pagb_wait);
init_waitqueue_head(&pag->pag_active_wq);
pag->pagb_count = 0;
@@ -421,6 +423,7 @@ xfs_initialize_perag(
return 0;
out_remove_pag:
+ xfs_defer_drain_free(&pag->pag_intents_drain);
radix_tree_delete(&mp->m_perag_tree, index);
out_free_pag:
kmem_free(pag);
@@ -431,6 +434,7 @@ out_unwind_new_pags:
if (!pag)
break;
xfs_buf_hash_destroy(pag);
+ xfs_defer_drain_free(&pag->pag_intents_drain);
kmem_free(pag);
}
return error;
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 8092eaba977d..2e0aef87d633 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -101,6 +101,14 @@ struct xfs_perag {
/* background prealloc block trimming */
struct delayed_work pag_blockgc_work;
+ /*
+ * We use xfs_drain to track the number of deferred log intent items
+ * that have been queued (but not yet processed) so that waiters (e.g.
+ * scrub) will not lock resources when other threads are in the middle
+ * of processing a chain of intent items only to find momentary
+ * inconsistencies.
+ */
+ struct xfs_defer_drain pag_intents_drain;
#endif /* __KERNEL__ */
};
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 5a321b783398..bcfb6a4203cd 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -397,6 +397,7 @@ xfs_defer_cancel_list(
list_for_each_safe(pwi, n, &dfp->dfp_work) {
list_del(pwi);
dfp->dfp_count--;
+ trace_xfs_defer_cancel_item(mp, dfp, pwi);
ops->cancel_item(pwi);
}
ASSERT(dfp->dfp_count == 0);
@@ -476,6 +477,7 @@ xfs_defer_finish_one(
list_for_each_safe(li, n, &dfp->dfp_work) {
list_del(li);
dfp->dfp_count--;
+ trace_xfs_defer_finish_item(tp->t_mountp, dfp, li);
error = ops->finish_item(tp, dfp->dfp_done, li, &state);
if (error == -EAGAIN) {
int ret;
@@ -623,7 +625,7 @@ xfs_defer_add(
struct list_head *li)
{
struct xfs_defer_pending *dfp = NULL;
- const struct xfs_defer_op_type *ops;
+ const struct xfs_defer_op_type *ops = defer_op_types[type];
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);
@@ -636,7 +638,6 @@ xfs_defer_add(
if (!list_empty(&tp->t_dfops)) {
dfp = list_last_entry(&tp->t_dfops,
struct xfs_defer_pending, dfp_list);
- ops = defer_op_types[dfp->dfp_type];
if (dfp->dfp_type != type ||
(ops->max_items && dfp->dfp_count >= ops->max_items))
dfp = NULL;
@@ -653,6 +654,7 @@ xfs_defer_add(
}
list_add_tail(li, &dfp->dfp_work);
+ trace_xfs_defer_add_item(tp->t_mountp, dfp, li);
dfp->dfp_count++;
}
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index c91819da1f5f..87cb13a6e84a 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -18,6 +18,15 @@
#include "scrub/scrub.h"
#include "scrub/common.h"
+int
+xchk_setup_agheader(
+ struct xfs_scrub *sc)
+{
+ if (xchk_need_intent_drain(sc))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+ return xchk_setup_fs(sc);
+}
+
/* Superblock */
/* Cross-reference with the other btrees. */
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 39e79b9536bc..de313df2b15b 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -24,6 +24,9 @@ int
xchk_setup_ag_allocbt(
struct xfs_scrub *sc)
{
+ if (xchk_need_intent_drain(sc))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+
return xchk_setup_ag_btree(sc, false);
}
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index f6d8cb938a02..a5078d63808f 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -31,6 +31,9 @@ xchk_setup_inode_bmap(
{
int error;
+ if (xchk_need_intent_drain(sc))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+
error = xchk_get_inode(sc);
if (error)
goto out;
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index e54c1cfe64bf..626282dbe2e3 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -36,6 +36,7 @@ __xchk_btree_process_error(
switch (*error) {
case -EDEADLOCK:
+ case -ECHRNG:
/* Used to restart an op with deadlock avoidance. */
trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
break;
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 597e6aca8628..dcfe66044d4a 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -75,6 +75,7 @@ __xchk_process_error(
case 0:
return true;
case -EDEADLOCK:
+ case -ECHRNG:
/* Used to restart an op with deadlock avoidance. */
trace_xchk_deadlock_retry(
sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
@@ -130,6 +131,7 @@ __xchk_fblock_process_error(
case 0:
return true;
case -EDEADLOCK:
+ case -ECHRNG:
/* Used to restart an op with deadlock avoidance. */
trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
break;
@@ -396,26 +398,19 @@ want_ag_read_header_failure(
}
/*
- * Grab the perag structure and all the headers for an AG.
+ * Grab the AG header buffers for the attached perag structure.
*
* The headers should be released by xchk_ag_free, but as a fail safe we attach
* all the buffers we grab to the scrub transaction so they'll all be freed
- * when we cancel it. Returns ENOENT if we can't grab the perag structure.
+ * when we cancel it.
*/
-int
-xchk_ag_read_headers(
+static inline int
+xchk_perag_read_headers(
struct xfs_scrub *sc,
- xfs_agnumber_t agno,
struct xchk_ag *sa)
{
- struct xfs_mount *mp = sc->mp;
int error;
- ASSERT(!sa->pag);
- sa->pag = xfs_perag_get(mp, agno);
- if (!sa->pag)
- return -ENOENT;
-
error = xfs_ialloc_read_agi(sa->pag, sc->tp, &sa->agi_bp);
if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
return error;
@@ -427,6 +422,104 @@ xchk_ag_read_headers(
return 0;
}
+/*
+ * Grab the AG headers for the attached perag structure and wait for pending
+ * intents to drain.
+ */
+static int
+xchk_perag_drain_and_lock(
+ struct xfs_scrub *sc)
+{
+ struct xchk_ag *sa = &sc->sa;
+ int error = 0;
+
+ ASSERT(sa->pag != NULL);
+ ASSERT(sa->agi_bp == NULL);
+ ASSERT(sa->agf_bp == NULL);
+
+ do {
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ error = xchk_perag_read_headers(sc, sa);
+ if (error)
+ return error;
+
+ /*
+ * If we've grabbed an inode for scrubbing then we assume that
+ * holding its ILOCK will suffice to coordinate with any intent
+ * chains involving this inode.
+ */
+ if (sc->ip)
+ return 0;
+
+ /*
+ * Decide if this AG is quiet enough for all metadata to be
+ * consistent with each other. XFS allows the AG header buffer
+ * locks to cycle across transaction rolls while processing
+ * chains of deferred ops, which means that there could be
+ * other threads in the middle of processing a chain of
+ * deferred ops. For regular operations we are careful about
+ * ordering operations to prevent collisions between threads
+ * (which is why we don't need a per-AG lock), but scrub and
+ * repair have to serialize against chained operations.
+ *
+ * We just locked all the AG header buffers; now take a look
+ * to see if there are any intents in progress. If there are,
+ * drop the AG headers and wait for the intents to drain.
+ * Since we hold all the AG header locks for the duration of
+ * the scrub, this is the only time we have to sample the
+ * intents counter; any threads increasing it after this point
+ * can't possibly be in the middle of a chain of AG metadata
+ * updates.
+ *
+ * Obviously, this should be slanted against scrub and in favor
+ * of runtime threads.
+ */
+ if (!xfs_perag_intent_busy(sa->pag))
+ return 0;
+
+ if (sa->agf_bp) {
+ xfs_trans_brelse(sc->tp, sa->agf_bp);
+ sa->agf_bp = NULL;
+ }
+
+ if (sa->agi_bp) {
+ xfs_trans_brelse(sc->tp, sa->agi_bp);
+ sa->agi_bp = NULL;
+ }
+
+ if (!(sc->flags & XCHK_FSGATES_DRAIN))
+ return -ECHRNG;
+ error = xfs_perag_intent_drain(sa->pag);
+ if (error == -ERESTARTSYS)
+ error = -EINTR;
+ } while (!error);
+
+ return error;
+}
+
+/*
+ * Grab the per-AG structure, grab all AG header buffers, and wait until there
+ * aren't any pending intents. Returns -ENOENT if we can't grab the perag
+ * structure.
+ */
+int
+xchk_ag_read_headers(
+ struct xfs_scrub *sc,
+ xfs_agnumber_t agno,
+ struct xchk_ag *sa)
+{
+ struct xfs_mount *mp = sc->mp;
+
+ ASSERT(!sa->pag);
+ sa->pag = xfs_perag_get(mp, agno);
+ if (!sa->pag)
+ return -ENOENT;
+
+ return xchk_perag_drain_and_lock(sc);
+}
+
/* Release all the AG btree cursors. */
void
xchk_ag_btcur_free(
@@ -916,3 +1009,25 @@ xchk_start_reaping(
}
sc->flags &= ~XCHK_REAPING_DISABLED;
}
+
+/*
+ * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub
+ * operation. Callers must not hold any locks that intersect with the CPU
+ * hotplug lock (e.g. writeback locks) because code patching must halt the CPUs
+ * to change kernel code.
+ */
+void
+xchk_fsgates_enable(
+ struct xfs_scrub *sc,
+ unsigned int scrub_fsgates)
+{
+ ASSERT(!(scrub_fsgates & ~XCHK_FSGATES_ALL));
+ ASSERT(!(sc->flags & scrub_fsgates));
+
+ trace_xchk_fsgates_enable(sc, scrub_fsgates);
+
+ if (scrub_fsgates & XCHK_FSGATES_DRAIN)
+ xfs_drain_wait_enable();
+
+ sc->flags |= scrub_fsgates;
+}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 273a4331da05..83b1a392930a 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -72,6 +72,7 @@ bool xchk_should_check_xref(struct xfs_scrub *sc, int *error,
struct xfs_btree_cur **curpp);
/* Setup functions */
+int xchk_setup_agheader(struct xfs_scrub *sc);
int xchk_setup_fs(struct xfs_scrub *sc);
int xchk_setup_ag_allocbt(struct xfs_scrub *sc);
int xchk_setup_ag_iallocbt(struct xfs_scrub *sc);
@@ -151,4 +152,18 @@ int xchk_ilock_inverted(struct xfs_inode *ip, uint lock_mode);
void xchk_stop_reaping(struct xfs_scrub *sc);
void xchk_start_reaping(struct xfs_scrub *sc);
+/*
+ * Setting up a hook to wait for intents to drain is costly -- we have to take
+ * the CPU hotplug lock and force an i-cache flush on all CPUs once to set it
+ * up, and again to tear it down. These costs add up quickly, so we only want
+ * to enable the drain waiter if the drain actually detected a conflict with
+ * running intent chains.
+ */
+static inline bool xchk_need_intent_drain(struct xfs_scrub *sc)
+{
+ return sc->flags & XCHK_NEED_DRAIN;
+}
+
+void xchk_fsgates_enable(struct xfs_scrub *sc, unsigned int scrub_fshooks);
+
#endif /* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index c392c0765e5c..82b150d3b8b7 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -39,6 +39,7 @@ xchk_da_process_error(
switch (*error) {
case -EDEADLOCK:
+ case -ECHRNG:
/* Used to restart an op with deadlock avoidance. */
trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
break;
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index a38006c71bff..faa315be7978 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -130,6 +130,13 @@ xchk_setup_fscounters(
struct xchk_fscounters *fsc;
int error;
+ /*
+ * If the AGF doesn't track btreeblks, we have to lock the AGF to count
+ * btree block usage by walking the actual btrees.
+ */
+ if (!xfs_has_lazysbcount(sc->mp))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+
sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS);
if (!sc->buf)
return -ENOMEM;
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index 66e99b0f6049..d2b2a1cb6533 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -7,6 +7,8 @@
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index b14270bd1c62..9563769a8881 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -32,6 +32,8 @@ int
xchk_setup_ag_iallocbt(
struct xfs_scrub *sc)
{
+ if (xchk_need_intent_drain(sc))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
return xchk_setup_ag_btree(sc, sc->flags & XCHK_TRY_HARDER);
}
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index dc66a1465f1b..bbf9432c02c2 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -32,6 +32,9 @@ xchk_setup_inode(
{
int error;
+ if (xchk_need_intent_drain(sc))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+
/*
* Try to get the inode. If the verifiers fail, we try again
* in raw mode.
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index b019c70c065a..e6caa358cbda 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -53,6 +53,9 @@ xchk_setup_quota(
if (!xfs_this_quota_on(sc->mp, dqtype))
return -ENOENT;
+ if (xchk_need_intent_drain(sc))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+
error = xchk_setup_fs(sc);
if (error)
return error;
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index a5005b1d010d..6f649cc01310 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -7,12 +7,15 @@
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
+#include "scrub/trace.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_ag.h"
@@ -24,6 +27,8 @@ int
xchk_setup_ag_refcountbt(
struct xfs_scrub *sc)
{
+ if (xchk_need_intent_drain(sc))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
return xchk_setup_ag_btree(sc, false);
}
@@ -300,8 +305,10 @@ xchk_refcountbt_xref_rmap(
goto out_free;
xchk_refcountbt_process_rmap_fragments(&refchk);
- if (irec->rc_refcount != refchk.seen)
+ if (irec->rc_refcount != refchk.seen) {
+ trace_xchk_refcount_incorrect(sc->sa.pag, irec, refchk.seen);
xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+ }
out_free:
list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index b800341aae69..ab0758308f57 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -60,6 +60,9 @@ xrep_attempt(
sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
sc->flags |= XREP_ALREADY_FIXED;
return -EAGAIN;
+ case -ECHRNG:
+ sc->flags |= XCHK_NEED_DRAIN;
+ return -EAGAIN;
case -EDEADLOCK:
/* Tell the caller to try again having grabbed all the locks. */
if (!(sc->flags & XCHK_TRY_HARDER)) {
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index 4dc79e1a675d..c6e47ef4c79b 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -24,6 +24,9 @@ int
xchk_setup_ag_rmapbt(
struct xfs_scrub *sc)
{
+ if (xchk_need_intent_drain(sc))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+
return xchk_setup_ag_btree(sc, false);
}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index e8e2bee001e5..787a9096ddef 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -145,6 +145,21 @@ xchk_probe(
/* Scrub setup and teardown */
+static inline void
+xchk_fsgates_disable(
+ struct xfs_scrub *sc)
+{
+ if (!(sc->flags & XCHK_FSGATES_ALL))
+ return;
+
+ trace_xchk_fsgates_disable(sc, sc->flags & XCHK_FSGATES_ALL);
+
+ if (sc->flags & XCHK_FSGATES_DRAIN)
+ xfs_drain_wait_disable();
+
+ sc->flags &= ~XCHK_FSGATES_ALL;
+}
+
/* Free all the resources and finish the transactions. */
STATIC int
xchk_teardown(
@@ -177,6 +192,8 @@ xchk_teardown(
kvfree(sc->buf);
sc->buf = NULL;
}
+
+ xchk_fsgates_disable(sc);
return error;
}
@@ -191,25 +208,25 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
},
[XFS_SCRUB_TYPE_SB] = { /* superblock */
.type = ST_PERAG,
- .setup = xchk_setup_fs,
+ .setup = xchk_setup_agheader,
.scrub = xchk_superblock,
.repair = xrep_superblock,
},
[XFS_SCRUB_TYPE_AGF] = { /* agf */
.type = ST_PERAG,
- .setup = xchk_setup_fs,
+ .setup = xchk_setup_agheader,
.scrub = xchk_agf,
.repair = xrep_agf,
},
[XFS_SCRUB_TYPE_AGFL]= { /* agfl */
.type = ST_PERAG,
- .setup = xchk_setup_fs,
+ .setup = xchk_setup_agheader,
.scrub = xchk_agfl,
.repair = xrep_agfl,
},
[XFS_SCRUB_TYPE_AGI] = { /* agi */
.type = ST_PERAG,
- .setup = xchk_setup_fs,
+ .setup = xchk_setup_agheader,
.scrub = xchk_agi,
.repair = xrep_agi,
},
@@ -491,23 +508,20 @@ retry_op:
/* Set up for the operation. */
error = sc->ops->setup(sc);
+ if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
+ goto try_harder;
+ if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
+ goto need_drain;
if (error)
goto out_teardown;
/* Scrub for errors. */
error = sc->ops->scrub(sc);
- if (!(sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
- /*
- * Scrubbers return -EDEADLOCK to mean 'try harder'.
- * Tear down everything we hold, then set up again with
- * preparation for worst-case scenarios.
- */
- error = xchk_teardown(sc, 0);
- if (error)
- goto out_sc;
- sc->flags |= XCHK_TRY_HARDER;
- goto retry_op;
- } else if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE))
+ if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
+ goto try_harder;
+ if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
+ goto need_drain;
+ if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE))
goto out_teardown;
xchk_update_health(sc);
@@ -565,4 +579,21 @@ out:
error = 0;
}
return error;
+need_drain:
+ error = xchk_teardown(sc, 0);
+ if (error)
+ goto out_sc;
+ sc->flags |= XCHK_NEED_DRAIN;
+ goto retry_op;
+try_harder:
+ /*
+ * Scrubbers return -EDEADLOCK to mean 'try harder'. Tear down
+ * everything we hold, then set up again with preparation for
+ * worst-case scenarios.
+ */
+ error = xchk_teardown(sc, 0);
+ if (error)
+ goto out_sc;
+ sc->flags |= XCHK_TRY_HARDER;
+ goto retry_op;
}
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 72a5a8a64a87..d85c3b883b4c 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -96,9 +96,19 @@ struct xfs_scrub {
/* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
#define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */
-#define XCHK_REAPING_DISABLED (1 << 2) /* background block reaping paused */
+#define XCHK_REAPING_DISABLED (1 << 1) /* background block reaping paused */
+#define XCHK_FSGATES_DRAIN (1 << 2) /* defer ops draining enabled */
+#define XCHK_NEED_DRAIN (1 << 3) /* scrub needs to drain defer ops */
#define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */
+/*
+ * The XCHK_FSGATES* flags reflect functionality in the main filesystem that
+ * is only enabled for this particular online fsck. When not in use, the
+ * features are gated off via dynamic code patching, which is why the state
+ * must be enabled during scrub setup and can only be torn down afterwards.
+ */
+#define XCHK_FSGATES_ALL (XCHK_FSGATES_DRAIN)
+
/* Metadata scrubbers */
int xchk_tester(struct xfs_scrub *sc);
int xchk_superblock(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 81f7c3051a1a..68efd6fda61c 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -30,6 +30,9 @@ TRACE_DEFINE_ENUM(XFS_BTNUM_FINOi);
TRACE_DEFINE_ENUM(XFS_BTNUM_RMAPi);
TRACE_DEFINE_ENUM(XFS_BTNUM_REFCi);
+TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED);
+TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW);
+
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_PROBE);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_SB);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_AGF);
@@ -93,6 +96,13 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
{ XFS_SCRUB_OFLAG_WARNING, "warning" }, \
{ XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED, "norepair" }
+#define XFS_SCRUB_STATE_STRINGS \
+ { XCHK_TRY_HARDER, "try_harder" }, \
+ { XCHK_REAPING_DISABLED, "reaping_disabled" }, \
+ { XCHK_FSGATES_DRAIN, "fsgates_drain" }, \
+ { XCHK_NEED_DRAIN, "need_drain" }, \
+ { XREP_ALREADY_FIXED, "already_fixed" }
+
DECLARE_EVENT_CLASS(xchk_class,
TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm,
int error),
@@ -139,6 +149,33 @@ DEFINE_SCRUB_EVENT(xchk_deadlock_retry);
DEFINE_SCRUB_EVENT(xrep_attempt);
DEFINE_SCRUB_EVENT(xrep_done);
+DECLARE_EVENT_CLASS(xchk_fsgate_class,
+ TP_PROTO(struct xfs_scrub *sc, unsigned int fsgate_flags),
+ TP_ARGS(sc, fsgate_flags),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, type)
+ __field(unsigned int, fsgate_flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = sc->mp->m_super->s_dev;
+ __entry->type = sc->sm->sm_type;
+ __entry->fsgate_flags = fsgate_flags;
+ ),
+ TP_printk("dev %d:%d type %s fsgates '%s'",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __print_flags(__entry->fsgate_flags, "|", XFS_SCRUB_STATE_STRINGS))
+)
+
+#define DEFINE_SCRUB_FSHOOK_EVENT(name) \
+DEFINE_EVENT(xchk_fsgate_class, name, \
+ TP_PROTO(struct xfs_scrub *sc, unsigned int fsgates_flags), \
+ TP_ARGS(sc, fsgates_flags))
+
+DEFINE_SCRUB_FSHOOK_EVENT(xchk_fsgates_enable);
+DEFINE_SCRUB_FSHOOK_EVENT(xchk_fsgates_disable);
+
TRACE_EVENT(xchk_op_error,
TP_PROTO(struct xfs_scrub *sc, xfs_agnumber_t agno,
xfs_agblock_t bno, int error, void *ret_ip),
@@ -657,6 +694,38 @@ TRACE_EVENT(xchk_fscounters_within_range,
__entry->old_value)
)
+TRACE_EVENT(xchk_refcount_incorrect,
+ TP_PROTO(struct xfs_perag *pag, const struct xfs_refcount_irec *irec,
+ xfs_nlink_t seen),
+ TP_ARGS(pag, irec, seen),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, domain)
+ __field(xfs_agblock_t, startblock)
+ __field(xfs_extlen_t, blockcount)
+ __field(xfs_nlink_t, refcount)
+ __field(xfs_nlink_t, seen)
+ ),
+ TP_fast_assign(
+ __entry->dev = pag->pag_mount->m_super->s_dev;
+ __entry->agno = pag->pag_agno;
+ __entry->domain = irec->rc_domain;
+ __entry->startblock = irec->rc_startblock;
+ __entry->blockcount = irec->rc_blockcount;
+ __entry->refcount = irec->rc_refcount;
+ __entry->seen = seen;
+ ),
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u seen %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS),
+ __entry->startblock,
+ __entry->blockcount,
+ __entry->refcount,
+ __entry->seen)
+)
+
/* repair tracepoints */
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 8f0f33d07d2c..7551c3ec4ea5 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -373,7 +373,15 @@ xfs_bmap_update_get_group(
xfs_agnumber_t agno;
agno = XFS_FSB_TO_AGNO(mp, bi->bi_bmap.br_startblock);
- bi->bi_pag = xfs_perag_get(mp, agno);
+
+ /*
+ * Bump the intent count on behalf of the deferred rmap and refcount
+ * intent items that we can queue when we finish this bmap work.
+ * This new intent item will bump the intent count before the bmap
+ * intent drops the intent count, ensuring that the intent count
+ * remains nonzero across the transaction roll.
+ */
+ bi->bi_pag = xfs_perag_intent_get(mp, agno);
}
/* Release a passive AG ref after finishing mapping work. */
@@ -381,7 +389,7 @@ static inline void
xfs_bmap_update_put_group(
struct xfs_bmap_intent *bi)
{
- xfs_perag_put(bi->bi_pag);
+ xfs_perag_intent_put(bi->bi_pag);
}
/* Process a deferred rmap update. */
diff --git a/fs/xfs/xfs_drain.c b/fs/xfs/xfs_drain.c
new file mode 100644
index 000000000000..005a66be44a2
--- /dev/null
+++ b/fs/xfs/xfs_drain.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_ag.h"
+#include "xfs_trace.h"
+
+/*
+ * Use a static key here to reduce the overhead of xfs_defer_drain_rele. If
+ * the compiler supports jump labels, the static branch will be replaced by a
+ * nop sled when there are no xfs_defer_drain_wait callers. Online fsck is
+ * currently the only caller, so this is a reasonable tradeoff.
+ *
+ * Note: Patching the kernel code requires taking the cpu hotplug lock. Other
+ * parts of the kernel allocate memory with that lock held, which means that
+ * XFS callers cannot hold any locks that might be used by memory reclaim or
+ * writeback when calling the static_branch_{inc,dec} functions.
+ */
+static DEFINE_STATIC_KEY_FALSE(xfs_drain_waiter_gate);
+
+void
+xfs_drain_wait_disable(void)
+{
+ static_branch_dec(&xfs_drain_waiter_gate);
+}
+
+void
+xfs_drain_wait_enable(void)
+{
+ static_branch_inc(&xfs_drain_waiter_gate);
+}
+
+void
+xfs_defer_drain_init(
+ struct xfs_defer_drain *dr)
+{
+ atomic_set(&dr->dr_count, 0);
+ init_waitqueue_head(&dr->dr_waiters);
+}
+
+void
+xfs_defer_drain_free(struct xfs_defer_drain *dr)
+{
+ ASSERT(atomic_read(&dr->dr_count) == 0);
+}
+
+/* Increase the pending intent count. */
+static inline void xfs_defer_drain_grab(struct xfs_defer_drain *dr)
+{
+ atomic_inc(&dr->dr_count);
+}
+
+static inline bool has_waiters(struct wait_queue_head *wq_head)
+{
+ /*
+ * This memory barrier is paired with the one in set_current_state on
+ * the waiting side.
+ */
+ smp_mb__after_atomic();
+ return waitqueue_active(wq_head);
+}
+
+/* Decrease the pending intent count, and wake any waiters, if appropriate. */
+static inline void xfs_defer_drain_rele(struct xfs_defer_drain *dr)
+{
+ if (atomic_dec_and_test(&dr->dr_count) &&
+ static_branch_unlikely(&xfs_drain_waiter_gate) &&
+ has_waiters(&dr->dr_waiters))
+ wake_up(&dr->dr_waiters);
+}
+
+/* Are there intents pending? */
+static inline bool xfs_defer_drain_busy(struct xfs_defer_drain *dr)
+{
+ return atomic_read(&dr->dr_count) > 0;
+}
+
+/*
+ * Wait for the pending intent count for a drain to hit zero.
+ *
+ * Callers must not hold any locks that would prevent intents from being
+ * finished.
+ */
+static inline int xfs_defer_drain_wait(struct xfs_defer_drain *dr)
+{
+ return wait_event_killable(dr->dr_waiters, !xfs_defer_drain_busy(dr));
+}
+
+/*
+ * Get a passive reference to an AG and declare an intent to update its
+ * metadata.
+ */
+struct xfs_perag *
+xfs_perag_intent_get(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, agno);
+ if (!pag)
+ return NULL;
+
+ xfs_perag_intent_hold(pag);
+ return pag;
+}
+
+/*
+ * Release our intent to update this AG's metadata, and then release our
+ * passive ref to the AG.
+ */
+void
+xfs_perag_intent_put(
+ struct xfs_perag *pag)
+{
+ xfs_perag_intent_rele(pag);
+ xfs_perag_put(pag);
+}
+
+/*
+ * Declare an intent to update AG metadata. Other threads that need exclusive
+ * access can decide to back off if they see declared intentions.
+ */
+void
+xfs_perag_intent_hold(
+ struct xfs_perag *pag)
+{
+ trace_xfs_perag_intent_hold(pag, __return_address);
+ xfs_defer_drain_grab(&pag->pag_intents_drain);
+}
+
+/* Release our intent to update this AG's metadata. */
+void
+xfs_perag_intent_rele(
+ struct xfs_perag *pag)
+{
+ trace_xfs_perag_intent_rele(pag, __return_address);
+ xfs_defer_drain_rele(&pag->pag_intents_drain);
+}
+
+/*
+ * Wait for the intent update count for this AG to hit zero.
+ * Callers must not hold any AG header buffers.
+ */
+int
+xfs_perag_intent_drain(
+ struct xfs_perag *pag)
+{
+ trace_xfs_perag_wait_intents(pag, __return_address);
+ return xfs_defer_drain_wait(&pag->pag_intents_drain);
+}
+
+/* Has anyone declared an intent to update this AG? */
+bool
+xfs_perag_intent_busy(
+ struct xfs_perag *pag)
+{
+ return xfs_defer_drain_busy(&pag->pag_intents_drain);
+}
diff --git a/fs/xfs/xfs_drain.h b/fs/xfs/xfs_drain.h
new file mode 100644
index 000000000000..50a5772a8296
--- /dev/null
+++ b/fs/xfs/xfs_drain.h
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef XFS_DRAIN_H_
+#define XFS_DRAIN_H_
+
+struct xfs_perag;
+
+#ifdef CONFIG_XFS_DRAIN_INTENTS
+/*
+ * Passive drain mechanism. This data structure tracks a count of some items
+ * and contains a waitqueue for callers who would like to wake up when the
+ * count hits zero.
+ */
+struct xfs_defer_drain {
+ /* Number of items pending in some part of the filesystem. */
+ atomic_t dr_count;
+
+ /* Queue to wait for dr_count to go to zero */
+ struct wait_queue_head dr_waiters;
+};
+
+void xfs_defer_drain_init(struct xfs_defer_drain *dr);
+void xfs_defer_drain_free(struct xfs_defer_drain *dr);
+
+void xfs_drain_wait_disable(void);
+void xfs_drain_wait_enable(void);
+
+/*
+ * Deferred Work Intent Drains
+ * ===========================
+ *
+ * When a writer thread executes a chain of log intent items, the AG header
+ * buffer locks will cycle during a transaction roll to get from one intent
+ * item to the next in a chain. Although scrub takes all AG header buffer
+ * locks, this isn't sufficient to guard against scrub checking an AG while
+ * that writer thread is in the middle of finishing a chain because there's no
+ * higher level locking primitive guarding allocation groups.
+ *
+ * When there's a collision, cross-referencing between data structures (e.g.
+ * rmapbt and refcountbt) yields false corruption events; if repair is running,
+ * this results in incorrect repairs, which is catastrophic.
+ *
+ * The solution is to add to the perag structure a count of active intents and
+ * make scrub wait until it has both AG header buffer locks and the intent
+ * counter reaches zero. It is therefore critical that deferred work threads
+ * hold the AGI or AGF buffers when decrementing the intent counter.
+ *
+ * Given a list of deferred work items, the deferred work manager will complete
+ * a work item and all the sub-items that the parent item creates before moving
+ * on to the next work item in the list. This is also true for all levels of
+ * sub-items. Writer threads are permitted to queue multiple work items
+ * targeting the same AG, so a deferred work item (such as a BUI) that creates
+ * sub-items (such as RUIs) must bump the intent counter and maintain it until
+ * the sub-items can themselves bump the intent counter.
+ *
+ * Therefore, the intent count tracks entire lifetimes of deferred work items.
+ * All functions that create work items must increment the intent counter as
+ * soon as the item is added to the transaction and cannot drop the counter
+ * until the item is finished or cancelled.
+ */
+struct xfs_perag *xfs_perag_intent_get(struct xfs_mount *mp,
+ xfs_agnumber_t agno);
+void xfs_perag_intent_put(struct xfs_perag *pag);
+
+void xfs_perag_intent_hold(struct xfs_perag *pag);
+void xfs_perag_intent_rele(struct xfs_perag *pag);
+
+int xfs_perag_intent_drain(struct xfs_perag *pag);
+bool xfs_perag_intent_busy(struct xfs_perag *pag);
+#else
+struct xfs_defer_drain { /* empty */ };
+
+#define xfs_defer_drain_free(dr) ((void)0)
+#define xfs_defer_drain_init(dr) ((void)0)
+
+#define xfs_perag_intent_get(mp, agno) xfs_perag_get((mp), (agno))
+#define xfs_perag_intent_put(pag) xfs_perag_put(pag)
+
+static inline void xfs_perag_intent_hold(struct xfs_perag *pag) { }
+static inline void xfs_perag_intent_rele(struct xfs_perag *pag) { }
+
+#endif /* CONFIG_XFS_DRAIN_INTENTS */
+
+#endif /* XFS_DRAIN_H_ */
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 38b66fcfddc8..f9e36b810663 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -469,7 +469,7 @@ xfs_extent_free_get_group(
xfs_agnumber_t agno;
agno = XFS_FSB_TO_AGNO(mp, xefi->xefi_startblock);
- xefi->xefi_pag = xfs_perag_get(mp, agno);
+ xefi->xefi_pag = xfs_perag_intent_get(mp, agno);
}
/* Release a passive AG ref after some freeing work. */
@@ -477,7 +477,7 @@ static inline void
xfs_extent_free_put_group(
struct xfs_extent_free_item *xefi)
{
- xfs_perag_put(xefi->xefi_pag);
+ xfs_perag_intent_put(xefi->xefi_pag);
}
/* Process a free extent. */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index e88f18f85e4b..74dcb05069e8 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -80,6 +80,7 @@ typedef __u32 xfs_nlink_t;
#include "xfs_cksum.h"
#include "xfs_buf.h"
#include "xfs_message.h"
+#include "xfs_drain.h"
#ifdef __BIG_ENDIAN
#define XFS_NATIVE_HOST 1
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 7edee9590ed6..edd8587658d5 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -374,7 +374,7 @@ xfs_refcount_update_get_group(
xfs_agnumber_t agno;
agno = XFS_FSB_TO_AGNO(mp, ri->ri_startblock);
- ri->ri_pag = xfs_perag_get(mp, agno);
+ ri->ri_pag = xfs_perag_intent_get(mp, agno);
}
/* Release a passive AG ref after finishing refcounting work. */
@@ -382,7 +382,7 @@ static inline void
xfs_refcount_update_put_group(
struct xfs_refcount_intent *ri)
{
- xfs_perag_put(ri->ri_pag);
+ xfs_perag_intent_put(ri->ri_pag);
}
/* Process a deferred refcount update. */
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 739ddbd04a17..520c7ebdfed8 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -399,7 +399,7 @@ xfs_rmap_update_get_group(
xfs_agnumber_t agno;
agno = XFS_FSB_TO_AGNO(mp, ri->ri_bmap.br_startblock);
- ri->ri_pag = xfs_perag_get(mp, agno);
+ ri->ri_pag = xfs_perag_intent_get(mp, agno);
}
/* Release a passive AG ref after finishing rmapping work. */
@@ -407,7 +407,7 @@ static inline void
xfs_rmap_update_put_group(
struct xfs_rmap_intent *ri)
{
- xfs_perag_put(ri->ri_pag);
+ xfs_perag_intent_put(ri->ri_pag);
}
/* Process a deferred rmap update. */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index db09bb771765..cd4ca5b1fcb0 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2687,6 +2687,44 @@ DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_deferred);
DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_agfl_free_defer);
DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_agfl_free_deferred);
+DECLARE_EVENT_CLASS(xfs_defer_pending_item_class,
+ TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp,
+ void *item),
+ TP_ARGS(mp, dfp, item),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, type)
+ __field(void *, intent)
+ __field(void *, item)
+ __field(char, committed)
+ __field(int, nr)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp ? mp->m_super->s_dev : 0;
+ __entry->type = dfp->dfp_type;
+ __entry->intent = dfp->dfp_intent;
+ __entry->item = item;
+ __entry->committed = dfp->dfp_done != NULL;
+ __entry->nr = dfp->dfp_count;
+ ),
+ TP_printk("dev %d:%d optype %d intent %p item %p committed %d nr %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->type,
+ __entry->intent,
+ __entry->item,
+ __entry->committed,
+ __entry->nr)
+)
+#define DEFINE_DEFER_PENDING_ITEM_EVENT(name) \
+DEFINE_EVENT(xfs_defer_pending_item_class, name, \
+ TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp, \
+ void *item), \
+ TP_ARGS(mp, dfp, item))
+
+DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_add_item);
+DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_cancel_item);
+DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_finish_item);
+
/* rmap tracepoints */
DECLARE_EVENT_CLASS(xfs_rmap_class,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
@@ -4326,6 +4364,39 @@ TRACE_EVENT(xfs_force_shutdown,
__entry->line_num)
);
+#ifdef CONFIG_XFS_DRAIN_INTENTS
+DECLARE_EVENT_CLASS(xfs_perag_intents_class,
+ TP_PROTO(struct xfs_perag *pag, void *caller_ip),
+ TP_ARGS(pag, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(long, nr_intents)
+ __field(void *, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = pag->pag_mount->m_super->s_dev;
+ __entry->agno = pag->pag_agno;
+ __entry->nr_intents = atomic_read(&pag->pag_intents_drain.dr_count);
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d agno 0x%x intents %ld caller %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->nr_intents,
+ __entry->caller_ip)
+);
+
+#define DEFINE_PERAG_INTENTS_EVENT(name) \
+DEFINE_EVENT(xfs_perag_intents_class, name, \
+ TP_PROTO(struct xfs_perag *pag, void *caller_ip), \
+ TP_ARGS(pag, caller_ip))
+DEFINE_PERAG_INTENTS_EVENT(xfs_perag_intent_hold);
+DEFINE_PERAG_INTENTS_EVENT(xfs_perag_intent_rele);
+DEFINE_PERAG_INTENTS_EVENT(xfs_perag_wait_intents);
+
+#endif /* CONFIG_XFS_DRAIN_INTENTS */
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH