summaryrefslogtreecommitdiff
path: root/fs/xfs/scrub/repair.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/scrub/repair.c')
-rw-r--r--fs/xfs/scrub/repair.c377
1 files changed, 61 insertions, 316 deletions
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index ac6d8803e660..1b8b5439f2d7 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -26,11 +26,13 @@
#include "xfs_ag_resv.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
+#include "xfs_defer.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
+#include "scrub/stats.h"
/*
* Attempt to repair some metadata, if the metadata is corrupt and userspace
@@ -39,8 +41,10 @@
*/
int
xrep_attempt(
- struct xfs_scrub *sc)
+ struct xfs_scrub *sc,
+ struct xchk_stats_run *run)
{
+ u64 repair_start;
int error = 0;
trace_xrep_attempt(XFS_I(file_inode(sc->file)), sc->sm, error);
@@ -49,8 +53,11 @@ xrep_attempt(
/* Repair whatever's broken. */
ASSERT(sc->ops->repair);
+ run->repair_attempted = true;
+ repair_start = xchk_stats_now();
error = sc->ops->repair(sc);
trace_xrep_done(XFS_I(file_inode(sc->file)), sc->sm, error);
+ run->repair_ns += xchk_stats_elapsed_ns(repair_start);
switch (error) {
case 0:
/*
@@ -59,14 +66,17 @@ xrep_attempt(
*/
sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
sc->flags |= XREP_ALREADY_FIXED;
+ run->repair_succeeded = true;
return -EAGAIN;
case -ECHRNG:
sc->flags |= XCHK_NEED_DRAIN;
+ run->retries++;
return -EAGAIN;
case -EDEADLOCK:
/* Tell the caller to try again having grabbed all the locks. */
if (!(sc->flags & XCHK_TRY_HARDER)) {
sc->flags |= XCHK_TRY_HARDER;
+ run->retries++;
return -EAGAIN;
}
/*
@@ -166,6 +176,56 @@ xrep_roll_ag_trans(
return 0;
}
+/* Finish all deferred work attached to the repair transaction. */
+int
+xrep_defer_finish(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ /*
+ * Keep the AG header buffers locked while we complete deferred work
+ * items. Ensure that both AG buffers are dirty and held when we roll
+ * the transaction so that they move forward in the log without losing
+ * the bli (and hence the bli type) when the transaction commits.
+ *
+ * Normal code would never hold clean buffers across a roll, but repair
+ * needs both buffers to maintain a total lock on the AG.
+ */
+ if (sc->sa.agi_bp) {
+ xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM);
+ xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
+ }
+
+ if (sc->sa.agf_bp) {
+ xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM);
+ xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
+ }
+
+ /*
+ * Finish all deferred work items. We still hold the AG header buffers
+ * locked regardless of whether or not that succeeds. On failure, the
+ * buffers will be released during teardown on our way out of the
+ * kernel. If successful, join the buffers to the new transaction
+ * and move on.
+ */
+ error = xfs_defer_finish(&sc->tp);
+ if (error)
+ return error;
+
+ /*
+ * Release the hold that we set above because defer_finish won't do
+ * that for us. The defer roll code redirties held buffers after each
+ * roll, so the AG header buffers should be ready for logging.
+ */
+ if (sc->sa.agi_bp)
+ xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
+ if (sc->sa.agf_bp)
+ xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
+
+ return 0;
+}
+
/*
* Does the given AG have enough space to rebuild a btree? Neither AG
* reservation can be critical, and we must have enough space (factoring
@@ -297,89 +357,6 @@ xrep_calc_ag_resblks(
return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
}
-/* Allocate a block in an AG. */
-int
-xrep_alloc_ag_block(
- struct xfs_scrub *sc,
- const struct xfs_owner_info *oinfo,
- xfs_fsblock_t *fsbno,
- enum xfs_ag_resv_type resv)
-{
- struct xfs_alloc_arg args = {0};
- xfs_agblock_t bno;
- int error;
-
- switch (resv) {
- case XFS_AG_RESV_AGFL:
- case XFS_AG_RESV_RMAPBT:
- error = xfs_alloc_get_freelist(sc->sa.pag, sc->tp,
- sc->sa.agf_bp, &bno, 1);
- if (error)
- return error;
- if (bno == NULLAGBLOCK)
- return -ENOSPC;
- xfs_extent_busy_reuse(sc->mp, sc->sa.pag, bno, 1, false);
- *fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, bno);
- if (resv == XFS_AG_RESV_RMAPBT)
- xfs_ag_resv_rmapbt_alloc(sc->mp, sc->sa.pag->pag_agno);
- return 0;
- default:
- break;
- }
-
- args.tp = sc->tp;
- args.mp = sc->mp;
- args.pag = sc->sa.pag;
- args.oinfo = *oinfo;
- args.minlen = 1;
- args.maxlen = 1;
- args.prod = 1;
- args.resv = resv;
-
- error = xfs_alloc_vextent_this_ag(&args, sc->sa.pag->pag_agno);
- if (error)
- return error;
- if (args.fsbno == NULLFSBLOCK)
- return -ENOSPC;
- ASSERT(args.len == 1);
- *fsbno = args.fsbno;
-
- return 0;
-}
-
-/* Initialize a new AG btree root block with zero entries. */
-int
-xrep_init_btblock(
- struct xfs_scrub *sc,
- xfs_fsblock_t fsb,
- struct xfs_buf **bpp,
- xfs_btnum_t btnum,
- const struct xfs_buf_ops *ops)
-{
- struct xfs_trans *tp = sc->tp;
- struct xfs_mount *mp = sc->mp;
- struct xfs_buf *bp;
- int error;
-
- trace_xrep_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
- XFS_FSB_TO_AGBNO(mp, fsb), btnum);
-
- ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.pag->pag_agno);
- error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
- XFS_FSB_TO_DADDR(mp, fsb), XFS_FSB_TO_BB(mp, 1), 0,
- &bp);
- if (error)
- return error;
- xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
- xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.pag->pag_agno);
- xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
- xfs_trans_log_buf(tp, bp, 0, BBTOB(bp->b_length) - 1);
- bp->b_ops = ops;
- *bpp = bp;
-
- return 0;
-}
-
/*
* Reconstructing per-AG Btrees
*
@@ -404,91 +381,8 @@ xrep_init_btblock(
* sublist. As with the other btrees we subtract sublist from bitmap, and the
* result (since the rmapbt lives in the free space) are the blocks from the
* old rmapbt.
- *
- * Disposal of Blocks from Old per-AG Btrees
- *
- * Now that we've constructed a new btree to replace the damaged one, we want
- * to dispose of the blocks that (we think) the old btree was using.
- * Previously, we used the rmapbt to collect the extents (bitmap) with the
- * rmap owner corresponding to the tree we rebuilt, collected extents for any
- * blocks with the same rmap owner that are owned by another data structure
- * (sublist), and subtracted sublist from bitmap. In theory the extents
- * remaining in bitmap are the old btree's blocks.
- *
- * Unfortunately, it's possible that the btree was crosslinked with other
- * blocks on disk. The rmap data can tell us if there are multiple owners, so
- * if the rmapbt says there is an owner of this block other than @oinfo, then
- * the block is crosslinked. Remove the reverse mapping and continue.
- *
- * If there is one rmap record, we can free the block, which removes the
- * reverse mapping but doesn't add the block to the free space. Our repair
- * strategy is to hope the other metadata objects crosslinked on this block
- * will be rebuilt (atop different blocks), thereby removing all the cross
- * links.
- *
- * If there are no rmap records at all, we also free the block. If the btree
- * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
- * supposed to be a rmap record and everything is ok. For other btrees there
- * had to have been an rmap entry for the block to have ended up on @bitmap,
- * so if it's gone now there's something wrong and the fs will shut down.
- *
- * Note: If there are multiple rmap records with only the same rmap owner as
- * the btree we're trying to rebuild and the block is indeed owned by another
- * data structure with the same rmap owner, then the block will be in sublist
- * and therefore doesn't need disposal. If there are multiple rmap records
- * with only the same rmap owner but the block is not owned by something with
- * the same rmap owner, the block will be freed.
- *
- * The caller is responsible for locking the AG headers for the entire rebuild
- * operation so that nothing else can sneak in and change the AG state while
- * we're not looking. We also assume that the caller already invalidated any
- * buffers associated with @bitmap.
*/
-static int
-xrep_invalidate_block(
- uint64_t fsbno,
- void *priv)
-{
- struct xfs_scrub *sc = priv;
- struct xfs_buf *bp;
- int error;
-
- /* Skip AG headers and post-EOFS blocks */
- if (!xfs_verify_fsbno(sc->mp, fsbno))
- return 0;
-
- error = xfs_buf_incore(sc->mp->m_ddev_targp,
- XFS_FSB_TO_DADDR(sc->mp, fsbno),
- XFS_FSB_TO_BB(sc->mp, 1), XBF_TRYLOCK, &bp);
- if (error)
- return 0;
-
- xfs_trans_bjoin(sc->tp, bp);
- xfs_trans_binval(sc->tp, bp);
- return 0;
-}
-
-/*
- * Invalidate buffers for per-AG btree blocks we're dumping. This function
- * is not intended for use with file data repairs; we have bunmapi for that.
- */
-int
-xrep_invalidate_blocks(
- struct xfs_scrub *sc,
- struct xbitmap *bitmap)
-{
- /*
- * For each block in each extent, see if there's an incore buffer for
- * exactly that block; if so, invalidate it. The buffer cache only
- * lets us look for one buffer at a time, so we have to look one block
- * at a time. Avoid invalidating AG headers and post-EOFS blocks
- * because we never own those; and if we can't TRYLOCK the buffer we
- * assume it's owned by someone else.
- */
- return xbitmap_walk_bits(bitmap, xrep_invalidate_block, sc);
-}
-
/* Ensure the freelist is the correct size. */
int
xrep_fix_freelist(
@@ -507,155 +401,6 @@ xrep_fix_freelist(
can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK);
}
-/* Information about reaping extents after a repair. */
-struct xrep_reap_state {
- struct xfs_scrub *sc;
-
- /* Reverse mapping owner and metadata reservation type. */
- const struct xfs_owner_info *oinfo;
- enum xfs_ag_resv_type resv;
-};
-
-/*
- * Put a block back on the AGFL.
- */
-STATIC int
-xrep_put_freelist(
- struct xfs_scrub *sc,
- xfs_agblock_t agbno)
-{
- struct xfs_buf *agfl_bp;
- int error;
-
- /* Make sure there's space on the freelist. */
- error = xrep_fix_freelist(sc, true);
- if (error)
- return error;
-
- /*
- * Since we're "freeing" a lost block onto the AGFL, we have to
- * create an rmap for the block prior to merging it or else other
- * parts will break.
- */
- error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1,
- &XFS_RMAP_OINFO_AG);
- if (error)
- return error;
-
- /* Put the block on the AGFL. */
- error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
- if (error)
- return error;
-
- error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp,
- agfl_bp, agbno, 0);
- if (error)
- return error;
- xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
- XFS_EXTENT_BUSY_SKIP_DISCARD);
-
- return 0;
-}
-
-/* Dispose of a single block. */
-STATIC int
-xrep_reap_block(
- uint64_t fsbno,
- void *priv)
-{
- struct xrep_reap_state *rs = priv;
- struct xfs_scrub *sc = rs->sc;
- struct xfs_btree_cur *cur;
- struct xfs_buf *agf_bp = NULL;
- xfs_agblock_t agbno;
- bool has_other_rmap;
- int error;
-
- ASSERT(sc->ip != NULL ||
- XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.pag->pag_agno);
- trace_xrep_dispose_btree_extent(sc->mp,
- XFS_FSB_TO_AGNO(sc->mp, fsbno),
- XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1);
-
- agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
- ASSERT(XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.pag->pag_agno);
-
- /*
- * If we are repairing per-inode metadata, we need to read in the AGF
- * buffer. Otherwise, we're repairing a per-AG structure, so reuse
- * the AGF buffer that the setup functions already grabbed.
- */
- if (sc->ip) {
- error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &agf_bp);
- if (error)
- return error;
- } else {
- agf_bp = sc->sa.agf_bp;
- }
- cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf_bp, sc->sa.pag);
-
- /* Can we find any other rmappings? */
- error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
- &has_other_rmap);
- xfs_btree_del_cursor(cur, error);
- if (error)
- goto out_free;
-
- /*
- * If there are other rmappings, this block is cross linked and must
- * not be freed. Remove the reverse mapping and move on. Otherwise,
- * we were the only owner of the block, so free the extent, which will
- * also remove the rmap.
- *
- * XXX: XFS doesn't support detecting the case where a single block
- * metadata structure is crosslinked with a multi-block structure
- * because the buffer cache doesn't detect aliasing problems, so we
- * can't fix 100% of crosslinking problems (yet). The verifiers will
- * blow on writeout, the filesystem will shut down, and the admin gets
- * to run xfs_repair.
- */
- if (has_other_rmap)
- error = xfs_rmap_free(sc->tp, agf_bp, sc->sa.pag, agbno,
- 1, rs->oinfo);
- else if (rs->resv == XFS_AG_RESV_AGFL)
- error = xrep_put_freelist(sc, agbno);
- else
- error = xfs_free_extent(sc->tp, sc->sa.pag, agbno, 1, rs->oinfo,
- rs->resv);
- if (agf_bp != sc->sa.agf_bp)
- xfs_trans_brelse(sc->tp, agf_bp);
- if (error)
- return error;
-
- if (sc->ip)
- return xfs_trans_roll_inode(&sc->tp, sc->ip);
- return xrep_roll_ag_trans(sc);
-
-out_free:
- if (agf_bp != sc->sa.agf_bp)
- xfs_trans_brelse(sc->tp, agf_bp);
- return error;
-}
-
-/* Dispose of every block of every extent in the bitmap. */
-int
-xrep_reap_extents(
- struct xfs_scrub *sc,
- struct xbitmap *bitmap,
- const struct xfs_owner_info *oinfo,
- enum xfs_ag_resv_type type)
-{
- struct xrep_reap_state rs = {
- .sc = sc,
- .oinfo = oinfo,
- .resv = type,
- };
-
- ASSERT(xfs_has_rmapbt(sc->mp));
-
- return xbitmap_walk_bits(bitmap, xrep_reap_block, &rs);
-}
-
/*
* Finding per-AG Btree Roots for AGF/AGI Reconstruction
*