summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_trans.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 05:21:40 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 05:21:40 +0300
commit16d91548d1057691979de4686693f0ff92f46000 (patch)
tree0ab23001a138badebf7702416753c44e277e0ff7 /fs/xfs/xfs_trans.c
parentd9afbb3509900a953f5cf90bc57e793ee80c1108 (diff)
parent6dcde60efd946e38fac8d276a6ca47492103e856 (diff)
downloadlinux-16d91548d1057691979de4686693f0ff92f46000.tar.xz
Merge tag 'xfs-5.8-merge-8' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong: "Most of the changes this cycle are refactoring of existing code in preparation for things landing in the future. We also fixed various problems and deficiencies in the quota implementation, and (I hope) the last of the stale read vectors by forcing write allocations to go through the unwritten state until the write completes. Summary: - Various cleanups to remove dead code, unnecessary conditionals, asserts, etc. - Fix a linker warning caused by xfs stuffing '-g' into CFLAGS redundantly. - Tighten up our dmesg logging to ensure that everything is prefixed with 'XFS' for easier grepping. - Kill a bunch of typedefs. - Refactor the deferred ops code to reduce indirect function calls. - Increase type-safety with the deferred ops code. - Make the DAX mount options a tri-state. - Fix some error handling problems in the inode flush code and clean up other inode flush warts. - Refactor log recovery so that each log item recovery functions now live with the other log item processing code. - Fix some SPDX forms. - Fix quota counter corruption if the fs crashes after running quotacheck but before any dquots get logged. - Don't fail metadata verification on zero-entry attr leaf blocks, since they're just part of the disk format now due to a historic lack of log atomicity. - Don't allow SWAPEXT between files with different [ugp]id when quotas are enabled. - Refactor inode fork reading and verification to run directly from the inode-from-disk function. This means that we now actually guarantee that _iget'ted inodes are totally verified and ready to go. - Move the incore inode fork format and extent counts to the ifork structure. - Scalability improvements by reducing cacheline pingponging in struct xfs_mount. - More scalability improvements by removing m_active_trans from the hot path. - Fix inode counter update sanity checking to run /only/ on debug kernels. - Fix longstanding inconsistency in what error code we return when a program hits project quota limits (ENOSPC). - Fix group quota returning the wrong error code when a program hits group quota limits. - Fix per-type quota limits and grace periods for group and project quotas so that they actually work. - Allow extension of individual grace periods. - Refactor the non-reclaim inode radix tree walking code to remove a bunch of stupid little functions and straighten out the inconsistent naming schemes. - Fix a bug in speculative preallocation where we measured a new allocation based on the last extent mapping in the file instead of looking farther for the last contiguous space allocation. - Force delalloc writes to unwritten extents. This closes a stale disk contents exposure vector if the system goes down before the write completes. - More lockdep whackamole" * tag 'xfs-5.8-merge-8' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (129 commits) xfs: more lockdep whackamole with kmem_alloc* xfs: force writes to delalloc regions to unwritten xfs: refactor xfs_iomap_prealloc_size xfs: measure all contiguous previous extents for prealloc size xfs: don't fail unwritten extent conversion on writeback due to edquot xfs: rearrange xfs_inode_walk_ag parameters xfs: straighten out all the naming around incore inode tree walks xfs: move xfs_inode_ag_iterator to be closer to the perag walking code xfs: use bool for done in xfs_inode_ag_walk xfs: fix inode ag walk predicate function return values xfs: refactor eofb matching into a single helper xfs: remove __xfs_icache_free_eofblocks xfs: remove flags argument from xfs_inode_ag_walk xfs: remove xfs_inode_ag_iterator_flags xfs: remove unused xfs_inode_ag_iterator function xfs: replace open-coded XFS_ICI_NO_TAG xfs: move eofblocks conversion function to xfs_ioctl.c xfs: allow individual quota grace period extension xfs: per-type quota timers and warn limits xfs: switch xfs_get_defquota to take explicit type ...
Diffstat (limited to 'fs/xfs/xfs_trans.c')
-rw-r--r--fs/xfs/xfs_trans.c203
1 files changed, 42 insertions, 161 deletions
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 28b983ff8b11..3c94e5ff4316 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -68,7 +68,6 @@ xfs_trans_free(
xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
trace_xfs_trans_free(tp, _RET_IP_);
- atomic_dec(&tp->t_mountp->m_active_trans);
if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
sb_end_intwrite(tp->t_mountp->m_super);
xfs_trans_free_dqinfo(tp);
@@ -125,8 +124,6 @@ xfs_trans_dup(
xfs_defer_move(ntp, tp);
xfs_trans_dup_dqinfo(tp, ntp);
-
- atomic_inc(&tp->t_mountp->m_active_trans);
return ntp;
}
@@ -275,7 +272,6 @@ xfs_trans_alloc(
*/
WARN_ON(resp->tr_logres > 0 &&
mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
- atomic_inc(&mp->m_active_trans);
tp->t_magic = XFS_TRANS_HEADER_MAGIC;
tp->t_flags = flags;
@@ -299,20 +295,19 @@ xfs_trans_alloc(
/*
* Create an empty transaction with no reservation. This is a defensive
- * mechanism for routines that query metadata without actually modifying
- * them -- if the metadata being queried is somehow cross-linked (think a
- * btree block pointer that points higher in the tree), we risk deadlock.
- * However, blocks grabbed as part of a transaction can be re-grabbed.
- * The verifiers will notice the corrupt block and the operation will fail
- * back to userspace without deadlocking.
+ * mechanism for routines that query metadata without actually modifying them --
+ * if the metadata being queried is somehow cross-linked (think a btree block
+ * pointer that points higher in the tree), we risk deadlock. However, blocks
+ * grabbed as part of a transaction can be re-grabbed. The verifiers will
+ * notice the corrupt block and the operation will fail back to userspace
+ * without deadlocking.
*
- * Note the zero-length reservation; this transaction MUST be cancelled
- * without any dirty data.
+ * Note the zero-length reservation; this transaction MUST be cancelled without
+ * any dirty data.
*
- * Callers should obtain freeze protection to avoid two conflicts with fs
- * freezing: (1) having active transactions trip the m_active_trans ASSERTs;
- * and (2) grabbing buffers at the same time that freeze is trying to drain
- * the buffer LRU list.
+ * Callers should obtain freeze protection to avoid a conflict with fs freezing
+ * where we can be grabbing buffers at the same time that freeze is trying to
+ * drain the buffer LRU list.
*/
int
xfs_trans_alloc_empty(
@@ -534,57 +529,9 @@ xfs_trans_apply_sb_deltas(
sizeof(sbp->sb_frextents) - 1);
}
-STATIC int
-xfs_sb_mod8(
- uint8_t *field,
- int8_t delta)
-{
- int8_t counter = *field;
-
- counter += delta;
- if (counter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- *field = counter;
- return 0;
-}
-
-STATIC int
-xfs_sb_mod32(
- uint32_t *field,
- int32_t delta)
-{
- int32_t counter = *field;
-
- counter += delta;
- if (counter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- *field = counter;
- return 0;
-}
-
-STATIC int
-xfs_sb_mod64(
- uint64_t *field,
- int64_t delta)
-{
- int64_t counter = *field;
-
- counter += delta;
- if (counter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- *field = counter;
- return 0;
-}
-
/*
- * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
- * and apply superblock counter changes to the in-core superblock. The
+ * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations and
+ * apply superblock counter changes to the in-core superblock. The
* t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
* applied to the in-core superblock. The idea is that that has already been
* done.
@@ -593,7 +540,12 @@ xfs_sb_mod64(
* used block counts are not updated in the on disk superblock. In this case,
* XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
* still need to update the incore superblock with the changes.
+ *
+ * Deltas for the inode count are +/-64, hence we use a large batch size of 128
+ * so we don't need to take the counter lock on every update.
*/
+#define XFS_ICOUNT_BATCH 128
+
void
xfs_trans_unreserve_and_mod_sb(
struct xfs_trans *tp)
@@ -629,20 +581,21 @@ xfs_trans_unreserve_and_mod_sb(
/* apply the per-cpu counters */
if (blkdelta) {
error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
- if (error)
- goto out;
+ ASSERT(!error);
}
if (idelta) {
- error = xfs_mod_icount(mp, idelta);
- if (error)
- goto out_undo_fdblocks;
+ percpu_counter_add_batch(&mp->m_icount, idelta,
+ XFS_ICOUNT_BATCH);
+ if (idelta < 0)
+ ASSERT(__percpu_counter_compare(&mp->m_icount, 0,
+ XFS_ICOUNT_BATCH) >= 0);
}
if (ifreedelta) {
- error = xfs_mod_ifree(mp, ifreedelta);
- if (error)
- goto out_undo_icount;
+ percpu_counter_add(&mp->m_ifree, ifreedelta);
+ if (ifreedelta < 0)
+ ASSERT(percpu_counter_compare(&mp->m_ifree, 0) >= 0);
}
if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
@@ -650,95 +603,23 @@ xfs_trans_unreserve_and_mod_sb(
/* apply remaining deltas */
spin_lock(&mp->m_sb_lock);
- if (rtxdelta) {
- error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta);
- if (error)
- goto out_undo_ifree;
- }
-
- if (tp->t_dblocks_delta != 0) {
- error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta);
- if (error)
- goto out_undo_frextents;
- }
- if (tp->t_agcount_delta != 0) {
- error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta);
- if (error)
- goto out_undo_dblocks;
- }
- if (tp->t_imaxpct_delta != 0) {
- error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta);
- if (error)
- goto out_undo_agcount;
- }
- if (tp->t_rextsize_delta != 0) {
- error = xfs_sb_mod32(&mp->m_sb.sb_rextsize,
- tp->t_rextsize_delta);
- if (error)
- goto out_undo_imaxpct;
- }
- if (tp->t_rbmblocks_delta != 0) {
- error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks,
- tp->t_rbmblocks_delta);
- if (error)
- goto out_undo_rextsize;
- }
- if (tp->t_rblocks_delta != 0) {
- error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta);
- if (error)
- goto out_undo_rbmblocks;
- }
- if (tp->t_rextents_delta != 0) {
- error = xfs_sb_mod64(&mp->m_sb.sb_rextents,
- tp->t_rextents_delta);
- if (error)
- goto out_undo_rblocks;
- }
- if (tp->t_rextslog_delta != 0) {
- error = xfs_sb_mod8(&mp->m_sb.sb_rextslog,
- tp->t_rextslog_delta);
- if (error)
- goto out_undo_rextents;
- }
+ mp->m_sb.sb_frextents += rtxdelta;
+ mp->m_sb.sb_dblocks += tp->t_dblocks_delta;
+ mp->m_sb.sb_agcount += tp->t_agcount_delta;
+ mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta;
+ mp->m_sb.sb_rextsize += tp->t_rextsize_delta;
+ mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta;
+ mp->m_sb.sb_rblocks += tp->t_rblocks_delta;
+ mp->m_sb.sb_rextents += tp->t_rextents_delta;
+ mp->m_sb.sb_rextslog += tp->t_rextslog_delta;
spin_unlock(&mp->m_sb_lock);
- return;
-out_undo_rextents:
- if (tp->t_rextents_delta)
- xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta);
-out_undo_rblocks:
- if (tp->t_rblocks_delta)
- xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta);
-out_undo_rbmblocks:
- if (tp->t_rbmblocks_delta)
- xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta);
-out_undo_rextsize:
- if (tp->t_rextsize_delta)
- xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta);
-out_undo_imaxpct:
- if (tp->t_rextsize_delta)
- xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta);
-out_undo_agcount:
- if (tp->t_agcount_delta)
- xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta);
-out_undo_dblocks:
- if (tp->t_dblocks_delta)
- xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta);
-out_undo_frextents:
- if (rtxdelta)
- xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta);
-out_undo_ifree:
- spin_unlock(&mp->m_sb_lock);
- if (ifreedelta)
- xfs_mod_ifree(mp, -ifreedelta);
-out_undo_icount:
- if (idelta)
- xfs_mod_icount(mp, -idelta);
-out_undo_fdblocks:
- if (blkdelta)
- xfs_mod_fdblocks(mp, -blkdelta, rsvd);
-out:
- ASSERT(error == 0);
+ /*
+ * Debug checks outside of the spinlock so they don't lock up the
+ * machine if they fail.
+ */
+ ASSERT(mp->m_sb.sb_imax_pct >= 0);
+ ASSERT(mp->m_sb.sb_rextslog >= 0);
return;
}