summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
authorDave Chinner <david@fromorbit.com>2022-04-21 09:46:17 +0300
committerDave Chinner <david@fromorbit.com>2022-04-21 09:46:17 +0300
commita44a027a8b2a20fec30e0e9c99b0eb41c03e7420 (patch)
tree2cce770cced23924fd30b40601b52ffb609baab5 /fs/xfs/xfs_inode.c
parent463260d7670566c357dfa2c38bc3124c98b646bc (diff)
parent973ac0eb3a7dfedecd385bd2b48b12e62a0492f2 (diff)
downloadlinux-a44a027a8b2a20fec30e0e9c99b0eb41c03e7420.tar.xz
Merge tag 'large-extent-counters-v9' of https://github.com/chandanr/linux into xfs-5.19-for-next
xfs: Large extent counters The commit xfs: fix inode fork extent count overflow (3f8a4f1d876d3e3e49e50b0396eaffcc4ba71b08) mentions that 10 billion data fork extents should be possible to create. However the corresponding on-disk field has a signed 32-bit type. Hence this patchset extends the per-inode data fork extent counter to 64 bits (out of which 48 bits are used to store the extent count). Also, XFS has an attribute fork extent counter which is 16 bits wide. A workload that, 1. Creates 1 million 255-byte sized xattrs, 2. Deletes 50% of these xattrs in an alternating manner, 3. Tries to insert 400,000 new 255-byte sized xattrs causes the xattr extent counter to overflow. Dave tells me that there are instances where a single file has more than 100 million hardlinks. With parent pointers being stored in xattrs, we will overflow the signed 16-bits wide attribute extent counter when large number of hardlinks are created. Hence this patchset extends the on-disk field to 32-bits. The following changes are made to accomplish this, 1. A 64-bit inode field is carved out of existing di_pad and di_flushiter fields to hold the 64-bit data fork extent counter. 2. The existing 32-bit inode data fork extent counter will be used to hold the attribute fork extent counter. 3. A new incompat superblock flag to prevent older kernels from mounting the filesystem. Signed-off-by: Chandan Babu R <chandan.babu@oracle.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c59
1 files changed, 4 insertions, 55 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 6c60ff2a4c47..b2879870a17e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1027,11 +1027,6 @@ xfs_create(
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
unlock_dp_on_error = true;
- error = xfs_iext_count_may_overflow(dp, XFS_DATA_FORK,
- XFS_IEXT_DIR_MANIP_CNT(mp));
- if (error)
- goto out_trans_cancel;
-
/*
* A newly created regular or special file just has one directory
* entry pointing to them, but a directory also the "." entry
@@ -1245,11 +1240,6 @@ xfs_link(
if (error)
goto std_return;
- error = xfs_iext_count_may_overflow(tdp, XFS_DATA_FORK,
- XFS_IEXT_DIR_MANIP_CNT(mp));
- if (error)
- goto error_return;
-
/*
* If we are using project inheritance, we only allow hard link
* creation in our tree when the project IDs are the same; else
@@ -3215,35 +3205,6 @@ retry:
/*
* Check for expected errors before we dirty the transaction
* so we can return an error without a transaction abort.
- *
- * Extent count overflow check:
- *
- * From the perspective of src_dp, a rename operation is essentially a
- * directory entry remove operation. Hence the only place where we check
- * for extent count overflow for src_dp is in
- * xfs_bmap_del_extent_real(). xfs_bmap_del_extent_real() returns
- * -ENOSPC when it detects a possible extent count overflow and in
- * response, the higher layers of directory handling code do the
- * following:
- * 1. Data/Free blocks: XFS lets these blocks linger until a
- * future remove operation removes them.
- * 2. Dabtree blocks: XFS swaps the blocks with the last block in the
- * Leaf space and unmaps the last block.
- *
- * For target_dp, there are two cases depending on whether the
- * destination directory entry exists or not.
- *
- * When destination directory entry does not exist (i.e. target_ip ==
- * NULL), extent count overflow check is performed only when transaction
- * has a non-zero sized space reservation associated with it. With a
- * zero-sized space reservation, XFS allows a rename operation to
- * continue only when the directory has sufficient free space in its
- * data/leaf/free space blocks to hold the new entry.
- *
- * When destination directory entry exists (i.e. target_ip != NULL), all
- * we need to do is change the inode number associated with the already
- * existing entry. Hence there is no need to perform an extent count
- * overflow check.
*/
if (target_ip == NULL) {
/*
@@ -3254,12 +3215,6 @@ retry:
error = xfs_dir_canenter(tp, target_dp, target_name);
if (error)
goto out_trans_cancel;
- } else {
- error = xfs_iext_count_may_overflow(target_dp,
- XFS_DATA_FORK,
- XFS_IEXT_DIR_MANIP_CNT(mp));
- if (error)
- goto out_trans_cancel;
}
} else {
/*
@@ -3427,18 +3382,12 @@ retry:
* inode number of the whiteout inode rather than removing it
* altogether.
*/
- if (wip) {
+ if (wip)
error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
spaceres);
- } else {
- /*
- * NOTE: We don't need to check for extent count overflow here
- * because the dir remove name code will leave the dir block in
- * place if the extent count would overflow.
- */
+ else
error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
spaceres);
- }
if (error)
goto out_trans_cancel;
@@ -3520,8 +3469,8 @@ xfs_iflush(
if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp) >
ip->i_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
- "%s: detected corrupt incore inode %Lu, "
- "total extents = %d, nblocks = %Ld, ptr "PTR_FMT,
+ "%s: detected corrupt incore inode %llu, "
+ "total extents = %llu nblocks = %lld, ptr "PTR_FMT,
__func__, ip->i_ino,
ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp),
ip->i_nblocks, ip);