From cc6f77710a6de6210f9feda7cd53e2f5ee7a7e69 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 18 Sep 2017 09:41:16 -0700 Subject: xfs: don't unconditionally clear the reflink flag on zero-block files If we have speculative cow preallocations hanging around in the cow fork, don't let a truncate operation clear the reflink flag because if we do then there's a chance we'll forget to free those extents when we destroy the incore inode. Reported-by: Amir Goldstein Reviewed-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_inode.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 5599dda4727a..4ec5b7f45401 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1624,10 +1624,12 @@ xfs_itruncate_extents( goto out; /* - * Clear the reflink flag if we truncated everything. + * Clear the reflink flag if there are no data fork blocks and + * there are no extents staged in the cow fork. */ - if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) { - ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) { + if (ip->i_d.di_nblocks == 0) + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; xfs_inode_clear_cowblocks_tag(ip); } -- cgit v1.2.3 From 3af423b03435c81036fa710623d3ae92fbe346a3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 18 Sep 2017 09:41:17 -0700 Subject: xfs: evict CoW fork extents when performing finsert/fcollapse When we perform an finsert/fcollapse operation, cancel all the CoW extents for the affected file offset range so that they don't end up pointing to the wrong blocks. Reported-by: Amir Goldstein Reviewed-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index cd9a5400ba4f..bc6c6e10a969 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1459,7 +1459,19 @@ xfs_shift_file_space( return error; /* - * The extent shiting code works on extent granularity. So, if + * Clean out anything hanging around in the cow fork now that + * we've flushed all the dirty data out to disk to avoid having + * CoW extents at the wrong offsets. + */ + if (xfs_is_reflink_inode(ip)) { + error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF, + true); + if (error) + return error; + } + + /* + * The extent shifting code works on extent granularity. So, if * stop_fsb is not the starting block of extent, we need to split * the extent at stop_fsb. */ -- cgit v1.2.3 From e150dcd459e1b441eaf08f341a986f04e61bf3b8 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 18 Sep 2017 11:34:16 -0700 Subject: fs/xfs: Use %pS printk format for direct addresses Use the %pS instead of the %pF printk format specifier for printing symbols from direct addresses. This is needed for the ia64, ppc64 and parisc64 architectures. Signed-off-by: Helge Deller Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index bd786a9ac2c3..eaf86f55b7f2 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -347,7 +347,7 @@ xfs_verifier_error( { struct xfs_mount *mp = bp->b_target->bt_mount; - xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx", + xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx", bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", __return_address, bp->b_ops->name, bp->b_bn); -- cgit v1.2.3 From 64671bafbdd984535aa382bccadd91fbe7be0e80 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Mon, 18 Sep 2017 11:38:58 -0700 Subject: xfs: kill meaningless variable 'zero' In xfs_file_aio_write_checks(), variable 'zero' is there only to satisfy xfs_zero_eof(), the result of it is ignored. Now, with iomap_zero_range() based xfs_zero_eof(), we can safely pass NULL as the last param of it and kill 'zero'. Signed-off-by: Eryu Guan Reviewed-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_file.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index ebdd0bd2b261..261d83f1db76 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -377,8 +377,6 @@ restart: */ spin_lock(&ip->i_flags_lock); if (iocb->ki_pos > i_size_read(inode)) { - bool zero = false; - spin_unlock(&ip->i_flags_lock); if (!drained_dio) { if (*iolock == XFS_IOLOCK_SHARED) { @@ -399,7 +397,7 @@ restart: drained_dio = true; goto restart; } - error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); + error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL); if (error) return error; } else -- cgit v1.2.3 From d20a5e3851969fa685f118a80e4df670255a4e8d Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Mon, 18 Sep 2017 11:39:23 -0700 Subject: xfs: report zeroed or not correctly in xfs_zero_range() The 'did_zero' param of xfs_zero_range() was not passed to iomap_zero_range() correctly. This was introduced by commit 7bb41db3ea16 ("xfs: handle 64-bit length in xfs_iozero"), and found by code inspection. Signed-off-by: Eryu Guan Reviewed-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 261d83f1db76..350b6d43ba23 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -58,7 +58,7 @@ xfs_zero_range( xfs_off_t count, bool *did_zero) { - return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops); + return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops); } int -- cgit v1.2.3 From 1e6fa688bffc0ff419a4c3e78dbaf7aabfb55183 Mon Sep 17 00:00:00 2001 From: Kenjiro Nakayama Date: Mon, 18 Sep 2017 12:03:56 -0700 Subject: xfs: Output warning message when discard option was enabled even though the device does not support discard In order to using discard function, it is necessary that not only xfs is mounted with discard option, but also the discard function is supported by the device. Current code doesn't output any message when users mount with discard option on unsupported device, so it is difficult to notice that it was not enabled actually. This patch adds the warning message to notice that discard option is not enabled due to unsupported device when the filesystem is mounted. Changes in v2 (Suggested by Brian Foster): - Move the unsupported device check into xfs_fs_fill_super(). - Clear the discard flag when device is unsupported. Signed-off-by: Kenjiro Nakayama Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_super.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c996f4ae4a5f..584cf2d573ba 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1654,6 +1654,16 @@ xfs_fs_fill_super( "DAX and reflink have not been tested together!"); } + if (mp->m_flags & XFS_MOUNT_DISCARD) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + + if (!blk_queue_discard(q)) { + xfs_warn(mp, "mounting with \"discard\" option, but " + "the device does not support discard"); + mp->m_flags &= ~XFS_MOUNT_DISCARD; + } + } + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { if (mp->m_sb.sb_rblocks) { xfs_alert(mp, -- cgit v1.2.3 From 60915f83cd1e021a66fc1503a446aef5c772553a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 18 Sep 2017 13:38:46 -0700 Subject: xfs: remove redundant re-initialization of total_nr_pages Variable total_nr_pages is being initialized and then updated with the same value, this latter assignment is redundant and can be removed. Cleans up clang build warning: Value stored to 'total_nr_pages' during its initialization is never read Signed-off-by: Colin Ian King Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index da14658da310..2f97c12ca75e 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1258,8 +1258,6 @@ xfs_buf_ioapply_map( int size; int offset; - total_nr_pages = bp->b_page_count; - /* skip the pages in the buffer before the start offset */ page_index = 0; offset = *buf_offset; -- cgit v1.2.3 From 6851a3db7e224bbb85e23b3c64a506c9e0904382 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 18 Sep 2017 14:46:03 -0700 Subject: xfs: validate bdev support for DAX inode flag Currently only the blocksize is checked, but we should really be calling bdev_dax_supported() which also tests to make sure we can get a struct dax_device and that the dax_direct_access() path is working. This is the same check that we do for the "-o dax" mount option in xfs_fs_fill_super(). This does not fix the race issues that caused the XFS DAX inode option to be disabled, so that option will still be disabled. If/when we re-enable it, though, I think we will want this issue to have been fixed. I also do think that we want to fix this in stable kernels. Signed-off-by: Ross Zwisler CC: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 5049e8ab6e30..aa75389be8cf 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1088,6 +1088,7 @@ xfs_ioctl_setattr_dax_invalidate( int *join_flags) { struct inode *inode = VFS_I(ip); + struct super_block *sb = inode->i_sb; int error; *join_flags = 0; @@ -1100,7 +1101,7 @@ xfs_ioctl_setattr_dax_invalidate( if (fa->fsx_xflags & FS_XFLAG_DAX) { if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) return -EINVAL; - if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE) + if (bdev_dax_supported(sb, sb->s_blocksize) < 0) return -EINVAL; } -- cgit v1.2.3 From 546e7be8244dc050effef0555df5b8d94d10dafc Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Fri, 22 Sep 2017 11:47:33 -0700 Subject: iomap_dio_rw: Allocate AIO completion queue before submitting dio Executing xfs/104 test in a loop on Linux-v4.13 kernel on a ppc64 machine can cause the following NULL pointer dereference, .queue_work_on+0x4c/0x80 .iomap_dio_bio_end_io+0xbc/0x1f0 .bio_endio+0x118/0x1f0 .blk_update_request+0xd0/0x470 .blk_mq_end_request+0x24/0xc0 .lo_complete_rq+0x40/0xe0 .__blk_mq_complete_request_remote+0x28/0x40 .flush_smp_call_function_queue+0xc4/0x1e0 .smp_ipi_demux_relaxed+0x8c/0x100 .icp_hv_ipi_action+0x54/0xa0 .__handle_irq_event_percpu+0x84/0x2c0 .handle_irq_event_percpu+0x28/0x80 .handle_percpu_irq+0x78/0xc0 .generic_handle_irq+0x40/0x70 .__do_irq+0x88/0x200 .call_do_irq+0x14/0x24 .do_IRQ+0x84/0x130 This occurs due to the following sequence of events, 1. Allocate dio for Direct I/O write. 2. Invoke iomap_apply() until iov_iter_count() bytes have been submitted. - Assume that we have submitted atleast one bio. Hence iomap_dio->ref value will be >= 2. - If during the second iteration, iomap_apply() ends up returning -ENOSPC, we would break out of the loop and since the 'ret' value is a negative number we end up not allocating memory for super_block->s_dio_done_wq. 3. Meanwhile, iomap_dio_bio_end_io() is invoked for bios that have been submitted and here the code ends up dereferencing the NULL pointer stored at super_block->s_dio_done_wq. This commit fixes the bug by allocating memory for super_block->s_dio_done_wq before iomap_apply() is invoked. Reported-by: Eryu Guan Reviewed-by: Christoph Hellwig Tested-by: Eryu Guan Signed-off-by: Chandan Rajendra Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/iomap.c b/fs/iomap.c index 269b24a01f32..d4f526a3f5b2 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -993,6 +993,13 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, WARN_ON_ONCE(ret); ret = 0; + if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && + !inode->i_sb->s_dio_done_wq) { + ret = sb_init_dio_done_wq(inode->i_sb); + if (ret < 0) + goto out_free_dio; + } + inode_dio_begin(inode); blk_start_plug(&plug); @@ -1015,13 +1022,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (ret < 0) iomap_dio_set_error(dio, ret); - if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && - !inode->i_sb->s_dio_done_wq) { - ret = sb_init_dio_done_wq(inode->i_sb); - if (ret < 0) - iomap_dio_set_error(dio, ret); - } - if (!atomic_dec_and_test(&dio->ref)) { if (!is_sync_kiocb(iocb)) return -EIOCBQUEUED; -- cgit v1.2.3 From ee70daaba82d70766d0723b743d9fdeb3b06102a Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Thu, 21 Sep 2017 11:26:18 -0700 Subject: xfs: update i_size after unwritten conversion in dio completion Since commit d531d91d6990 ("xfs: always use unwritten extents for direct I/O writes"), we start allocating unwritten extents for all direct writes to allow appending aio in XFS. But for dio writes that could extend file size we update the in-core inode size first, then convert the unwritten extents to real allocations at dio completion time in xfs_dio_write_end_io(). Thus a racing direct read could see the new i_size and find the unwritten extents first and read zeros instead of actual data, if the direct writer also takes a shared iolock. Fix it by updating the in-core inode size after the unwritten extent conversion. To do this, introduce a new boolean argument to xfs_iomap_write_unwritten() to tell if we want to update in-core i_size or not. Suggested-by: Brian Foster Reviewed-by: Brian Foster Signed-off-by: Eryu Guan Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_aops.c | 3 ++- fs/xfs/xfs_file.c | 33 +++++++++++++++++++-------------- fs/xfs/xfs_iomap.c | 7 +++++-- fs/xfs/xfs_iomap.h | 2 +- fs/xfs/xfs_pnfs.c | 2 +- 5 files changed, 28 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 29172609f2a3..f18e5932aec4 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -343,7 +343,8 @@ xfs_end_io( error = xfs_reflink_end_cow(ip, offset, size); break; case XFS_IO_UNWRITTEN: - error = xfs_iomap_write_unwritten(ip, offset, size); + /* writeback should never update isize */ + error = xfs_iomap_write_unwritten(ip, offset, size, false); break; default: ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 350b6d43ba23..309e26c9dddb 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -434,7 +434,6 @@ xfs_dio_write_end_io( struct inode *inode = file_inode(iocb->ki_filp); struct xfs_inode *ip = XFS_I(inode); loff_t offset = iocb->ki_pos; - bool update_size = false; int error = 0; trace_xfs_end_io_direct_write(ip, offset, size); @@ -445,6 +444,21 @@ xfs_dio_write_end_io( if (size <= 0) return size; + if (flags & IOMAP_DIO_COW) { + error = xfs_reflink_end_cow(ip, offset, size); + if (error) + return error; + } + + /* + * Unwritten conversion updates the in-core isize after extent + * conversion but before updating the on-disk size. Updating isize any + * earlier allows a racing dio read to find unwritten extents before + * they are converted. + */ + if (flags & IOMAP_DIO_UNWRITTEN) + return xfs_iomap_write_unwritten(ip, offset, size, true); + /* * We need to update the in-core inode size here so that we don't end up * with the on-disk inode size being outside the in-core inode size. We @@ -459,20 +473,11 @@ xfs_dio_write_end_io( spin_lock(&ip->i_flags_lock); if (offset + size > i_size_read(inode)) { i_size_write(inode, offset + size); - update_size = true; - } - spin_unlock(&ip->i_flags_lock); - - if (flags & IOMAP_DIO_COW) { - error = xfs_reflink_end_cow(ip, offset, size); - if (error) - return error; - } - - if (flags & IOMAP_DIO_UNWRITTEN) - error = xfs_iomap_write_unwritten(ip, offset, size); - else if (update_size) + spin_unlock(&ip->i_flags_lock); error = xfs_setfilesize(ip, offset, size); + } else { + spin_unlock(&ip->i_flags_lock); + } return error; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index a1909bc064e9..f179bdf1644d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -829,7 +829,8 @@ int xfs_iomap_write_unwritten( xfs_inode_t *ip, xfs_off_t offset, - xfs_off_t count) + xfs_off_t count, + bool update_isize) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb; @@ -840,6 +841,7 @@ xfs_iomap_write_unwritten( xfs_trans_t *tp; xfs_bmbt_irec_t imap; struct xfs_defer_ops dfops; + struct inode *inode = VFS_I(ip); xfs_fsize_t i_size; uint resblks; int error; @@ -899,7 +901,8 @@ xfs_iomap_write_unwritten( i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb); if (i_size > offset + count) i_size = offset + count; - + if (update_isize && i_size > i_size_read(inode)) + i_size_write(inode, i_size); i_size = xfs_new_eof(ip, i_size); if (i_size) { ip->i_d.di_size = i_size; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 00db3ecea084..ee535065c5d0 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, struct xfs_bmbt_irec *, int); int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t, struct xfs_bmbt_irec *); -int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t); +int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool); void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, struct xfs_bmbt_irec *); diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 2f2dc3c09ad0..4246876df7b7 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -274,7 +274,7 @@ xfs_fs_commit_blocks( (end - 1) >> PAGE_SHIFT); WARN_ON_ONCE(error); - error = xfs_iomap_write_unwritten(ip, start, length); + error = xfs_iomap_write_unwritten(ip, start, length, false); if (error) goto out_drop_iolock; } -- cgit v1.2.3 From 9789dd9e1d939232e8ff4c50ef8e75aa6781b3fb Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 18 Sep 2017 09:42:09 -0700 Subject: xfs: perag initialization should only touch m_ag_max_usable for AG 0 We call __xfs_ag_resv_init to make a per-AG reservation for each AG. This makes the reservation per-AG, not per-filesystem. Therefore, it is incorrect to adjust m_ag_max_usable for each AG. Adjust it only when we're reserving AG 0's blocks so that we only do it once per fs. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/libxfs/xfs_ag_resv.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index b008ff3250eb..df3e600835e8 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -156,7 +156,8 @@ __xfs_ag_resv_free( trace_xfs_ag_resv_free(pag, type, 0); resv = xfs_perag_resv(pag, type); - pag->pag_mount->m_ag_max_usable += resv->ar_asked; + if (pag->pag_agno == 0) + pag->pag_mount->m_ag_max_usable += resv->ar_asked; /* * AGFL blocks are always considered "free", so whatever * was reserved at mount time must be given back at umount. @@ -216,7 +217,14 @@ __xfs_ag_resv_init( return error; } - mp->m_ag_max_usable -= ask; + /* + * Reduce the maximum per-AG allocation length by however much we're + * trying to reserve for an AG. Since this is a filesystem-wide + * counter, we only make the adjustment for AG 0. This assumes that + * there aren't any AGs hungrier for per-AG reservation than AG 0. + */ + if (pag->pag_agno == 0) + mp->m_ag_max_usable -= ask; resv = xfs_perag_resv(pag, type); resv->ar_asked = ask; -- cgit v1.2.3 From 842f6e9f786226c58fcbd5ef80eadca72fdfe652 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Fri, 22 Sep 2017 11:47:46 -0700 Subject: xfs: Capture state of the right inode in xfs_iflush_done My previous patch: d3a304b6292168b83b45d624784f973fdc1ca674 check for XFS_LI_FAILED flag xfs_iflush done, so the failed item can be properly resubmitted. In the loop scanning other inodes being completed, it should check the current item for the XFS_LI_FAILED, and not the initial one. The state of the initial inode is checked after the loop ends Kudos to Eric for catching this. Signed-off-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_inode_item.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 6d0f74ec31e8..a705f34b58fa 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -745,7 +745,7 @@ xfs_iflush_done( */ iip = INODE_ITEM(blip); if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) || - lip->li_flags & XFS_LI_FAILED) + (blip->li_flags & XFS_LI_FAILED)) need_ail++; blip = next; -- cgit v1.2.3 From 5e5c943c1f257c2b3424fc3f8a7b18570152dab3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 18 Sep 2017 09:41:17 -0700 Subject: xfs: revert "xfs: factor rmap btree size into the indlen calculations" In commit fd26a88093ba we added a worst case estimate for rmapbt blocks needed to satisfy the block mapping request. Since then, we added the ability to reserve enough space in each AG such that we should never run out of blocks to grow the rmapbt, which makes this calculation unnecessary. Revert the commit because it makes the extra delalloc indlen accounting unnecessary and incorrect. Reported-by: Eryu Guan Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 459f4b4f08fe..044a363119be 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -49,7 +49,6 @@ #include "xfs_rmap.h" #include "xfs_ag_resv.h" #include "xfs_refcount.h" -#include "xfs_rmap_btree.h" #include "xfs_icache.h" @@ -192,12 +191,8 @@ xfs_bmap_worst_indlen( int maxrecs; /* maximum record count at this level */ xfs_mount_t *mp; /* mount structure */ xfs_filblks_t rval; /* return value */ - xfs_filblks_t orig_len; mp = ip->i_mount; - - /* Calculate the worst-case size of the bmbt. */ - orig_len = len; maxrecs = mp->m_bmap_dmxr[0]; for (level = 0, rval = 0; level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); @@ -205,20 +200,12 @@ xfs_bmap_worst_indlen( len += maxrecs - 1; do_div(len, maxrecs); rval += len; - if (len == 1) { - rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - + if (len == 1) + return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - level - 1; - break; - } if (level == 0) maxrecs = mp->m_bmap_dmxr[1]; } - - /* Calculate the worst-case size of the rmapbt. */ - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) - rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) + - mp->m_rmap_maxlevels; - return rval; } -- cgit v1.2.3