From 54b3b1f619efbc838870735db2ddde9450f93a8f Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 6 May 2020 13:25:19 -0700 Subject: xfs: factor out buffer I/O failure code We use the same buffer I/O failure code in a few different places. It's not much code, but it's not necessarily self-explanatory. Factor it into a helper and document it in one place. Signed-off-by: Brian Foster Reviewed-by: Allison Collins Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 9ec3eaf1c618..d5d6a68bb1e6 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1248,6 +1248,22 @@ xfs_buf_ioerror_alert( -bp->b_error); } +/* + * To simulate an I/O failure, the buffer must be locked and held with at least + * three references. The LRU reference is dropped by the stale call. The buf + * item reference is dropped via ioend processing. The third reference is owned + * by the caller and is dropped on I/O completion if the buffer is XBF_ASYNC. + */ +void +xfs_buf_ioend_fail( + struct xfs_buf *bp) +{ + bp->b_flags &= ~XBF_DONE; + xfs_buf_stale(bp); + xfs_buf_ioerror(bp, -EIO); + xfs_buf_ioend(bp); +} + int xfs_bwrite( struct xfs_buf *bp) @@ -1480,10 +1496,7 @@ __xfs_buf_submit( /* on shutdown we stale and complete the buffer immediately */ if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { - xfs_buf_ioerror(bp, -EIO); - bp->b_flags &= ~XBF_DONE; - xfs_buf_stale(bp); - xfs_buf_ioend(bp); + xfs_buf_ioend_fail(bp); return -EIO; } -- cgit v1.2.3 From b6983e80b03bd4fd42de71993b3ac7403edac758 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 6 May 2020 13:25:20 -0700 Subject: xfs: reset buffer write failure state on successful completion The buffer write failure flag is intended to control the internal write retry that XFS has historically implemented to help mitigate the severity of transient I/O errors. The flag is set when a buffer is resubmitted from the I/O completion path due to a previous failure. It is checked on subsequent I/O completions to skip the internal retry and fall through to the higher level configurable error handling mechanism. The flag is cleared in the synchronous and delwri submission paths and also checked in various places to log write failure messages. There are a couple minor problems with the current usage of this flag. One is that we issue an internal retry after every submission from xfsaild due to how delwri submission clears the flag. This results in double the expected or configured number of write attempts when under sustained failures. Another more subtle issue is that the flag is never cleared on successful I/O completion. This can cause xfs_wait_buftarg() to suggest that dirty buffers are being thrown away due to the existence of the flag, when the reality is that the flag might still be set because the write succeeded on the retry. Clear the write failure flag on successful I/O completion to address both of these problems. This means that the internal retry attempt occurs once since the last time a buffer write failed and that various other contexts only see the flag set when the immediately previous write attempt has failed. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Allison Collins Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d5d6a68bb1e6..fd76a84cefdd 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1197,8 +1197,10 @@ xfs_buf_ioend( bp->b_ops->verify_read(bp); } - if (!bp->b_error) + if (!bp->b_error) { + bp->b_flags &= ~XBF_WRITE_FAIL; bp->b_flags |= XBF_DONE; + } if (bp->b_iodone) (*(bp->b_iodone))(bp); @@ -1274,7 +1276,7 @@ xfs_bwrite( bp->b_flags |= XBF_WRITE; bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | - XBF_WRITE_FAIL | XBF_DONE); + XBF_DONE); error = xfs_buf_submit(bp); if (error) @@ -1996,7 +1998,7 @@ xfs_buf_delwri_submit_buffers( * synchronously. Otherwise, drop the buffer from the delwri * queue and submit async. */ - bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_WRITE_FAIL); + bp->b_flags &= ~_XBF_DELWRI_Q; bp->b_flags |= XBF_WRITE; if (wait_list) { bp->b_flags &= ~XBF_ASYNC; -- cgit v1.2.3 From f9bccfcc3b59b9aba64791ab3a2bfefe681ab75b Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 6 May 2020 13:25:21 -0700 Subject: xfs: refactor ratelimited buffer error messages into helper XFS has some inconsistent log message rate limiting with respect to buffer alerts. The metadata I/O error notification uses the generic ratelimited alert, the buffer push code uses a custom rate limit and the similar quiesce time failure checks are not rate limited at all (when they should be). The custom rate limit defined in the buf item code is specifically crafted for buffer alerts. It is more aggressive than generic rate limiting code because it must accommodate a high frequency of I/O error events in a relative short timeframe. Factor out the custom rate limit state from the buf item code into a per-buftarg rate limit so various alerts are limited based on the target. Define a buffer alert helper function and use it for the buffer alerts that are already ratelimited. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Allison Collins Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf.c | 15 +++++++++++---- fs/xfs/xfs_buf.h | 1 + fs/xfs/xfs_buf_item.c | 17 ++++------------- fs/xfs/xfs_message.c | 22 ++++++++++++++++++++++ fs/xfs/xfs_message.h | 3 +++ 5 files changed, 41 insertions(+), 17 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index fd76a84cefdd..594d5e1df6f8 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1244,10 +1244,10 @@ xfs_buf_ioerror_alert( struct xfs_buf *bp, xfs_failaddr_t func) { - xfs_alert_ratelimited(bp->b_mount, -"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d", - func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, - -bp->b_error); + xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error", + "metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d", + func, (uint64_t)XFS_BUF_ADDR(bp), + bp->b_length, -bp->b_error); } /* @@ -1828,6 +1828,13 @@ xfs_alloc_buftarg( btp->bt_bdev = bdev; btp->bt_daxdev = dax_dev; + /* + * Buffer IO error rate limiting. Limit it to no more than 10 messages + * per 30 seconds so as to not spam logs too much on repeated errors. + */ + ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ, + DEFAULT_RATELIMIT_BURST); + if (xfs_setsize_buftarg_early(btp, bdev)) goto error_free; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 06ea3eef866e..050c53b739e2 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -91,6 +91,7 @@ typedef struct xfs_buftarg { struct list_lru bt_lru; struct percpu_counter bt_io_count; + struct ratelimit_state bt_ioerror_rl; } xfs_buftarg_t; struct xfs_buf; diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index b452a399a441..1f7acffc99ba 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -481,14 +481,6 @@ xfs_buf_item_unpin( } } -/* - * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30 - * seconds so as to not spam logs too much on repeated detection of the same - * buffer being bad.. - */ - -static DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); - STATIC uint xfs_buf_item_push( struct xfs_log_item *lip, @@ -518,11 +510,10 @@ xfs_buf_item_push( trace_xfs_buf_item_push(bip); /* has a previous flush failed due to IO errors? */ - if ((bp->b_flags & XBF_WRITE_FAIL) && - ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) { - xfs_warn(bp->b_mount, -"Failing async write on buffer block 0x%llx. Retrying async write.", - (long long)bp->b_bn); + if (bp->b_flags & XBF_WRITE_FAIL) { + xfs_buf_alert_ratelimited(bp, "XFS: Failing async write", + "Failing async write on buffer block 0x%llx. Retrying async write.", + (long long)bp->b_bn); } if (!xfs_buf_delwri_queue(bp, buffer_list)) diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index e0f9d3b6abe9..bc66d95c8d4c 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -117,3 +117,25 @@ xfs_hex_dump(const void *p, int length) { print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1); } + +void +xfs_buf_alert_ratelimited( + struct xfs_buf *bp, + const char *rlmsg, + const char *fmt, + ...) +{ + struct xfs_mount *mp = bp->b_mount; + struct va_format vaf; + va_list args; + + /* use the more aggressive per-target rate limit for buffers */ + if (!___ratelimit(&bp->b_target->bt_ioerror_rl, rlmsg)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + __xfs_printk(KERN_ALERT, mp, &vaf); + va_end(args); +} diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h index 802a96190d22..4d9bd6bb63ca 100644 --- a/fs/xfs/xfs_message.h +++ b/fs/xfs/xfs_message.h @@ -79,4 +79,7 @@ void asswarn(struct xfs_mount *mp, char *expr, char *f, int l); extern void xfs_hex_dump(const void *p, int length); +void xfs_buf_alert_ratelimited(struct xfs_buf *bp, const char *rlmsg, + const char *fmt, ...); + #endif /* __XFS_MESSAGE_H */ -- cgit v1.2.3 From 61948b6fb276ad95ad63f82863e607719a31a628 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 6 May 2020 13:25:21 -0700 Subject: xfs: ratelimit unmount time per-buffer I/O error alert At unmount time, XFS emits an alert for every in-core buffer that might have undergone a write error. In practice this behavior is probably reasonable given that the filesystem is likely short lived once I/O errors begin to occur consistently. Under certain test or otherwise expected error conditions, this can spam the logs and slow down the unmount. Now that we have a ratelimit mechanism specifically for buffer alerts, reuse it for the per-buffer alerts in xfs_wait_buftarg(). Also lift the final repair message out of the loop so it always prints and assert that the metadata error handling code has shut down the fs. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Allison Collins Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 594d5e1df6f8..3918270f4eab 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1657,7 +1657,8 @@ xfs_wait_buftarg( struct xfs_buftarg *btp) { LIST_HEAD(dispose); - int loop = 0; + int loop = 0; + bool write_fail = false; /* * First wait on the buftarg I/O count for all in-flight buffers to be @@ -1685,17 +1686,29 @@ xfs_wait_buftarg( bp = list_first_entry(&dispose, struct xfs_buf, b_lru); list_del_init(&bp->b_lru); if (bp->b_flags & XBF_WRITE_FAIL) { - xfs_alert(btp->bt_mount, + write_fail = true; + xfs_buf_alert_ratelimited(bp, + "XFS: Corruption Alert", "Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!", (long long)bp->b_bn); - xfs_alert(btp->bt_mount, -"Please run xfs_repair to determine the extent of the problem."); } xfs_buf_rele(bp); } if (loop++ != 0) delay(100); } + + /* + * If one or more failed buffers were freed, that means dirty metadata + * was thrown away. This should only ever happen after I/O completion + * handling has elevated I/O error(s) to permanent failures and shuts + * down the fs. + */ + if (write_fail) { + ASSERT(XFS_FORCED_SHUTDOWN(btp->bt_mount)); + xfs_alert(btp->bt_mount, + "Please run xfs_repair to determine the extent of the problem."); + } } static enum lru_status -- cgit v1.2.3 From 7376d74547344598008d00419eae0caa5f50f4f0 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 6 May 2020 13:29:19 -0700 Subject: xfs: random buffer write failure errortag Introduce an error tag to randomly fail async buffer writes. This is primarily to facilitate testing of the XFS error configuration mechanism. Signed-off-by: Brian Foster Reviewed-by: Allison Collins Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_errortag.h | 4 +++- fs/xfs/xfs_buf.c | 6 ++++++ fs/xfs/xfs_error.c | 3 +++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h index 79e6c4fb1d8a..2486dab19023 100644 --- a/fs/xfs/libxfs/xfs_errortag.h +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -55,7 +55,8 @@ #define XFS_ERRTAG_FORCE_SCRUB_REPAIR 32 #define XFS_ERRTAG_FORCE_SUMMARY_RECALC 33 #define XFS_ERRTAG_IUNLINK_FALLBACK 34 -#define XFS_ERRTAG_MAX 35 +#define XFS_ERRTAG_BUF_IOERROR 35 +#define XFS_ERRTAG_MAX 36 /* * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. @@ -95,5 +96,6 @@ #define XFS_RANDOM_FORCE_SCRUB_REPAIR 1 #define XFS_RANDOM_FORCE_SUMMARY_RECALC 1 #define XFS_RANDOM_IUNLINK_FALLBACK (XFS_RANDOM_DEFAULT/10) +#define XFS_RANDOM_BUF_IOERROR XFS_RANDOM_DEFAULT #endif /* __XFS_ERRORTAG_H_ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 3918270f4eab..9d8841ac7375 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1289,6 +1289,12 @@ xfs_buf_bio_end_io( struct bio *bio) { struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private; + struct xfs_mount *mp = bp->b_mount; + + if (!bio->bi_status && + (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) && + XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BUF_IOERROR)) + bio->bi_status = BLK_STS_IOERR; /* * don't overwrite existing errors - otherwise we can lose errors on diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index a21e9cc6516a..7f6e20899473 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -53,6 +53,7 @@ static unsigned int xfs_errortag_random_default[] = { XFS_RANDOM_FORCE_SCRUB_REPAIR, XFS_RANDOM_FORCE_SUMMARY_RECALC, XFS_RANDOM_IUNLINK_FALLBACK, + XFS_RANDOM_BUF_IOERROR, }; struct xfs_errortag_attr { @@ -162,6 +163,7 @@ XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF); XFS_ERRORTAG_ATTR_RW(force_repair, XFS_ERRTAG_FORCE_SCRUB_REPAIR); XFS_ERRORTAG_ATTR_RW(bad_summary, XFS_ERRTAG_FORCE_SUMMARY_RECALC); XFS_ERRORTAG_ATTR_RW(iunlink_fallback, XFS_ERRTAG_IUNLINK_FALLBACK); +XFS_ERRORTAG_ATTR_RW(buf_ioerror, XFS_ERRTAG_BUF_IOERROR); static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(noerror), @@ -199,6 +201,7 @@ static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(force_repair), XFS_ERRORTAG_ATTR_LIST(bad_summary), XFS_ERRORTAG_ATTR_LIST(iunlink_fallback), + XFS_ERRORTAG_ATTR_LIST(buf_ioerror), NULL, }; -- cgit v1.2.3 From 43dc0aa84ef74e3acd908b6d5eaa8ae15d7ecc8a Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 8 May 2020 08:50:52 -0700 Subject: xfs: fix unused variable warning in buffer completion on !DEBUG The random buffer write failure errortag patch introduced a local mount pointer variable for the test macro, but the macro is compiled out on !DEBUG kernels. This results in an unused variable warning. Access the mount structure through the buffer pointer and remove the local mount pointer to address the warning. Fixes: 7376d745473 ("xfs: random buffer write failure errortag") Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 9d8841ac7375..9c2fbb6bbf89 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1289,11 +1289,10 @@ xfs_buf_bio_end_io( struct bio *bio) { struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private; - struct xfs_mount *mp = bp->b_mount; if (!bio->bi_status && (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) && - XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BUF_IOERROR)) + XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR)) bio->bi_status = BLK_STS_IOERR; /* -- cgit v1.2.3