summaryrefslogtreecommitdiff
path: root/drivers/md/dm-zoned-reclaim.c
diff options
context:
space:
mode:
author Dmitry Fomichev <dmitry.fomichev@wdc.com> 2019-08-11 00:43:11 +0300
committer Mike Snitzer <snitzer@redhat.com> 2019-08-15 22:57:42 +0300
commit 75d66ffb48efb30f2dd42f041ba8b39c5b2bd115 (patch)
tree b0fa4e0bb31dff25d707243ce74e4cfab7c6a1d4 /drivers/md/dm-zoned-reclaim.c
parent d7428c50118e739e672656c28d2b26b09375d4e0 (diff)
download linux-75d66ffb48efb30f2dd42f041ba8b39c5b2bd115.tar.xz
dm zoned: properly handle backing device failure
dm-zoned is observed to lock up or livelock in case of hardware failure or some misconfiguration of the backing zoned device. This patch adds a new dm-zoned target function that checks the status of the backing device. If the request queue of the backing device is found to be in the dying state, or the SCSI backing device enters the offline state, the health check code sets a dm-zoned target flag prompting all further incoming I/O to be rejected. In order to detect backing device failures in a timely manner, this new function is called in the request mapping path, at the beginning of every reclaim run, and before performing any metadata I/O. The proper way out of this situation is to run "dmsetup remove <dm-zoned target>" and recreate the target once the problem with the backing device is resolved. Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com> Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Diffstat (limited to 'drivers/md/dm-zoned-reclaim.c')
-rw-r--r-- drivers/md/dm-zoned-reclaim.c | 18
1 files changed, 16 insertions, 2 deletions
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index e381354dc136..9470b8f77a33 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -37,7 +37,7 @@ enum {
/*
* Number of seconds of target BIO inactivity to consider the target idle.
*/
-#define DMZ_IDLE_PERIOD (10UL * HZ)
+#define DMZ_IDLE_PERIOD (10UL * HZ)
/*
* Percentage of unmapped (free) random zones below which reclaim starts
@@ -134,6 +134,9 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
set_bit(DM_KCOPYD_WRITE_SEQ, &flags);
while (block < end_block) {
+ if (dev->flags & DMZ_BDEV_DYING)
+ return -EIO;
+
/* Get a valid region from the source zone */
ret = dmz_first_valid_block(zmd, src_zone, &block);
if (ret <= 0)
@@ -451,6 +454,9 @@ static void dmz_reclaim_work(struct work_struct *work)
unsigned int p_unmap_rnd;
int ret;
+ if (dmz_bdev_is_dying(zrc->dev))
+ return;
+
if (!dmz_should_reclaim(zrc)) {
mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
return;
@@ -480,8 +486,16 @@ static void dmz_reclaim_work(struct work_struct *work)
p_unmap_rnd, nr_unmap_rnd, nr_rnd);
ret = dmz_do_reclaim(zrc);
- if (ret)
+ if (ret) {
dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret);
+ if (ret == -EIO)
+ /*
+ * LLD might be performing some error handling sequence
+ * at the underlying device. To not interfere, do not
+ * attempt to schedule the next reclaim run immediately.
+ */
+ return;
+ }
dmz_schedule_reclaim(zrc);
}