From 994724e6b3f05fb3b6e4b1e87d7e074b65d47bf9 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:02 -0600 Subject: scsi: core: Allow passthrough to request midlayer retries For passthrough we don't retry any error which we get a check condition for. This results in a lot of callers driving their own retries for all UAs, specific UAs, NOT_READY, specific sense values or any type of failure. This adds the core code to allow passthrough users to specify what errors they want the SCSI midlayer to retry for them. We can then convert users to drop a lot of their sense parsing and retry handling. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-2-michael.christie@oracle.com Reviewed-by: John Garry Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 3 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index cf3864f72093..16a33a558842 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -184,6 +184,92 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) __scsi_queue_insert(cmd, reason, true); } +void scsi_failures_reset_retries(struct scsi_failures *failures) +{ + struct scsi_failure *failure; + + failures->total_retries = 0; + + for (failure = failures->failure_definitions; failure->result; + failure++) + failure->retries = 0; +} +EXPORT_SYMBOL_GPL(scsi_failures_reset_retries); + +/** + * scsi_check_passthrough - Determine if passthrough scsi_cmnd needs a retry. + * @scmd: scsi_cmnd to check. + * @failures: scsi_failures struct that lists failures to check for. + * + * Returns -EAGAIN if the caller should retry else 0. + */ +static int scsi_check_passthrough(struct scsi_cmnd *scmd, + struct scsi_failures *failures) +{ + struct scsi_failure *failure; + struct scsi_sense_hdr sshdr; + enum sam_status status; + + if (!failures) + return 0; + + for (failure = failures->failure_definitions; failure->result; + failure++) { + if (failure->result == SCMD_FAILURE_RESULT_ANY) + goto maybe_retry; + + if (host_byte(scmd->result) && + host_byte(scmd->result) == host_byte(failure->result)) + goto maybe_retry; + + status = status_byte(scmd->result); + if (!status) + continue; + + if (failure->result == SCMD_FAILURE_STAT_ANY && + !scsi_status_is_good(scmd->result)) + goto maybe_retry; + + if (status != status_byte(failure->result)) + continue; + + if (status_byte(failure->result) != SAM_STAT_CHECK_CONDITION || + failure->sense == SCMD_FAILURE_SENSE_ANY) + goto maybe_retry; + + if (!scsi_command_normalize_sense(scmd, &sshdr)) + return 0; + + if (failure->sense != sshdr.sense_key) + continue; + + if (failure->asc == SCMD_FAILURE_ASC_ANY) + goto maybe_retry; + + if (failure->asc != sshdr.asc) + continue; + + if (failure->ascq == SCMD_FAILURE_ASCQ_ANY || + failure->ascq == sshdr.ascq) + goto maybe_retry; + } + + return 0; + +maybe_retry: + if (failure->allowed) { + if (failure->allowed == SCMD_FAILURE_NO_LIMIT || + ++failure->retries <= failure->allowed) + return -EAGAIN; + } else { + if (failures->total_allowed == SCMD_FAILURE_NO_LIMIT || + ++failures->total_retries <= failures->total_allowed) + return -EAGAIN; + } + + return 0; +} + /** * scsi_execute_cmd - insert request and wait for the result * @sdev: scsi_device @@ -192,7 +278,7 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) * @buffer: data buffer * @bufflen: len of buffer * @timeout: request timeout in HZ - * @retries: number of times to retry request + * @ml_retries: number of times SCSI midlayer will retry request * @args: Optional args. See struct definition for field descriptions * * Returns the scsi_cmnd result field if a command was executed, or a negative @@ -200,7 +286,7 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) */ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, blk_opf_t opf, void *buffer, unsigned int bufflen, - int timeout, int retries, + int timeout, int ml_retries, const struct scsi_exec_args *args) { static const struct scsi_exec_args default_args; @@ -214,6 +300,7 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, args->sense_len != SCSI_SENSE_BUFFERSIZE)) return -EINVAL; +retry: req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags); if (IS_ERR(req)) return PTR_ERR(req); @@ -227,7 +314,7 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, scmd = blk_mq_rq_to_pdu(req); scmd->cmd_len = COMMAND_SIZE(cmd[0]); memcpy(scmd->cmnd, cmd, scmd->cmd_len); - scmd->allowed = retries; + scmd->allowed = ml_retries; scmd->flags |= args->scmd_flags; req->timeout = timeout; req->rq_flags |= RQF_QUIET; @@ -237,6 +324,11 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, */ blk_execute_rq(req, true); + if (scsi_check_passthrough(scmd, args->failures) == -EAGAIN) { + blk_mq_free_request(req); + goto retry; + } + /* * Some devices (USB mass-storage in particular) may transfer * garbage data together with a residue indicating that the data -- cgit v1.2.3 From 2a1f96f60a4bf28207da653a844ea471840d2b91 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:03 -0600 Subject: scsi: core: Have midlayer retry scsi_probe_lun() errors This has scsi_probe_lun() ask the SCSI midlayer to retry UAs instead of driving them itself. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-3-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 46 ++++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 44680f65ea14..a2bed0dbf996 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -626,6 +626,7 @@ void scsi_sanitize_inquiry_string(unsigned char *s, int len) } EXPORT_SYMBOL(scsi_sanitize_inquiry_string); + /** * scsi_probe_lun - probe a single LUN using a SCSI INQUIRY * @sdev: scsi_device to probe @@ -647,10 +648,32 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result, int first_inquiry_len, try_inquiry_len, next_inquiry_len; int response_len = 0; int pass, count, result, resid; - struct scsi_sense_hdr sshdr; + struct scsi_failure failure_defs[] = { + /* + * not-ready to ready transition [asc/ascq=0x28/0x0] or + * power-on, reset [asc/ascq=0x29/0x0], continue. INQUIRY + * should not yield UNIT_ATTENTION but many buggy devices do + * so anyway. + */ + { + .sense = UNIT_ATTENTION, + .asc = 0x28, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = UNIT_ATTENTION, + .asc = 0x29, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .total_allowed = 3, + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { - .sshdr = &sshdr, .resid = &resid, + .failures = &failures, }; *bflags = 0; @@ -668,6 +691,8 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result, pass, try_inquiry_len)); /* Each pass gets up to three chances to ignore Unit Attention */ + scsi_failures_reset_retries(&failures); + for (count = 0; count < 3; ++count) { memset(scsi_cmd, 0, 6); scsi_cmd[0] = INQUIRY; @@ -684,22 +709,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result, "scsi scan: INQUIRY %s with code 0x%x\n", result ? "failed" : "successful", result)); - if (result > 0) { - /* - * not-ready to ready transition [asc/ascq=0x28/0x0] - * or power-on, reset [asc/ascq=0x29/0x0], continue. - * INQUIRY should not yield UNIT_ATTENTION - * but many buggy devices do so anyway. - */ - if (scsi_status_is_check_condition(result) && - scsi_sense_valid(&sshdr)) { - if ((sshdr.sense_key == UNIT_ATTENTION) && - ((sshdr.asc == 0x28) || - (sshdr.asc == 0x29)) && - (sshdr.ascq == 0)) - continue; - } - } else if (result == 0) { + if (result == 0) { /* * if nothing was transferred, we try * again. It's a workaround for some USB -- cgit v1.2.3 From 987d7d3db0b9b5428c4888ed375cca290667a597 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:04 -0600 Subject: scsi: core: Retry INQUIRY after timeout Description from: Martin Wilck : The SCSI mid layer doesn't retry commands after DID_TIME_OUT (see scsi_noretry_cmd()). Packet loss in the fabric can cause spurious timeouts during SCSI device probing, causing device probing to fail. This has been observed in FCoE uplink failover tests, for example. This patch fixes the issue by retrying the INQUIRY. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-4-michael.christie@oracle.com Reviewed-by: Christoph Hellwig Reviewed-by: Martin Wilck Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index a2bed0dbf996..8ded08f37337 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -665,6 +665,10 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result, .asc = 0x29, .result = SAM_STAT_CHECK_CONDITION, }, + { + .allowed = 1, + .result = DID_TIME_OUT << 16, + }, {} }; struct scsi_failures failures = { -- cgit v1.2.3 From 1008f5776fe5c398e1202c93b835943b04de3ec6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:05 -0600 Subject: scsi: sd: Use separate buf for START_STOP in sd_spinup_disk() We currently reuse the cmd buffer for the TUR and START_STOP commands which requires us to reset the buffer when retrying. This has us use separate buffers for the 2 commands so we can make them const and I think it makes it easier to handle for retries but does not add too much extra to the stack use. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-5-michael.christie@oracle.com Reviewed-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Martin Wilck Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 0833b3e6aa6e..3d85913d373c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2318,14 +2318,16 @@ sd_spinup_disk(struct scsi_disk *sdkp) * Issue command to spin up drive when not ready */ if (!spintime) { + /* Return immediately and start spin cycle */ + const u8 start_cmd[10] = { + [0] = START_STOP, + [1] = 1, + [4] = sdkp->device->start_stop_pwr_cond ? + 0x11 : 1, + }; + sd_printk(KERN_NOTICE, sdkp, "Spinning up disk..."); - cmd[0] = START_STOP; - cmd[1] = 1; /* Return immediately */ - memset((void *) &cmd[2], 0, 8); - cmd[4] = 1; /* Start spin cycle */ - if (sdkp->device->start_stop_pwr_cond) - cmd[4] |= 1 << 4; - scsi_execute_cmd(sdkp->device, cmd, + scsi_execute_cmd(sdkp->device, start_cmd, REQ_OP_DRV_IN, NULL, 0, SD_TIMEOUT, sdkp->max_retries, &exec_args); -- cgit v1.2.3 From c1acf38cd11efdc921f7d41107b00c2cb79453fc Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:06 -0600 Subject: scsi: sd: Have midlayer retry sd_spinup_disk() errors This simplifies sd_spinup_disk() so the SCSI midlayer retries errors for it. Note that we retried every UA except Medium Not Present and also if scsi_status_is_good() returned failed which would happen for all check conditions. In this patch we use SCMD_FAILURE_STAT_ANY which will trigger for the same conditions as when scsi_status_is_good() returns false and there is status. This will cover all CCs including UAs so there is no explicit failures array entry for UAs except for Medium Not Present which we don't want to retry. There is one behavior change where we no longer retry when scsi_execute_cmd() returns < 0, but we should be ok. We don't need to retry for failures like the queue being removed, and for the case where there are no tags/reqs the block layer waits/retries for us. For possible memory allocation failures from blk_rq_map_kern() we use GFP_NOIO, so retrying will probably not help. We do not handle the outside loop's retries because we want to sleep between tries and we don't support that yet. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-6-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 77 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 32 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3d85913d373c..cb240015bde5 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2235,55 +2235,68 @@ static int sd_done(struct scsi_cmnd *SCpnt) static void sd_spinup_disk(struct scsi_disk *sdkp) { - unsigned char cmd[10]; + static const u8 cmd[10] = { TEST_UNIT_READY }; unsigned long spintime_expire = 0; - int retries, spintime; + int spintime, sense_valid = 0; unsigned int the_result; struct scsi_sense_hdr sshdr; + struct scsi_failure failure_defs[] = { + /* Do not retry Medium Not Present */ + { + .sense = UNIT_ATTENTION, + .asc = 0x3A, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = NOT_READY, + .asc = 0x3A, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .result = SAM_STAT_CHECK_CONDITION, + }, + /* Retry when scsi_status_is_good would return false 3 times */ + { + .result = SCMD_FAILURE_STAT_ANY, + .allowed = 3, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, + .failures = &failures, }; - int sense_valid = 0; spintime = 0; /* Spin up drives, as required. Only do this at boot time */ /* Spinup needs to be done for module loads too. */ do { - retries = 0; + bool media_was_present = sdkp->media_present; - do { - bool media_was_present = sdkp->media_present; + scsi_failures_reset_retries(&failures); - cmd[0] = TEST_UNIT_READY; - memset((void *) &cmd[1], 0, 9); - - the_result = scsi_execute_cmd(sdkp->device, cmd, - REQ_OP_DRV_IN, NULL, 0, - SD_TIMEOUT, - sdkp->max_retries, - &exec_args); + the_result = scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, + NULL, 0, SD_TIMEOUT, + sdkp->max_retries, &exec_args); - if (the_result > 0) { - /* - * If the drive has indicated to us that it - * doesn't have any media in it, don't bother - * with any more polling. - */ - if (media_not_present(sdkp, &sshdr)) { - if (media_was_present) - sd_printk(KERN_NOTICE, sdkp, - "Media removed, stopped polling\n"); - return; - } - sense_valid = scsi_sense_valid(&sshdr); + if (the_result > 0) { + /* + * If the drive has indicated to us that it doesn't + * have any media in it, don't bother with any more + * polling. + */ + if (media_not_present(sdkp, &sshdr)) { + if (media_was_present) + sd_printk(KERN_NOTICE, sdkp, + "Media removed, stopped polling\n"); + return; } - retries++; - } while (retries < 3 && - (!scsi_status_is_good(the_result) || - (scsi_status_is_check_condition(the_result) && - sense_valid && sshdr.sense_key == UNIT_ATTENTION))); + sense_valid = scsi_sense_valid(&sshdr); + } if (!scsi_status_is_check_condition(the_result)) { /* no sense, TUR either succeeded or failed -- cgit v1.2.3 From fabe3ee92e180726edf7c7509dc625410a88084b Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:07 -0600 Subject: scsi: device_handler: hp_sw: Have midlayer retry scsi_execute_cmd() errors This has hp_sw have the SCSI midlayer retry scsi_execute_cmd() errors instead of driving them itself. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-7-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_hp_sw.c | 49 +++++++++++++++++++---------- 1 file changed, 33 insertions(+), 16 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index 944ea4e0cc45..b6eaf49dfb00 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -46,9 +46,6 @@ static int tur_done(struct scsi_device *sdev, struct hp_sw_dh_data *h, int ret = SCSI_DH_IO; switch (sshdr->sense_key) { - case UNIT_ATTENTION: - ret = SCSI_DH_IMM_RETRY; - break; case NOT_READY: if (sshdr->asc == 0x04 && sshdr->ascq == 2) { /* @@ -85,11 +82,24 @@ static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h) int ret, res; blk_opf_t opf = REQ_OP_DRV_IN | REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER; + struct scsi_failure failure_defs[] = { + { + .sense = UNIT_ATTENTION, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .allowed = SCMD_FAILURE_NO_LIMIT, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, + .failures = &failures, }; -retry: res = scsi_execute_cmd(sdev, cmd, opf, NULL, 0, HP_SW_TIMEOUT, HP_SW_RETRIES, &exec_args); if (res > 0 && scsi_sense_valid(&sshdr)) { @@ -104,9 +114,6 @@ retry: ret = SCSI_DH_IO; } - if (ret == SCSI_DH_IMM_RETRY) - goto retry; - return ret; } @@ -122,14 +129,31 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *h) struct scsi_sense_hdr sshdr; struct scsi_device *sdev = h->sdev; int res, rc; - int retry_cnt = HP_SW_RETRIES; blk_opf_t opf = REQ_OP_DRV_IN | REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER; + struct scsi_failure failure_defs[] = { + { + /* + * LUN not ready - manual intervention required + * + * Switch-over in progress, retry. + */ + .sense = NOT_READY, + .asc = 0x04, + .ascq = 0x03, + .allowed = HP_SW_RETRIES, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, + .failures = &failures, }; -retry: res = scsi_execute_cmd(sdev, cmd, opf, NULL, 0, HP_SW_TIMEOUT, HP_SW_RETRIES, &exec_args); if (!res) { @@ -144,13 +168,6 @@ retry: switch (sshdr.sense_key) { case NOT_READY: if (sshdr.asc == 0x04 && sshdr.ascq == 3) { - /* - * LUN not ready - manual intervention required - * - * Switch-over in progress, retry. - */ - if (--retry_cnt) - goto retry; rc = SCSI_DH_RETRY; break; } -- cgit v1.2.3 From f316ff46a0ffeada53da7e046bf67b0f3246d4b3 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:08 -0600 Subject: scsi: device_handler: rdac: Have midlayer retry send_mode_select() errors This has rdac have the SCSI midlayer retry errors instead of driving them itself. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-8-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_rdac.c | 84 ++++++++++++++++-------------- 1 file changed, 46 insertions(+), 38 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 1ac2ae17e8be..f8a09e3eba58 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -485,43 +485,17 @@ static int set_mode_select(struct scsi_device *sdev, struct rdac_dh_data *h) static int mode_select_handle_sense(struct scsi_device *sdev, struct scsi_sense_hdr *sense_hdr) { - int err = SCSI_DH_IO; struct rdac_dh_data *h = sdev->handler_data; if (!scsi_sense_valid(sense_hdr)) - goto done; - - switch (sense_hdr->sense_key) { - case NO_SENSE: - case ABORTED_COMMAND: - case UNIT_ATTENTION: - err = SCSI_DH_RETRY; - break; - case NOT_READY: - if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x01) - /* LUN Not Ready and is in the Process of Becoming - * Ready - */ - err = SCSI_DH_RETRY; - break; - case ILLEGAL_REQUEST: - if (sense_hdr->asc == 0x91 && sense_hdr->ascq == 0x36) - /* - * Command Lock contention - */ - err = SCSI_DH_IMM_RETRY; - break; - default: - break; - } + return SCSI_DH_IO; RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, " "MODE_SELECT returned with sense %02x/%02x/%02x", (char *) h->ctlr->array_name, h->ctlr->index, sense_hdr->sense_key, sense_hdr->asc, sense_hdr->ascq); -done: - return err; + return SCSI_DH_IO; } static void send_mode_select(struct work_struct *work) @@ -530,7 +504,7 @@ static void send_mode_select(struct work_struct *work) container_of(work, struct rdac_controller, ms_work); struct scsi_device *sdev = ctlr->ms_sdev; struct rdac_dh_data *h = sdev->handler_data; - int rc, err, retry_cnt = RDAC_RETRY_COUNT; + int rc, err; struct rdac_queue_data *tmp, *qdata; LIST_HEAD(list); unsigned char cdb[MAX_COMMAND_SIZE]; @@ -538,8 +512,49 @@ static void send_mode_select(struct work_struct *work) unsigned int data_size; blk_opf_t opf = REQ_OP_DRV_OUT | REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER; + struct scsi_failure failure_defs[] = { + { + .sense = NO_SENSE, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = ABORTED_COMMAND, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = UNIT_ATTENTION, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .result = SAM_STAT_CHECK_CONDITION, + }, + /* LUN Not Ready and is in the Process of Becoming Ready */ + { + .sense = NOT_READY, + .asc = 0x04, + .ascq = 0x01, + .result = SAM_STAT_CHECK_CONDITION, + }, + /* Command Lock contention */ + { + .sense = ILLEGAL_REQUEST, + .asc = 0x91, + .ascq = 0x36, + .allowed = SCMD_FAILURE_NO_LIMIT, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .total_allowed = RDAC_RETRY_COUNT, + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, + .failures = &failures, }; spin_lock(&ctlr->ms_lock); @@ -548,15 +563,12 @@ static void send_mode_select(struct work_struct *work) ctlr->ms_sdev = NULL; spin_unlock(&ctlr->ms_lock); - retry: memset(cdb, 0, sizeof(cdb)); data_size = rdac_failover_get(ctlr, &list, cdb); - RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, " - "%s MODE_SELECT command", - (char *) h->ctlr->array_name, h->ctlr->index, - (retry_cnt == RDAC_RETRY_COUNT) ? "queueing" : "retrying"); + RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, queueing MODE_SELECT command", + (char *)h->ctlr->array_name, h->ctlr->index); rc = scsi_execute_cmd(sdev, cdb, opf, &h->ctlr->mode_select, data_size, RDAC_TIMEOUT * HZ, RDAC_RETRIES, &exec_args); @@ -570,10 +582,6 @@ static void send_mode_select(struct work_struct *work) err = SCSI_DH_IO; } else { err = mode_select_handle_sense(sdev, &sshdr); - if (err == SCSI_DH_RETRY && retry_cnt--) - goto retry; - if (err == SCSI_DH_IMM_RETRY) - goto retry; } list_for_each_entry_safe(qdata, tmp, &list, entry) { -- cgit v1.2.3 From 5dbf10473642f822de62038a70addb54756b0109 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:09 -0600 Subject: scsi: spi: Have midlayer retry spi_execute() UAs This has spi_execute() have the SCSI midlayer retry UAs instead of driving them. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-9-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_spi.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c index f668c1c0a98f..64852e6df3e3 100644 --- a/drivers/scsi/scsi_transport_spi.c +++ b/drivers/scsi/scsi_transport_spi.c @@ -108,29 +108,30 @@ static int spi_execute(struct scsi_device *sdev, const void *cmd, enum req_op op, void *buffer, unsigned int bufflen, struct scsi_sense_hdr *sshdr) { - int i, result; - struct scsi_sense_hdr sshdr_tmp; blk_opf_t opf = op | REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER; + struct scsi_failure failure_defs[] = { + { + .sense = UNIT_ATTENTION, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .allowed = DV_RETRIES, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { + /* bypass the SDEV_QUIESCE state with BLK_MQ_REQ_PM */ .req_flags = BLK_MQ_REQ_PM, - .sshdr = sshdr ? : &sshdr_tmp, + .sshdr = sshdr, + .failures = &failures, }; - sshdr = exec_args.sshdr; - - for(i = 0; i < DV_RETRIES; i++) { - /* - * The purpose of the RQF_PM flag below is to bypass the - * SDEV_QUIESCE state. - */ - result = scsi_execute_cmd(sdev, cmd, opf, buffer, bufflen, - DV_TIMEOUT, 1, &exec_args); - if (result < 0 || !scsi_sense_valid(sshdr) || - sshdr->sense_key != UNIT_ATTENTION) - break; - } - return result; + return scsi_execute_cmd(sdev, cmd, opf, buffer, bufflen, DV_TIMEOUT, 1, + &exec_args); } static struct { -- cgit v1.2.3 From 183053203d4532431bfdbddc04dd9306a03164a5 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:10 -0600 Subject: scsi: sd: Have midlayer retry sd_sync_cache() errors This has sd_sync_cache() have the SCSI midlayer retry errors instead of driving them itself. There is one behavior change where we no longer retry when scsi_execute_cmd() returns < 0, but we should be ok. We don't need to retry for failures like the queue being removed, and for the case where there are no tags/reqs the block layer waits/retries for us. For possible memory allocation failures from blk_rq_map_kern() we use GFP_NOIO, so retrying will probably not help. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-10-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cb240015bde5..c2068d83c812 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1645,36 +1645,35 @@ out: static int sd_sync_cache(struct scsi_disk *sdkp) { - int retries, res; + int res; struct scsi_device *sdp = sdkp->device; const int timeout = sdp->request_queue->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER; + /* Leave the rest of the command zero to indicate flush everything. */ + const unsigned char cmd[16] = { sdp->use_16_for_sync ? + SYNCHRONIZE_CACHE_16 : SYNCHRONIZE_CACHE }; struct scsi_sense_hdr sshdr; + struct scsi_failure failure_defs[] = { + { + .allowed = 3, + .result = SCMD_FAILURE_RESULT_ANY, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .req_flags = BLK_MQ_REQ_PM, .sshdr = &sshdr, + .failures = &failures, }; if (!scsi_device_online(sdp)) return -ENODEV; - for (retries = 3; retries > 0; --retries) { - unsigned char cmd[16] = { 0 }; - - if (sdp->use_16_for_sync) - cmd[0] = SYNCHRONIZE_CACHE_16; - else - cmd[0] = SYNCHRONIZE_CACHE; - /* - * Leave the rest of the command zero to indicate - * flush everything. - */ - res = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0, - timeout, sdkp->max_retries, &exec_args); - if (res == 0) - break; - } - + res = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0, timeout, + sdkp->max_retries, &exec_args); if (res) { sd_print_result(sdkp, "Synchronize Cache(10) failed", res); -- cgit v1.2.3 From 11a26723210e91476b15f3d4f5def88609d04880 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:11 -0600 Subject: scsi: ch: Remove unit_attention unit_attention is not used so remove it. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-11-michael.christie@oracle.com Reviewed-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Martin Wilck Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/ch.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c index 2b864061e073..9d7fa097ae9e 100644 --- a/drivers/scsi/ch.c +++ b/drivers/scsi/ch.c @@ -113,7 +113,6 @@ typedef struct { struct scsi_device **dt; /* ptrs to data transfer elements */ u_int firsts[CH_TYPES]; u_int counts[CH_TYPES]; - u_int unit_attention; u_int voltags; struct mutex lock; } scsi_changer; @@ -208,7 +207,6 @@ ch_do_scsi(scsi_changer *ch, unsigned char *cmd, int cmd_len, switch(sshdr.sense_key) { case UNIT_ATTENTION: - ch->unit_attention = 1; if (retries++ < 3) goto retry; break; -- cgit v1.2.3 From e11f35c46ebd746049a3d84dc68e7e8681aa26e8 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:12 -0600 Subject: scsi: ch: Have midlayer retry ch_do_scsi() UAs This has ch_do_scsi() have the SCSI midlayer retry UAs instead of driving them itself. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-12-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/ch.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c index 9d7fa097ae9e..1befcd5b2a0f 100644 --- a/drivers/scsi/ch.c +++ b/drivers/scsi/ch.c @@ -185,17 +185,29 @@ static int ch_do_scsi(scsi_changer *ch, unsigned char *cmd, int cmd_len, void *buffer, unsigned int buflength, enum req_op op) { - int errno, retries = 0, timeout, result; + int errno = 0, timeout, result; struct scsi_sense_hdr sshdr; + struct scsi_failure failure_defs[] = { + { + .sense = UNIT_ATTENTION, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .allowed = 3, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, + .failures = &failures, }; timeout = (cmd[0] == INITIALIZE_ELEMENT_STATUS) ? timeout_init : timeout_move; - retry: - errno = 0; result = scsi_execute_cmd(ch->device, cmd, op, buffer, buflength, timeout * HZ, MAX_RETRIES, &exec_args); if (result < 0) @@ -204,13 +216,6 @@ ch_do_scsi(scsi_changer *ch, unsigned char *cmd, int cmd_len, if (debug) scsi_print_sense_hdr(ch->device, ch->name, &sshdr); errno = ch_find_errno(&sshdr); - - switch(sshdr.sense_key) { - case UNIT_ATTENTION: - if (retries++ < 3) - goto retry; - break; - } } return errno; } -- cgit v1.2.3 From 21bdff48e12bf674208e0575a03ca89d663f1a3c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:13 -0600 Subject: scsi: core: Have midlayer retry scsi_mode_sense() UAs This has scsi_mode_sense() have the SCSI midlayer retry UAs instead of driving them itself. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-13-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 16a33a558842..8a79e4f36636 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2262,11 +2262,25 @@ scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, int subpage, unsigned char cmd[12]; int use_10_for_ms; int header_length; - int result, retry_count = retries; + int result; struct scsi_sense_hdr my_sshdr; + struct scsi_failure failure_defs[] = { + { + .sense = UNIT_ATTENTION, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .allowed = retries, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { /* caller might not be interested in sense, but we need it */ .sshdr = sshdr ? : &my_sshdr, + .failures = &failures, }; memset(data, 0, sizeof(*data)); @@ -2328,12 +2342,6 @@ scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, int subpage, goto retry; } } - if (scsi_status_is_check_condition(result) && - sshdr->sense_key == UNIT_ATTENTION && - retry_count) { - retry_count--; - goto retry; - } } return -EIO; } -- cgit v1.2.3 From 8d24677ebb9e79201801cedefc5127655d5e8c3f Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:14 -0600 Subject: scsi: core: Have SCSI midlayer retry scsi_report_lun_scan() errors This has scsi_report_lun_scan() have the SCSI midlayer retry errors instead of driving them itself. There is one behavior change where we no longer retry when scsi_execute_cmd() returns < 0, but we should be ok. We don't need to retry for failures like the queue being removed, and for the case where there are no tags/reqs the block layer waits/retries for us. For possible memory allocation failures from blk_rq_map_kern() we use GFP_NOIO, so retrying will probably not help. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-14-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 57 ++++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 24 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 8ded08f37337..70c0319be34c 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1416,14 +1416,34 @@ static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflag unsigned int length; u64 lun; unsigned int num_luns; - unsigned int retries; int result; struct scsi_lun *lunp, *lun_data; - struct scsi_sense_hdr sshdr; struct scsi_device *sdev; struct Scsi_Host *shost = dev_to_shost(&starget->dev); + struct scsi_failure failure_defs[] = { + { + .sense = UNIT_ATTENTION, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .result = SAM_STAT_CHECK_CONDITION, + }, + /* Fail all CCs except the UA above */ + { + .sense = SCMD_FAILURE_SENSE_ANY, + .result = SAM_STAT_CHECK_CONDITION, + }, + /* Retry any other errors not listed above */ + { + .result = SCMD_FAILURE_RESULT_ANY, + }, + {} + }; + struct scsi_failures failures = { + .total_allowed = 3, + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { - .sshdr = &sshdr, + .failures = &failures, }; int ret = 0; @@ -1494,29 +1514,18 @@ retry: * should come through as a check condition, and will not generate * a retry. */ - for (retries = 0; retries < 3; retries++) { - SCSI_LOG_SCAN_BUS(3, sdev_printk (KERN_INFO, sdev, - "scsi scan: Sending REPORT LUNS to (try %d)\n", - retries)); - - result = scsi_execute_cmd(sdev, scsi_cmd, REQ_OP_DRV_IN, - lun_data, length, - SCSI_REPORT_LUNS_TIMEOUT, 3, - &exec_args); + scsi_failures_reset_retries(&failures); - SCSI_LOG_SCAN_BUS(3, sdev_printk (KERN_INFO, sdev, - "scsi scan: REPORT LUNS" - " %s (try %d) result 0x%x\n", - result ? "failed" : "successful", - retries, result)); - if (result == 0) - break; - else if (scsi_sense_valid(&sshdr)) { - if (sshdr.sense_key != UNIT_ATTENTION) - break; - } - } + SCSI_LOG_SCAN_BUS(3, sdev_printk (KERN_INFO, sdev, + "scsi scan: Sending REPORT LUNS\n")); + + result = scsi_execute_cmd(sdev, scsi_cmd, REQ_OP_DRV_IN, lun_data, + length, SCSI_REPORT_LUNS_TIMEOUT, 3, + &exec_args); + SCSI_LOG_SCAN_BUS(3, sdev_printk (KERN_INFO, sdev, + "scsi scan: REPORT LUNS %s result 0x%x\n", + result ? "failed" : "successful", result)); if (result) { /* * The device probably does not support a REPORT LUN command -- cgit v1.2.3 From eea6ef3792e34bd9476bef2fad074a8ce24915ec Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:15 -0600 Subject: scsi: sd: Have pr commands retry UAs It's common to get a UA when doing PR commands. It could be due to a target restarting, transport level relogin or other PR commands like a release causing it. The upper layers don't get the sense and in some cases have no idea if it's a SCSI device, so this has the sd layer retry. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-15-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c2068d83c812..4196f722c3f6 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1800,8 +1800,22 @@ static int sd_pr_in_command(struct block_device *bdev, u8 sa, struct scsi_device *sdev = sdkp->device; struct scsi_sense_hdr sshdr; u8 cmd[10] = { PERSISTENT_RESERVE_IN, sa }; + struct scsi_failure failure_defs[] = { + { + .sense = UNIT_ATTENTION, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .allowed = 5, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, + .failures = &failures, }; int result; @@ -1888,8 +1902,22 @@ static int sd_pr_out_command(struct block_device *bdev, u8 sa, u64 key, struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdev = sdkp->device; struct scsi_sense_hdr sshdr; + struct scsi_failure failure_defs[] = { + { + .sense = UNIT_ATTENTION, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .allowed = 5, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, + .failures = &failures, }; int result; u8 cmd[16] = { 0, }; -- cgit v1.2.3 From 0f11328f2f46618c8c4734041fdb2aacfa99b802 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:16 -0600 Subject: scsi: sd: Have midlayer retry read_capacity_10() errors This has read_capacity_10() have the SCSI midlayer retry errors instead of driving them itself. There are 2 behavior changes with this patch: 1. There is one behavior change where we no longer retry when scsi_execute_cmd() returns < 0, but we should be ok. We don't need to retry for failures like the queue being removed, and for the case where there are no tags/reqs since the block layer waits/retries for us. For possible memory allocation failures from blk_rq_map_kern() we use GFP_NOIO, so retrying will probably not help. 2. For the specific UAs we checked for and retried, we would get READ_CAPACITY_RETRIES_ON_RESET retries plus whatever retries were left from the main loop's retries. Each UA now gets READ_CAPACITY_RETRIES_ON_RESET retries, and the other errors get up to 3 retries. This is most likely ok, because READ_CAPACITY_RETRIES_ON_RESET is already 10 and is not based on anything specific like a spec or device, so the extra 3 we got from the main loop was probably just an accident and is not going to help. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-16-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 62 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 23 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 4196f722c3f6..49159dcd638d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2588,42 +2588,58 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp, unsigned char *buffer) { - unsigned char cmd[16]; + static const u8 cmd[10] = { READ_CAPACITY }; struct scsi_sense_hdr sshdr; + struct scsi_failure failure_defs[] = { + /* Do not retry Medium Not Present */ + { + .sense = UNIT_ATTENTION, + .asc = 0x3A, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = NOT_READY, + .asc = 0x3A, + .result = SAM_STAT_CHECK_CONDITION, + }, + /* Device reset might occur several times so retry a lot */ + { + .sense = UNIT_ATTENTION, + .asc = 0x29, + .allowed = READ_CAPACITY_RETRIES_ON_RESET, + .result = SAM_STAT_CHECK_CONDITION, + }, + /* Any other error not listed above retry 3 times */ + { + .result = SCMD_FAILURE_RESULT_ANY, + .allowed = 3, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, + .failures = &failures, }; int sense_valid = 0; int the_result; - int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET; sector_t lba; unsigned sector_size; - do { - cmd[0] = READ_CAPACITY; - memset(&cmd[1], 0, 9); - memset(buffer, 0, 8); + memset(buffer, 0, 8); - the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer, - 8, SD_TIMEOUT, sdkp->max_retries, - &exec_args); + the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer, + 8, SD_TIMEOUT, sdkp->max_retries, + &exec_args); + + if (the_result > 0) { + sense_valid = scsi_sense_valid(&sshdr); if (media_not_present(sdkp, &sshdr)) return -ENODEV; - - if (the_result > 0) { - sense_valid = scsi_sense_valid(&sshdr); - if (sense_valid && - sshdr.sense_key == UNIT_ATTENTION && - sshdr.asc == 0x29 && sshdr.ascq == 0x00) - /* Device reset might occur several times, - * give it one more chance */ - if (--reset_retries > 0) - continue; - } - retries--; - - } while (the_result && retries); + } if (the_result) { sd_print_result(sdkp, "Read Capacity(10) failed", the_result); -- cgit v1.2.3 From 3a7b4579328ec741d909066d648ca6be139f7bb6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:17 -0600 Subject: scsi: ses: Have midlayer retry scsi_execute_cmd() errors This has ses have the SCSI midlayer retry scsi_execute_cmd() errors instead of driving them itself. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-17-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/ses.c | 66 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 20 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index d7d0c35c58b8..0f2c87cc95e6 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -87,19 +87,32 @@ static int ses_recv_diag(struct scsi_device *sdev, int page_code, 0 }; unsigned char recv_page_code; - unsigned int retries = SES_RETRIES; - struct scsi_sense_hdr sshdr; + struct scsi_failure failure_defs[] = { + { + .sense = UNIT_ATTENTION, + .asc = 0x29, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .allowed = SES_RETRIES, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = NOT_READY, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .allowed = SES_RETRIES, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { - .sshdr = &sshdr, + .failures = &failures, }; - do { - ret = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, buf, bufflen, - SES_TIMEOUT, 1, &exec_args); - } while (ret > 0 && --retries && scsi_sense_valid(&sshdr) && - (sshdr.sense_key == NOT_READY || - (sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29))); - + ret = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, buf, bufflen, + SES_TIMEOUT, 1, &exec_args); if (unlikely(ret)) return ret; @@ -131,19 +144,32 @@ static int ses_send_diag(struct scsi_device *sdev, int page_code, bufflen & 0xff, 0 }; - struct scsi_sense_hdr sshdr; - unsigned int retries = SES_RETRIES; + struct scsi_failure failure_defs[] = { + { + .sense = UNIT_ATTENTION, + .asc = 0x29, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .allowed = SES_RETRIES, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = NOT_READY, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .allowed = SES_RETRIES, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { - .sshdr = &sshdr, + .failures = &failures, }; - do { - result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_OUT, buf, - bufflen, SES_TIMEOUT, 1, &exec_args); - } while (result > 0 && --retries && scsi_sense_valid(&sshdr) && - (sshdr.sense_key == NOT_READY || - (sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29))); - + result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_OUT, buf, bufflen, + SES_TIMEOUT, 1, &exec_args); if (result) sdev_printk(KERN_ERR, sdev, "SEND DIAGNOSTIC result: %8x\n", result); -- cgit v1.2.3 From b72f2d149e24747ff686c21b44e04762fc9d3a2f Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:18 -0600 Subject: scsi: sr: Have midlayer retry get_sectorsize() errors This has get_sectorsize() have the SCSI midlayer retry errors instead of driving them itself. There is one behavior change where we no longer retry when scsi_execute_cmd() returns < 0, but we should be ok. We don't need to retry for failures like the queue being removed, and for the case where there are no tags/reqs the block layer waits/retries for us. For possible memory allocation failures from blk_rq_map_kern() we use GFP_NOIO, so retrying will probably not help. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-18-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/sr.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index d093dd187b2f..268b3a40891e 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -717,27 +717,29 @@ fail: static void get_sectorsize(struct scsi_cd *cd) { - unsigned char cmd[10]; - unsigned char buffer[8]; - int the_result, retries = 3; + static const u8 cmd[10] = { READ_CAPACITY }; + unsigned char buffer[8] = { }; + int the_result; int sector_size; struct request_queue *queue; + struct scsi_failure failure_defs[] = { + { + .result = SCMD_FAILURE_RESULT_ANY, + .allowed = 3, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; + const struct scsi_exec_args exec_args = { + .failures = &failures, + }; - do { - cmd[0] = READ_CAPACITY; - memset((void *) &cmd[1], 0, 9); - memset(buffer, 0, sizeof(buffer)); - - /* Do the command and wait.. */ - the_result = scsi_execute_cmd(cd->device, cmd, REQ_OP_DRV_IN, - buffer, sizeof(buffer), - SR_TIMEOUT, MAX_RETRIES, NULL); - - retries--; - - } while (the_result && retries); - - + /* Do the command and wait.. */ + the_result = scsi_execute_cmd(cd->device, cmd, REQ_OP_DRV_IN, buffer, + sizeof(buffer), SR_TIMEOUT, MAX_RETRIES, + &exec_args); if (the_result) { cd->capacity = 0x1fffff; sector_size = 2048; /* A guess, just in case */ -- cgit v1.2.3 From 25a1f7a0a1fe6fa69a5370fbb5cc6dcf3726d81e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:20 -0600 Subject: scsi: core: Add kunit tests for scsi_check_passthrough() Add some kunit tests for scsi_check_passthrough() so we can easily make sure we are hitting the cases for which it's difficult to replicate in hardware or even scsi_debug. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-20-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/Kconfig | 9 ++ drivers/scsi/scsi_lib.c | 4 + drivers/scsi/scsi_lib_test.c | 330 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 343 insertions(+) create mode 100644 drivers/scsi/scsi_lib_test.c (limited to 'drivers/scsi') diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index addac7fbe37b..e20af95a912b 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -67,6 +67,15 @@ config SCSI_PROC_FS If unsure say Y. +config SCSI_LIB_KUNIT_TEST + tristate "KUnit tests for SCSI Mid Layer's scsi_lib" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Run SCSI Mid Layer's KUnit tests for scsi_lib. + + If unsure say N. + comment "SCSI support type (disk, tape, CD-ROM)" depends on SCSI diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 8a79e4f36636..c726f2025c59 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -3434,3 +3434,7 @@ void scsi_build_sense(struct scsi_cmnd *scmd, int desc, u8 key, u8 asc, u8 ascq) scmd->result = SAM_STAT_CHECK_CONDITION; } EXPORT_SYMBOL_GPL(scsi_build_sense); + +#ifdef CONFIG_SCSI_KUNIT_TEST +#include "scsi_lib_test.c" +#endif diff --git a/drivers/scsi/scsi_lib_test.c b/drivers/scsi/scsi_lib_test.c new file mode 100644 index 000000000000..99834426a100 --- /dev/null +++ b/drivers/scsi/scsi_lib_test.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KUnit tests for scsi_lib.c. + * + * Copyright (C) 2023, Oracle Corporation + */ +#include + +#include +#include +#include + +#define SCSI_LIB_TEST_MAX_ALLOWED 3 +#define SCSI_LIB_TEST_TOTAL_MAX_ALLOWED 5 + +static void scsi_lib_test_multiple_sense(struct kunit *test) +{ + struct scsi_failure multiple_sense_failure_defs[] = { + { + .sense = DATA_PROTECT, + .asc = 0x1, + .ascq = 0x1, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = UNIT_ATTENTION, + .asc = 0x11, + .ascq = 0x0, + .allowed = SCSI_LIB_TEST_MAX_ALLOWED, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = NOT_READY, + .asc = 0x11, + .ascq = 0x22, + .allowed = SCSI_LIB_TEST_MAX_ALLOWED, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = ABORTED_COMMAND, + .asc = 0x11, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .allowed = SCSI_LIB_TEST_MAX_ALLOWED, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = HARDWARE_ERROR, + .asc = SCMD_FAILURE_ASC_ANY, + .allowed = SCSI_LIB_TEST_MAX_ALLOWED, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = ILLEGAL_REQUEST, + .asc = 0x91, + .ascq = 0x36, + .allowed = SCSI_LIB_TEST_MAX_ALLOWED, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = multiple_sense_failure_defs, + }; + u8 sense[SCSI_SENSE_BUFFERSIZE] = {}; + struct scsi_cmnd sc = { + .sense_buffer = sense, + }; + int i; + + /* Match end of array */ + scsi_build_sense(&sc, 0, ILLEGAL_REQUEST, 0x91, 0x36); + KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures)); + /* Basic match in array */ + scsi_build_sense(&sc, 0, UNIT_ATTENTION, 0x11, 0x0); + KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures)); + /* No matching sense entry */ + scsi_build_sense(&sc, 0, MISCOMPARE, 0x11, 0x11); + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures)); + /* Match using SCMD_FAILURE_ASCQ_ANY */ + scsi_build_sense(&sc, 0, ABORTED_COMMAND, 0x11, 0x22); + KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures)); + /* Fail to match */ + scsi_build_sense(&sc, 0, ABORTED_COMMAND, 0x22, 0x22); + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures)); + /* Match using SCMD_FAILURE_ASC_ANY */ + scsi_build_sense(&sc, 0, HARDWARE_ERROR, 0x11, 0x22); + KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures)); + /* No matching status entry */ + sc.result = SAM_STAT_RESERVATION_CONFLICT; + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures)); + + /* Test hitting allowed limit */ + scsi_build_sense(&sc, 0, NOT_READY, 0x11, 0x22); + for (i = 0; i < SCSI_LIB_TEST_MAX_ALLOWED; i++) + KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, + &failures)); + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures)); + + /* reset retries so we can retest */ + failures.failure_definitions = multiple_sense_failure_defs; + scsi_failures_reset_retries(&failures); + + /* Test no retries allowed */ + scsi_build_sense(&sc, 0, DATA_PROTECT, 0x1, 0x1); + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures)); +} + +static void scsi_lib_test_any_sense(struct kunit *test) +{ + struct scsi_failure any_sense_failure_defs[] = { + { + .result = SCMD_FAILURE_SENSE_ANY, + .allowed = SCSI_LIB_TEST_MAX_ALLOWED, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = any_sense_failure_defs, + }; + u8 sense[SCSI_SENSE_BUFFERSIZE] = {}; + struct scsi_cmnd sc = { + .sense_buffer = sense, + }; + + /* Match using SCMD_FAILURE_SENSE_ANY */ + failures.failure_definitions = any_sense_failure_defs; + scsi_build_sense(&sc, 0, MEDIUM_ERROR, 0x11, 0x22); + KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures)); +} + +static void scsi_lib_test_host(struct kunit *test) +{ + struct scsi_failure retryable_host_failure_defs[] = { + { + .result = DID_TRANSPORT_DISRUPTED << 16, + .allowed = SCSI_LIB_TEST_MAX_ALLOWED, + }, + { + .result = DID_TIME_OUT << 16, + .allowed = SCSI_LIB_TEST_MAX_ALLOWED, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = retryable_host_failure_defs, + }; + u8 sense[SCSI_SENSE_BUFFERSIZE] = {}; + struct scsi_cmnd sc = { + .sense_buffer = sense, + }; + + /* No matching host byte entry */ + failures.failure_definitions = retryable_host_failure_defs; + sc.result = DID_NO_CONNECT << 16; + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures)); + /* Matching host byte entry */ + sc.result = DID_TIME_OUT << 16; + KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures)); +} + +static void scsi_lib_test_any_failure(struct kunit *test) +{ + struct scsi_failure any_failure_defs[] = { + { + .result = SCMD_FAILURE_RESULT_ANY, + .allowed = SCSI_LIB_TEST_MAX_ALLOWED, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = any_failure_defs, + }; + u8 sense[SCSI_SENSE_BUFFERSIZE] = {}; + struct scsi_cmnd sc = { + .sense_buffer = sense, + }; + + /* Match SCMD_FAILURE_RESULT_ANY */ + failures.failure_definitions = any_failure_defs; + sc.result = DID_TRANSPORT_FAILFAST << 16; + KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures)); +} + +static void scsi_lib_test_any_status(struct kunit *test) +{ + struct scsi_failure any_status_failure_defs[] = { + { + .result = SCMD_FAILURE_STAT_ANY, + .allowed = SCSI_LIB_TEST_MAX_ALLOWED, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = any_status_failure_defs, + }; + u8 sense[SCSI_SENSE_BUFFERSIZE] = {}; + struct scsi_cmnd sc = { + .sense_buffer = sense, + }; + + /* Test any status handling */ + failures.failure_definitions = any_status_failure_defs; + sc.result = SAM_STAT_RESERVATION_CONFLICT; + KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures)); +} + +static void scsi_lib_test_total_allowed(struct kunit *test) +{ + struct scsi_failure total_allowed_defs[] = { + { + .sense = UNIT_ATTENTION, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .result = SAM_STAT_CHECK_CONDITION, + }, + /* Fail all CCs except the UA above */ + { + .sense = SCMD_FAILURE_SENSE_ANY, + .result = SAM_STAT_CHECK_CONDITION, + }, + /* Retry any other errors not listed above */ + { + .result = SCMD_FAILURE_RESULT_ANY, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = total_allowed_defs, + }; + u8 sense[SCSI_SENSE_BUFFERSIZE] = {}; + struct scsi_cmnd sc = { + .sense_buffer = sense, + }; + int i; + + /* Test total_allowed */ + failures.failure_definitions = total_allowed_defs; + scsi_failures_reset_retries(&failures); + failures.total_allowed = SCSI_LIB_TEST_TOTAL_MAX_ALLOWED; + + scsi_build_sense(&sc, 0, UNIT_ATTENTION, 0x28, 0x0); + for (i = 0; i < SCSI_LIB_TEST_TOTAL_MAX_ALLOWED; i++) + /* Retry since we under the total_allowed limit */ + KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, + &failures)); + sc.result = DID_TIME_OUT << 16; + /* We have now hit the total_allowed limit so no more retries */ + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures)); +} + +static void scsi_lib_test_mixed_total(struct kunit *test) +{ + struct scsi_failure mixed_total_defs[] = { + { + .sense = UNIT_ATTENTION, + .asc = 0x28, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = UNIT_ATTENTION, + .asc = 0x29, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .allowed = 1, + .result = DID_TIME_OUT << 16, + }, + {} + }; + u8 sense[SCSI_SENSE_BUFFERSIZE] = {}; + struct scsi_failures failures = { + .failure_definitions = mixed_total_defs, + }; + struct scsi_cmnd sc = { + .sense_buffer = sense, + }; + int i; + + /* + * Test total_allowed when there is a mix of per failure allowed + * and total_allowed limits. + */ + failures.failure_definitions = mixed_total_defs; + scsi_failures_reset_retries(&failures); + failures.total_allowed = SCSI_LIB_TEST_TOTAL_MAX_ALLOWED; + + scsi_build_sense(&sc, 0, UNIT_ATTENTION, 0x28, 0x0); + for (i = 0; i < SCSI_LIB_TEST_TOTAL_MAX_ALLOWED; i++) + /* Retry since we under the total_allowed limit */ + KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, + &failures)); + /* Do not retry since we are now over total_allowed limit */ + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures)); + + scsi_failures_reset_retries(&failures); + scsi_build_sense(&sc, 0, UNIT_ATTENTION, 0x28, 0x0); + for (i = 0; i < SCSI_LIB_TEST_TOTAL_MAX_ALLOWED; i++) + /* Retry since we under the total_allowed limit */ + KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, + &failures)); + sc.result = DID_TIME_OUT << 16; + /* Retry because this failure has a per failure limit */ + KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures)); + scsi_build_sense(&sc, 0, UNIT_ATTENTION, 0x29, 0x0); + /* total_allowed is now hit so no more retries */ + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures)); +} + +static void scsi_lib_test_check_passthough(struct kunit *test) +{ + scsi_lib_test_multiple_sense(test); + scsi_lib_test_any_sense(test); + scsi_lib_test_host(test); + scsi_lib_test_any_failure(test); + scsi_lib_test_any_status(test); + scsi_lib_test_total_allowed(test); + scsi_lib_test_mixed_total(test); +} + +static struct kunit_case scsi_lib_test_cases[] = { + KUNIT_CASE(scsi_lib_test_check_passthough), + {} +}; + +static struct kunit_suite scsi_lib_test_suite = { + .name = "scsi_lib", + .test_cases = scsi_lib_test_cases, +}; + +kunit_test_suite(scsi_lib_test_suite); -- cgit v1.2.3