From 3a71f0f7a51259b3cb95d79cac1e19dcc5e89ce9 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 26 Oct 2021 03:42:40 +0200 Subject: scsi: core: Fix early registration of sysfs attributes for scsi_device v4.17 commit 86b87cde0b55 ("scsi: core: host template attribute groups") introduced explicit sysfs_create_groups() in scsi_sysfs_add_sdev() and sysfs_remove_groups() in __scsi_remove_device(), both for sdev_gendev, based on a new field const struct attribute_group **sdev_groups of struct scsi_host_template. Commit 92c4b58b15c5 ("scsi: core: Register sysfs attributes earlier") removed above explicit (de)registration of scsi_device attribute groups. It also converted all scsi_device attributes and attribute_groups to end up in a new field const struct attribute_group *gendev_attr_groups[6] of struct scsi_device. However, that new field was not used anywhere. Surprisingly, this only caused missing LLDD specific scsi_device sysfs attributes. Whereas, scsi core attributes from scsi_sdev_attr_groups did continue to exist because of scsi_dev_type.groups. We separate scsi core attibutes from LLDD specific attributes. Hence, we keep the initializing assignment scsi_dev_type = { .groups = scsi_sdev_attr_groups, } as this takes care of core attributes. Without the separation, it would cause attribute double registration due to scsi_dev_type.groups and sdev_gendev.groups. Julian suggested to assign the sdev_groups pointer of the scsi_host_template directly to the groups pointer of sdev_gendev. This way we can delete the container scsi_device.gendev_attr_groups and the loop copying each entry from hostt->sdev_groups to sdev->gendev_attr_groups. Alternative approaches ruled out: Assigning gendev_attr_groups to sdev_dev has no visible effect. Assigning sdev->gendev_attr_groups to scsi_dev_type.groups caused scsi_device of all scsi host types to get LLDD specific attributes of the LLDD for which the last sdev alloc happened to occur, as that overwrote scsi_dev_type.groups, e.g. scsi_debug had zfcp-specific scsi_device attributes. Link: https://lore.kernel.org/r/20211026014240.4098365-1-maier@linux.ibm.com Fixes: 92c4b58b15c5 ("scsi: core: Register sysfs attributes earlier") Suggested-by: Julian Wiedmann Reviewed-by: Bart Van Assche Signed-off-by: Steffen Maier Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index b1e9b3bd3a60..b97e142a7ca9 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -225,12 +225,6 @@ struct scsi_device { struct device sdev_gendev, sdev_dev; - /* - * The array size 6 provides space for one attribute group for the - * SCSI core, four attribute groups defined by SCSI LLDs and one - * terminating NULL pointer. - */ - const struct attribute_group *gendev_attr_groups[6]; struct execute_work ew; /* used to get process context on put */ struct work_struct requeue_work; -- cgit v1.2.3 From 5ae17501bc62a49b0b193dcce003f16375f16654 Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Fri, 29 Oct 2021 15:43:10 -0400 Subject: scsi: core: Avoid leaving shost->last_reset with stale value if EH does not run The changes to issue the abort from the scmd->abort_work instead of the EH thread introduced a problem if eh_deadline is used. If aborting the command(s) is successful, and there are never any scmds added to the shost->eh_cmd_q, there is no code path which will reset the ->last_reset value back to zero. The effect of this is that after a successful abort with no EH thread activity, a subsequent timeout, perhaps a long time later, might immediately be considered past a user-set eh_deadline time, and the host will be reset with no attempt at recovery. Fix this by resetting ->last_reset back to zero in scmd_eh_abort_handler() if it is determined that the EH thread will not run to do this. Thanks to Gopinath Marappan for investigating this problem. Link: https://lore.kernel.org/r/20211029194311.17504-2-emilne@redhat.com Fixes: e494f6a72839 ("[SCSI] improved eh timeout handler") Cc: stable@vger.kernel.org Signed-off-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/hosts.c | 1 + drivers/scsi/scsi_error.c | 25 +++++++++++++++++++++++++ drivers/scsi/scsi_lib.c | 1 + include/scsi/scsi_cmnd.h | 2 +- include/scsi/scsi_host.h | 1 + 5 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 17aef936bc90..2cb7163e24cc 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -387,6 +387,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost->shost_state = SHOST_CREATED; INIT_LIST_HEAD(&shost->__devices); INIT_LIST_HEAD(&shost->__targets); + INIT_LIST_HEAD(&shost->eh_abort_list); INIT_LIST_HEAD(&shost->eh_cmd_q); INIT_LIST_HEAD(&shost->starved_list); init_waitqueue_head(&shost->host_wait); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 3de03925550e..bdf782d9cb86 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -133,6 +133,23 @@ static bool scsi_eh_should_retry_cmd(struct scsi_cmnd *cmd) return true; } +static void scsi_eh_complete_abort(struct scsi_cmnd *scmd, struct Scsi_Host *shost) +{ + unsigned long flags; + + spin_lock_irqsave(shost->host_lock, flags); + list_del_init(&scmd->eh_entry); + /* + * If the abort succeeds, and there is no further + * EH action, clear the ->last_reset time. + */ + if (list_empty(&shost->eh_abort_list) && + list_empty(&shost->eh_cmd_q)) + if (shost->eh_deadline != -1) + shost->last_reset = 0; + spin_unlock_irqrestore(shost->host_lock, flags); +} + /** * scmd_eh_abort_handler - Handle command aborts * @work: command to be aborted. @@ -150,6 +167,7 @@ scmd_eh_abort_handler(struct work_struct *work) container_of(work, struct scsi_cmnd, abort_work.work); struct scsi_device *sdev = scmd->device; enum scsi_disposition rtn; + unsigned long flags; if (scsi_host_eh_past_deadline(sdev->host)) { SCSI_LOG_ERROR_RECOVERY(3, @@ -173,12 +191,14 @@ scmd_eh_abort_handler(struct work_struct *work) SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_WARNING, scmd, "retry aborted command\n")); + scsi_eh_complete_abort(scmd, sdev->host); scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); return; } else { SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_WARNING, scmd, "finish aborted command\n")); + scsi_eh_complete_abort(scmd, sdev->host); scsi_finish_command(scmd); return; } @@ -191,6 +211,9 @@ scmd_eh_abort_handler(struct work_struct *work) } } + spin_lock_irqsave(sdev->host->host_lock, flags); + list_del_init(&scmd->eh_entry); + spin_unlock_irqrestore(sdev->host->host_lock, flags); scsi_eh_scmd_add(scmd); } @@ -221,6 +244,8 @@ scsi_abort_command(struct scsi_cmnd *scmd) spin_lock_irqsave(shost->host_lock, flags); if (shost->eh_deadline != -1 && !shost->last_reset) shost->last_reset = jiffies; + BUG_ON(!list_empty(&scmd->eh_entry)); + list_add_tail(&scmd->eh_entry, &shost->eh_abort_list); spin_unlock_irqrestore(shost->host_lock, flags); scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d0b7c6dc74f8..c851c05d6091 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1143,6 +1143,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) cmd->sense_buffer = buf; cmd->prot_sdb = prot; cmd->flags = flags; + INIT_LIST_HEAD(&cmd->eh_entry); INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler); cmd->jiffies_at_alloc = jiffies_at_alloc; cmd->retries = retries; diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 7958a604f979..29ac40cf1aae 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -73,7 +73,7 @@ enum scsi_cmnd_submitter { struct scsi_cmnd { struct scsi_request req; struct scsi_device *device; - struct list_head eh_entry; /* entry for the host eh_cmd_q */ + struct list_head eh_entry; /* entry for the host eh_abort_list/eh_cmd_q */ struct delayed_work abort_work; struct rcu_head rcu; diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index ae715959f886..ebe059badba0 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -551,6 +551,7 @@ struct Scsi_Host { struct mutex scan_mutex;/* serialize scanning activity */ + struct list_head eh_abort_list; struct list_head eh_cmd_q; struct task_struct * ehandler; /* Error recovery thread. */ struct completion * eh_action; /* Wait for specific actions on the -- cgit v1.2.3