From 971237b900c38f50e7865289a2aecb77dc7f09f3 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Wed, 27 Sep 2023 11:35:57 +0800 Subject: scsi: ufs: core: WLUN send SSU timeout recovery When runtime PM send SSU times out, the SCSI core invokes eh_host_reset_handler, in this case ufshcd_eh_host_reset_handler(), which is then stuck waiting for flush_work(&hba->eh_work). However, ufshcd_err_handler hangs in wait RPM resume. Do link recovery only in this case. The following IO hang stack dump was observed: kworker/4:0 D __switch_to+0x180/0x344 __schedule+0x5ec/0xa14 schedule+0x78/0xe0 schedule_timeout+0xb0/0x15c io_schedule_timeout+0x48/0x70 do_wait_for_common+0x108/0x19c wait_for_completion_io_timeout+0x50/0x78 blk_execute_rq+0x1b8/0x218 scsi_execute_cmd+0x148/0x238 ufshcd_set_dev_pwr_mode+0xe8/0x244 __ufshcd_wl_resume+0x1e0/0x45c ufshcd_wl_runtime_resume+0x3c/0x174 scsi_runtime_resume+0x7c/0xc8 __rpm_callback+0xa0/0x410 rpm_resume+0x43c/0x67c __rpm_callback+0x1f0/0x410 rpm_resume+0x460/0x67c pm_runtime_work+0xa4/0xac process_one_work+0x208/0x598 worker_thread+0x228/0x438 kthread+0x104/0x1d4 ret_from_fork+0x10/0x20 scsi_eh_0 D __switch_to+0x180/0x344 __schedule+0x5ec/0xa14 schedule+0x78/0xe0 schedule_timeout+0x44/0x15c do_wait_for_common+0x108/0x19c wait_for_completion+0x48/0x64 __flush_work+0x260/0x2d0 flush_work+0x10/0x20 ufshcd_eh_host_reset_handler+0x88/0xcc scsi_try_host_reset+0x48/0xe0 scsi_eh_ready_devs+0x934/0xa40 scsi_error_handler+0x168/0x374 kthread+0x104/0x1d4 ret_from_fork+0x10/0x20 kworker/u16:5 D __switch_to+0x180/0x344 __schedule+0x5ec/0xa14 schedule+0x78/0xe0 rpm_resume+0x114/0x67c __pm_runtime_resume+0x70/0xb4 ufshcd_err_handler+0x1a0/0xe68 process_one_work+0x208/0x598 worker_thread+0x228/0x438 kthread+0x104/0x1d4 ret_from_fork+0x10/0x20 Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20230927033557.13801-1-peter.wang@mediatek.com Reviewed-by: Bart Van Assche Reviewed-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/ufs/core') diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 0f08234def3f..1839e188a2c5 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -7708,6 +7708,19 @@ static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd) hba = shost_priv(cmd->device->host); + /* + * If runtime PM sent SSU and got a timeout, scsi_error_handler is + * stuck in this function waiting for flush_work(&hba->eh_work). And + * ufshcd_err_handler(eh_work) is stuck waiting for runtime PM. Do + * ufshcd_link_recovery instead of eh_work to prevent deadlock. + */ + if (hba->pm_op_in_progress) { + if (ufshcd_link_recovery(hba)) + err = FAILED; + + return err; + } + spin_lock_irqsave(hba->host->host_lock, flags); hba->force_reset = true; ufshcd_schedule_eh_work(hba); -- cgit v1.2.3