From becd9be6069e7b183c084f460f0eb363e43cc487 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 18 Mar 2023 20:56:12 -0500 Subject: scsi: target: Move sess cmd counter to new struct iSCSI needs to wait on outstanding commands like how SRP and the FC/FCoE drivers do. It can't use target_stop_session() because for MCS support we can't stop the entire session during recovery because if other connections are OK then we want to be able to continue to execute I/O on them. Move the per session cmd counters to a new struct so iSCSI can allocate them per connection. The xcopy code can also just not allocate in the future since it doesn't need to track commands. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230319015620.96006-2-michael.christie@oracle.com Reviewed-by: Maurizio Lombardi Signed-off-by: Martin K. Petersen --- include/target/target_core_base.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include/target/target_core_base.h') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 12c9ba16217e..bd299790e99c 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -494,6 +494,7 @@ struct se_cmd { struct se_lun *se_lun; /* Only used for internal passthrough and legacy TCM fabric modules */ struct se_session *se_sess; + struct target_cmd_counter *cmd_cnt; struct se_tmr_req *se_tmr_req; struct llist_node se_cmd_list; struct completion *free_compl; @@ -619,22 +620,26 @@ static inline struct se_node_acl *fabric_stat_to_nacl(struct config_item *item) acl_fabric_stat_group); } -struct se_session { +struct target_cmd_counter { + struct percpu_ref refcnt; + wait_queue_head_t refcnt_wq; + struct completion stop_done; atomic_t stopped; +}; + +struct se_session { u64 sess_bin_isid; enum target_prot_op sup_prot_ops; enum target_prot_type sess_prot_type; struct se_node_acl *se_node_acl; struct se_portal_group *se_tpg; void *fabric_sess_ptr; - struct percpu_ref cmd_count; struct list_head sess_list; struct list_head sess_acl_list; spinlock_t sess_cmd_lock; - wait_queue_head_t cmd_count_wq; - struct completion stop_done; void *sess_cmd_map; struct sbitmap_queue sess_tag_pool; + struct target_cmd_counter *cmd_cnt; }; struct se_device; -- cgit v1.2.3 From 673db054d7a2b5a470d7a25baf65956d005ad729 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 18 Mar 2023 20:56:18 -0500 Subject: scsi: target: Fix multiple LUN_RESET handling This fixes a bug where an initiator thinks a LUN_RESET has cleaned up running commands when it hasn't. The bug was added in commit 51ec502a3266 ("target: Delete tmr from list before processing"). The problem occurs when: 1. We have N I/O cmds running in the target layer spread over 2 sessions. 2. The initiator sends a LUN_RESET for each session. 3. session1's LUN_RESET loops over all the running commands from both sessions and moves them to its local drain_task_list. 4. session2's LUN_RESET does not see the LUN_RESET from session1 because the commit above has it remove itself. session2 also does not see any commands since the other reset moved them off the state lists. 5. sessions2's LUN_RESET will then complete with a successful response. 6. sessions2's inititor believes the running commands on its session are now cleaned up due to the successful response and cleans up the running commands from its side. It then restarts them. 7. The commands do eventually complete on the backend and the target starts to return aborted task statuses for them. The initiator will either throw a invalid ITT error or might accidentally lookup a new task if the ITT has been reallocated already. Fix the bug by reverting the patch, and serialize the execution of LUN_RESETs and Preempt and Aborts. Also prevent us from waiting on LUN_RESETs in core_tmr_drain_tmr_list, because it turns out the original patch fixed a bug that was not mentioned. For LUN_RESET1 core_tmr_drain_tmr_list can see a second LUN_RESET and wait on it. Then the second reset will run core_tmr_drain_tmr_list and see the first reset and wait on it resulting in a deadlock. Fixes: 51ec502a3266 ("target: Delete tmr from list before processing") Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230319015620.96006-8-michael.christie@oracle.com Signed-off-by: Martin K. Petersen --- drivers/target/target_core_device.c | 1 + drivers/target/target_core_tmr.c | 26 +++++++++++++++++++++++--- include/target/target_core_base.h | 1 + 3 files changed, 25 insertions(+), 3 deletions(-) (limited to 'include/target/target_core_base.h') diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index f6e58410ec3f..aeb03136773d 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -782,6 +782,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) spin_lock_init(&dev->t10_alua.lba_map_lock); INIT_WORK(&dev->delayed_cmd_work, target_do_delayed_work); + mutex_init(&dev->lun_reset_mutex); dev->t10_wwn.t10_dev = dev; /* diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 2b95b4550a63..4718db628222 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -188,14 +188,23 @@ static void core_tmr_drain_tmr_list( * LUN_RESET tmr.. */ spin_lock_irqsave(&dev->se_tmr_lock, flags); - if (tmr) - list_del_init(&tmr->tmr_list); list_for_each_entry_safe(tmr_p, tmr_pp, &dev->dev_tmr_list, tmr_list) { + if (tmr_p == tmr) + continue; + cmd = tmr_p->task_cmd; if (!cmd) { pr_err("Unable to locate struct se_cmd for TMR\n"); continue; } + + /* + * We only execute one LUN_RESET at a time so we can't wait + * on them below. + */ + if (tmr_p->function == TMR_LUN_RESET) + continue; + /* * If this function was called with a valid pr_res_key * parameter (eg: for PROUT PREEMPT_AND_ABORT service action @@ -379,14 +388,25 @@ int core_tmr_lun_reset( tmr_nacl->initiatorname); } } + + + /* + * We only allow one reset or preempt and abort to execute at a time + * to prevent one call from claiming all the cmds causing a second + * call from returning while cmds it should have waited on are still + * running. + */ + mutex_lock(&dev->lun_reset_mutex); + pr_debug("LUN_RESET: %s starting for [%s], tas: %d\n", (preempt_and_abort_list) ? "Preempt" : "TMR", dev->transport->name, tas); - core_tmr_drain_tmr_list(dev, tmr, preempt_and_abort_list); core_tmr_drain_state_list(dev, prout_cmd, tmr_sess, tas, preempt_and_abort_list); + mutex_unlock(&dev->lun_reset_mutex); + /* * Clear any legacy SPC-2 reservation when called during * LOGICAL UNIT RESET diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index bd299790e99c..8cc42ad65c92 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -872,6 +872,7 @@ struct se_device { struct rcu_head rcu_head; int queue_cnt; struct se_device_queue *queues; + struct mutex lun_reset_mutex; }; struct target_opcode_descriptor { -- cgit v1.2.3