diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-09-04 23:04:51 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-09-04 23:04:51 +0300 |
commit | 8075fc3b113dee1531106aaec3dfa19c8158374d (patch) | |
tree | c168243d0785bade241bcb223ddfd07418164cfb /drivers/nvme/host/tcp.c | |
parent | d849ca483dba7546ad176da83bf66d1c013725f6 (diff) | |
parent | 7e24969022cbd61ddc586f14824fc205661bb124 (diff) | |
download | linux-8075fc3b113dee1531106aaec3dfa19c8158374d.tar.xz |
Merge tag 'block-5.9-2020-09-04' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
"A bit larger than usual this week, mostly due to the NVMe fixes
arriving late for -rc3 and hence didn't make last weeks pull request.
- NVMe:
- instance leak and io boundary fixes from Keith
- fc locking fix from Christophe
- various tcp/rdma reset during traffic fixes from Sagi
- pci use-after-free fix from Tong
- tcp target null deref fix from Ziye
- Locking fix for partition removal (Christoph)
- Ensure bdi->io_pages is always set (me)
- Fixup for hd struct reference (Ming)
- Fix for zero length bvecs (Ming)
- Two small blk-iocost fixes (Tejun)"
* tag 'block-5.9-2020-09-04' of git://git.kernel.dk/linux-block:
block: allow for_each_bvec to support zero len bvec
blk-stat: make q->stats->lock irqsafe
blk-iocost: ioc_pd_free() shouldn't assume irq disabled
block: fix locking in bdev_del_partition
block: release disk reference in hd_struct_free_work
block: ensure bdi->io_pages is always initialized
nvme-pci: cancel nvme device request before disabling
nvme: only use power of two io boundaries
nvme: fix controller instance leak
nvmet-fc: Fix a missed _irqsave version of spin_lock in 'nvmet_fc_fod_op_done()'
nvme: Fix NULL dereference for pci nvme controllers
nvme-rdma: fix reset hang if controller died in the middle of a reset
nvme-rdma: fix timeout handler
nvme-rdma: serialize controller teardown sequences
nvme-tcp: fix reset hang if controller died in the middle of a reset
nvme-tcp: fix timeout handler
nvme-tcp: serialize controller teardown sequences
nvme: have nvme_wait_freeze_timeout return if it timed out
nvme-fabrics: don't check state NVME_CTRL_NEW for request acceptance
nvmet-tcp: Fix NULL dereference when a connect data comes in h2cdata pdu
Diffstat (limited to 'drivers/nvme/host/tcp.c')
-rw-r--r-- | drivers/nvme/host/tcp.c | 80 |
1 files changed, 57 insertions, 23 deletions
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index a44d8ace3a81..16851ae3bddf 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -124,6 +124,7 @@ struct nvme_tcp_ctrl { struct sockaddr_storage src_addr; struct nvme_ctrl ctrl; + struct mutex teardown_lock; struct work_struct err_work; struct delayed_work connect_work; struct nvme_tcp_request async_req; @@ -464,6 +465,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl) if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) return; + dev_warn(ctrl->device, "starting error recovery\n"); queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work); } @@ -1526,7 +1528,6 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags)) return; - __nvme_tcp_stop_queue(queue); } @@ -1781,7 +1782,15 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) if (!new) { nvme_start_queues(ctrl); - nvme_wait_freeze(ctrl); + if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) { + /* + * If we timed out waiting for freeze we are likely to + * be stuck. Fail the controller initialization just + * to be safe. + */ + ret = -ENODEV; + goto out_wait_freeze_timed_out; + } blk_mq_update_nr_hw_queues(ctrl->tagset, ctrl->queue_count - 1); nvme_unfreeze(ctrl); @@ -1789,6 +1798,9 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) return 0; +out_wait_freeze_timed_out: + nvme_stop_queues(ctrl); + nvme_tcp_stop_io_queues(ctrl); out_cleanup_connect_q: if (new) blk_cleanup_queue(ctrl->connect_q); @@ -1874,6 +1886,7 @@ out_free_queue: static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, bool remove) { + mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); blk_mq_quiesce_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); if (ctrl->admin_tagset) { @@ -1884,13 +1897,16 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, if (remove) blk_mq_unquiesce_queue(ctrl->admin_q); nvme_tcp_destroy_admin_queue(ctrl, remove); + mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); } static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, bool remove) { + mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); if (ctrl->queue_count <= 1) - return; + goto out; + blk_mq_quiesce_queue(ctrl->admin_q); nvme_start_freeze(ctrl); nvme_stop_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); @@ -1902,6 +1918,8 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, if (remove) nvme_start_queues(ctrl); nvme_tcp_destroy_io_queues(ctrl, remove); +out: + mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); } static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) @@ -2148,40 +2166,55 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg) nvme_tcp_queue_request(&ctrl->async_req, true, true); } +static void nvme_tcp_complete_timed_out(struct request *rq) +{ + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); + struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; + + /* fence other contexts that may complete the command */ + mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); + nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); + if (!blk_mq_request_completed(rq)) { + nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; + blk_mq_complete_request(rq); + } + mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); +} + static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq, bool reserved) { struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); - struct nvme_tcp_ctrl *ctrl = req->queue->ctrl; + struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; struct nvme_tcp_cmd_pdu *pdu = req->pdu; - /* - * Restart the timer if a controller reset is already scheduled. Any - * timed out commands would be handled before entering the connecting - * state. - */ - if (ctrl->ctrl.state == NVME_CTRL_RESETTING) - return BLK_EH_RESET_TIMER; - - dev_warn(ctrl->ctrl.device, + dev_warn(ctrl->device, "queue %d: timeout request %#x type %d\n", nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type); - if (ctrl->ctrl.state != NVME_CTRL_LIVE) { + if (ctrl->state != NVME_CTRL_LIVE) { /* - * Teardown immediately if controller times out while starting - * or we are already started error recovery. all outstanding - * requests are completed on shutdown, so we return BLK_EH_DONE. + * If we are resetting, connecting or deleting we should + * complete immediately because we may block controller + * teardown or setup sequence + * - ctrl disable/shutdown fabrics requests + * - connect requests + * - initialization admin requests + * - I/O requests that entered after unquiescing and + * the controller stopped responding + * + * All other requests should be cancelled by the error + * recovery work, so it's fine that we fail it here. */ - flush_work(&ctrl->err_work); - nvme_tcp_teardown_io_queues(&ctrl->ctrl, false); - nvme_tcp_teardown_admin_queue(&ctrl->ctrl, false); + nvme_tcp_complete_timed_out(rq); return BLK_EH_DONE; } - dev_warn(ctrl->ctrl.device, "starting error recovery\n"); - nvme_tcp_error_recovery(&ctrl->ctrl); - + /* + * LIVE state should trigger the normal error recovery which will + * handle completing this request. + */ + nvme_tcp_error_recovery(ctrl); return BLK_EH_RESET_TIMER; } @@ -2422,6 +2455,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, nvme_tcp_reconnect_ctrl_work); INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work); + mutex_init(&ctrl->teardown_lock); if (!(opts->mask & NVMF_OPT_TRSVCID)) { opts->trsvcid = |