summaryrefslogtreecommitdiff
path: root/drivers/nvme/host/tcp.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2020-09-04 23:04:51 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2020-09-04 23:04:51 +0300
commit: 8075fc3b113dee1531106aaec3dfa19c8158374d (patch)
tree: c168243d0785bade241bcb223ddfd07418164cfb /drivers/nvme/host/tcp.c
parent: d849ca483dba7546ad176da83bf66d1c013725f6 (diff)
parent: 7e24969022cbd61ddc586f14824fc205661bb124 (diff)
download: linux-8075fc3b113dee1531106aaec3dfa19c8158374d.tar.xz
Merge tag 'block-5.9-2020-09-04' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "A bit larger than usual this week, mostly due to the NVMe fixes
  arriving late for -rc3 and hence didn't make last week's pull request.

   - NVMe:
      - instance leak and io boundary fixes from Keith
      - fc locking fix from Christophe
      - various tcp/rdma reset during traffic fixes from Sagi
      - pci use-after-free fix from Tong
      - tcp target null deref fix from Ziye

   - Locking fix for partition removal (Christoph)

   - Ensure bdi->io_pages is always set (me)

   - Fixup for hd struct reference (Ming)

   - Fix for zero length bvecs (Ming)

   - Two small blk-iocost fixes (Tejun)"

* tag 'block-5.9-2020-09-04' of git://git.kernel.dk/linux-block:
  block: allow for_each_bvec to support zero len bvec
  blk-stat: make q->stats->lock irqsafe
  blk-iocost: ioc_pd_free() shouldn't assume irq disabled
  block: fix locking in bdev_del_partition
  block: release disk reference in hd_struct_free_work
  block: ensure bdi->io_pages is always initialized
  nvme-pci: cancel nvme device request before disabling
  nvme: only use power of two io boundaries
  nvme: fix controller instance leak
  nvmet-fc: Fix a missed _irqsave version of spin_lock in 'nvmet_fc_fod_op_done()'
  nvme: Fix NULL dereference for pci nvme controllers
  nvme-rdma: fix reset hang if controller died in the middle of a reset
  nvme-rdma: fix timeout handler
  nvme-rdma: serialize controller teardown sequences
  nvme-tcp: fix reset hang if controller died in the middle of a reset
  nvme-tcp: fix timeout handler
  nvme-tcp: serialize controller teardown sequences
  nvme: have nvme_wait_freeze_timeout return if it timed out
  nvme-fabrics: don't check state NVME_CTRL_NEW for request acceptance
  nvmet-tcp: Fix NULL dereference when a connect data comes in h2cdata pdu
Diffstat (limited to 'drivers/nvme/host/tcp.c')
-rw-r--r-- drivers/nvme/host/tcp.c 80
1 files changed, 57 insertions, 23 deletions
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index a44d8ace3a81..16851ae3bddf 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -124,6 +124,7 @@ struct nvme_tcp_ctrl {
struct sockaddr_storage src_addr;
struct nvme_ctrl ctrl;
+ struct mutex teardown_lock;
struct work_struct err_work;
struct delayed_work connect_work;
struct nvme_tcp_request async_req;
@@ -464,6 +465,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
return;
+ dev_warn(ctrl->device, "starting error recovery\n");
queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
}
@@ -1526,7 +1528,6 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
return;
-
__nvme_tcp_stop_queue(queue);
}
@@ -1781,7 +1782,15 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
if (!new) {
nvme_start_queues(ctrl);
- nvme_wait_freeze(ctrl);
+ if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
+ /*
+ * If we timed out waiting for freeze we are likely to
+ * be stuck. Fail the controller initialization just
+ * to be safe.
+ */
+ ret = -ENODEV;
+ goto out_wait_freeze_timed_out;
+ }
blk_mq_update_nr_hw_queues(ctrl->tagset,
ctrl->queue_count - 1);
nvme_unfreeze(ctrl);
@@ -1789,6 +1798,9 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
return 0;
+out_wait_freeze_timed_out:
+ nvme_stop_queues(ctrl);
+ nvme_tcp_stop_io_queues(ctrl);
out_cleanup_connect_q:
if (new)
blk_cleanup_queue(ctrl->connect_q);
@@ -1874,6 +1886,7 @@ out_free_queue:
static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
bool remove)
{
+ mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
blk_mq_quiesce_queue(ctrl->admin_q);
nvme_tcp_stop_queue(ctrl, 0);
if (ctrl->admin_tagset) {
@@ -1884,13 +1897,16 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
if (remove)
blk_mq_unquiesce_queue(ctrl->admin_q);
nvme_tcp_destroy_admin_queue(ctrl, remove);
+ mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
}
static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
bool remove)
{
+ mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
if (ctrl->queue_count <= 1)
- return;
+ goto out;
+ blk_mq_quiesce_queue(ctrl->admin_q);
nvme_start_freeze(ctrl);
nvme_stop_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl);
@@ -1902,6 +1918,8 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
if (remove)
nvme_start_queues(ctrl);
nvme_tcp_destroy_io_queues(ctrl, remove);
+out:
+ mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
}
static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
@@ -2148,40 +2166,55 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
nvme_tcp_queue_request(&ctrl->async_req, true, true);
}
+static void nvme_tcp_complete_timed_out(struct request *rq)
+{
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+ struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
+
+ /* fence other contexts that may complete the command */
+ mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
+ nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
+ if (!blk_mq_request_completed(rq)) {
+ nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
+ blk_mq_complete_request(rq);
+ }
+ mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
+}
+
static enum blk_eh_timer_return
nvme_tcp_timeout(struct request *rq, bool reserved)
{
struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
- struct nvme_tcp_ctrl *ctrl = req->queue->ctrl;
+ struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
struct nvme_tcp_cmd_pdu *pdu = req->pdu;
- /*
- * Restart the timer if a controller reset is already scheduled. Any
- * timed out commands would be handled before entering the connecting
- * state.
- */
- if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
- return BLK_EH_RESET_TIMER;
-
- dev_warn(ctrl->ctrl.device,
+ dev_warn(ctrl->device,
"queue %d: timeout request %#x type %d\n",
nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
- if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
+ if (ctrl->state != NVME_CTRL_LIVE) {
/*
- * Teardown immediately if controller times out while starting
- * or we are already started error recovery. all outstanding
- * requests are completed on shutdown, so we return BLK_EH_DONE.
+ * If we are resetting, connecting or deleting we should
+ * complete immediately because we may block controller
+ * teardown or setup sequence
+ * - ctrl disable/shutdown fabrics requests
+ * - connect requests
+ * - initialization admin requests
+ * - I/O requests that entered after unquiescing and
+ * the controller stopped responding
+ *
+ * All other requests should be cancelled by the error
+ * recovery work, so it's fine that we fail it here.
*/
- flush_work(&ctrl->err_work);
- nvme_tcp_teardown_io_queues(&ctrl->ctrl, false);
- nvme_tcp_teardown_admin_queue(&ctrl->ctrl, false);
+ nvme_tcp_complete_timed_out(rq);
return BLK_EH_DONE;
}
- dev_warn(ctrl->ctrl.device, "starting error recovery\n");
- nvme_tcp_error_recovery(&ctrl->ctrl);
-
+ /*
+ * LIVE state should trigger the normal error recovery which will
+ * handle completing this request.
+ */
+ nvme_tcp_error_recovery(ctrl);
return BLK_EH_RESET_TIMER;
}
@@ -2422,6 +2455,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
nvme_tcp_reconnect_ctrl_work);
INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work);
INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work);
+ mutex_init(&ctrl->teardown_lock);
if (!(opts->mask & NVMF_OPT_TRSVCID)) {
opts->trsvcid =