Diffstat (limited to 'drivers/nvme/host/rdma.c')
-rw-r--r-- | drivers/nvme/host/rdma.c | 136
1 file changed, 94 insertions, 42 deletions
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 337a624a537c..c8fd0e8f0237 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -638,6 +638,9 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
 
 static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
 {
+	if (!test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
+		return;
+
 	mutex_lock(&queue->queue_lock);
 	if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
 		__nvme_rdma_stop_queue(queue);
@@ -979,16 +982,18 @@ free_ctrl:
 	kfree(ctrl);
 }
 
-static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl,
+		int status)
 {
+	enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
 	/* If we are resetting/deleting then do nothing */
-	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
-		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
-			ctrl->ctrl.state == NVME_CTRL_LIVE);
+	if (state != NVME_CTRL_CONNECTING) {
+		WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE);
 		return;
 	}
 
-	if (nvmf_should_reconnect(&ctrl->ctrl)) {
+	if (nvmf_should_reconnect(&ctrl->ctrl, status)) {
 		dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
 			ctrl->ctrl.opts->reconnect_delay);
 		queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
@@ -1002,6 +1007,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
 {
 	int ret;
 	bool changed;
+	u16 max_queue_size;
 
 	ret = nvme_rdma_configure_admin_queue(ctrl, new);
 	if (ret)
@@ -1026,11 +1032,16 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
 			ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
 	}
 
-	if (ctrl->ctrl.sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE) {
+	if (ctrl->ctrl.max_integrity_segments)
+		max_queue_size = NVME_RDMA_MAX_METADATA_QUEUE_SIZE;
+	else
+		max_queue_size = NVME_RDMA_MAX_QUEUE_SIZE;
+
+	if (ctrl->ctrl.sqsize + 1 > max_queue_size) {
 		dev_warn(ctrl->ctrl.device,
-			"ctrl sqsize %u > max queue size %u, clamping down\n",
-			ctrl->ctrl.sqsize + 1, NVME_RDMA_MAX_QUEUE_SIZE);
-		ctrl->ctrl.sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1;
+			 "ctrl sqsize %u > max queue size %u, clamping down\n",
+			 ctrl->ctrl.sqsize + 1, max_queue_size);
+		ctrl->ctrl.sqsize = max_queue_size - 1;
 	}
 
 	if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
@@ -1056,8 +1067,10 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
 		 * unless we're during creation of a new controller to
 		 * avoid races with teardown flow.
 		 */
-		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
-			     ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
+		enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
+		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+			     state != NVME_CTRL_DELETING_NOIO);
 		WARN_ON_ONCE(new);
 		ret = -EINVAL;
 		goto destroy_io;
@@ -1077,6 +1090,7 @@ destroy_io:
 		nvme_rdma_free_io_queues(ctrl);
 	}
 destroy_admin:
+	nvme_stop_keep_alive(&ctrl->ctrl);
 	nvme_quiesce_admin_queue(&ctrl->ctrl);
 	blk_sync_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_stop_queue(&ctrl->queues[0]);
@@ -1091,10 +1105,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 {
 	struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
 			struct nvme_rdma_ctrl, reconnect_work);
+	int ret;
 
 	++ctrl->ctrl.nr_reconnects;
 
-	if (nvme_rdma_setup_ctrl(ctrl, false))
+	ret = nvme_rdma_setup_ctrl(ctrl, false);
+	if (ret)
 		goto requeue;
 
 	dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
@@ -1105,9 +1121,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	return;
 
 requeue:
-	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
-			ctrl->ctrl.nr_reconnects);
-	nvme_rdma_reconnect_or_remove(ctrl);
+	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d/%d\n",
+		 ctrl->ctrl.nr_reconnects, ctrl->ctrl.opts->max_reconnects);
+	nvme_rdma_reconnect_or_remove(ctrl, ret);
 }
 
 static void nvme_rdma_error_recovery_work(struct work_struct *work)
@@ -1125,12 +1141,14 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
 		/* state change failure is ok if we started ctrl delete */
-		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
-			     ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
+		enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
+		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+			     state != NVME_CTRL_DELETING_NOIO);
 		return;
 	}
 
-	nvme_rdma_reconnect_or_remove(ctrl);
+	nvme_rdma_reconnect_or_remove(ctrl, 0);
 }
 
 static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
@@ -1158,7 +1176,7 @@ static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
 	struct nvme_rdma_queue *queue = wc->qp->qp_context;
 	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
 
-	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
+	if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE)
 		dev_info(ctrl->ctrl.device,
 			     "%s for CQE 0x%p failed with status %s (%d)\n",
 			     op, wc->wr_cqe,
@@ -1345,8 +1363,8 @@ static void nvme_rdma_set_sig_domain(struct blk_integrity *bi,
 	if (control & NVME_RW_PRINFO_PRCHK_REF)
 		domain->sig.dif.ref_remap = true;
 
-	domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
-	domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
+	domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.lbat);
+	domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.lbatm);
 	domain->sig.dif.app_escape = true;
 	if (pi_type == NVME_NS_DPS_PI_TYPE3)
 		domain->sig.dif.ref_escape = true;
@@ -1401,6 +1419,8 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
 	struct nvme_ns *ns = rq->q->queuedata;
 	struct bio *bio = rq->bio;
 	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
+	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+	u32 xfer_len;
 	int nr;
 
 	req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs);
@@ -1413,8 +1433,7 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
 	if (unlikely(nr))
 		goto mr_put;
 
-	nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c,
-				req->mr->sig_attrs, ns->pi_type);
+	nvme_rdma_set_sig_attrs(bi, c, req->mr->sig_attrs, ns->head->pi_type);
 	nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
 
 	ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
@@ -1432,7 +1451,11 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
 			     IB_ACCESS_REMOTE_WRITE;
 
 	sg->addr = cpu_to_le64(req->mr->iova);
-	put_unaligned_le24(req->mr->length, sg->length);
+	xfer_len = req->mr->length;
+	/* Check if PI is added by the HW */
+	if (!pi_count)
+		xfer_len += (xfer_len >> bi->interval_exp) * ns->head->pi_size;
+	put_unaligned_le24(xfer_len, sg->length);
 	put_unaligned_le32(req->mr->rkey, sg->key);
 	sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
 
@@ -1473,7 +1496,7 @@ static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq,
 		req->metadata_sgl->sg_table.sgl =
 			(struct scatterlist *)(req->metadata_sgl + 1);
 		ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table,
-				blk_rq_count_integrity_sg(rq->q, rq->bio),
+				rq->nr_integrity_segments,
 				req->metadata_sgl->sg_table.sgl,
 				NVME_INLINE_METADATA_SG_CNT);
 		if (unlikely(ret)) {
@@ -1481,8 +1504,8 @@ static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq,
 			goto out_unmap_sg;
 		}
 
-		req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q,
-				rq->bio, req->metadata_sgl->sg_table.sgl);
+		req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq,
+				req->metadata_sgl->sg_table.sgl);
 		*pi_count = ib_dma_map_sg(ibdev,
 					  req->metadata_sgl->sg_table.sgl,
 					  req->metadata_sgl->nents,
@@ -1853,6 +1876,8 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
 		 */
 		priv.hrqsize = cpu_to_le16(queue->queue_size);
 		priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize);
+		/* cntlid should only be set when creating an I/O queue */
+		priv.cntlid = cpu_to_le16(ctrl->ctrl.cntlid);
 	}
 
 	ret = rdma_connect_locked(queue->cm_id, &param);
@@ -1937,11 +1962,15 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_rdma_queue *queue = req->queue;
 	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
+	struct nvme_command *cmd = req->req.cmd;
+	int qid = nvme_rdma_queue_idx(queue);
 
-	dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
-		 rq->tag, nvme_rdma_queue_idx(queue));
+	dev_warn(ctrl->ctrl.device,
+		 "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n",
+		 rq->tag, nvme_cid(rq), cmd->common.opcode,
+		 nvme_fabrics_opcode_str(qid, cmd), qid);
 
-	if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
+	if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) {
 		/*
 		 * If we are resetting, connecting or deleting we should
 		 * complete immediately because we may block controller
@@ -2008,7 +2037,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	    queue->pi_support &&
 	    (c->common.opcode == nvme_cmd_write ||
 	     c->common.opcode == nvme_cmd_read) &&
-	    nvme_ns_has_pi(ns))
+	    nvme_ns_has_pi(ns->head))
 		req->use_sig_mr = true;
 	else
 		req->use_sig_mr = false;
@@ -2145,6 +2174,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 {
 	struct nvme_rdma_ctrl *ctrl =
 		container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
+	int ret;
 
 	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -2155,14 +2185,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 		return;
 	}
 
-	if (nvme_rdma_setup_ctrl(ctrl, false))
+	ret = nvme_rdma_setup_ctrl(ctrl, false);
+	if (ret)
 		goto out_fail;
 
 	return;
 
 out_fail:
 	++ctrl->ctrl.nr_reconnects;
-	nvme_rdma_reconnect_or_remove(ctrl);
+	nvme_rdma_reconnect_or_remove(ctrl, ret);
 }
 
 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -2172,6 +2203,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.reg_read32		= nvmf_reg_read32,
 	.reg_read64		= nvmf_reg_read64,
 	.reg_write32		= nvmf_reg_write32,
+	.subsystem_reset	= nvmf_subsystem_reset,
 	.free_ctrl		= nvme_rdma_free_ctrl,
 	.submit_async_event	= nvme_rdma_submit_async_event,
 	.delete_ctrl		= nvme_rdma_delete_ctrl,
@@ -2208,12 +2240,11 @@ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
 	return found;
 }
 
-static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
+static struct nvme_rdma_ctrl *nvme_rdma_alloc_ctrl(struct device *dev,
 		struct nvmf_ctrl_options *opts)
 {
 	struct nvme_rdma_ctrl *ctrl;
 	int ret;
-	bool changed;
 
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 	if (!ctrl)
@@ -2275,6 +2306,30 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	if (ret)
 		goto out_kfree_queues;
 
+	return ctrl;
+
+out_kfree_queues:
+	kfree(ctrl->queues);
+out_free_ctrl:
+	kfree(ctrl);
+	return ERR_PTR(ret);
+}
+
+static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
+		struct nvmf_ctrl_options *opts)
+{
+	struct nvme_rdma_ctrl *ctrl;
+	bool changed;
+	int ret;
+
+	ctrl = nvme_rdma_alloc_ctrl(dev, opts);
+	if (IS_ERR(ctrl))
+		return ERR_CAST(ctrl);
+
+	ret = nvme_add_ctrl(&ctrl->ctrl);
+	if (ret)
+		goto out_put_ctrl;
+
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
 	WARN_ON_ONCE(!changed);
 
@@ -2282,8 +2337,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	if (ret)
 		goto out_uninit_ctrl;
 
-	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
-		nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr);
+	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs, hostnqn: %s\n",
+		nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn);
 
 	mutex_lock(&nvme_rdma_ctrl_mutex);
 	list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
@@ -2293,15 +2348,11 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 
 out_uninit_ctrl:
 	nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
 	nvme_put_ctrl(&ctrl->ctrl);
 	if (ret > 0)
 		ret = -EIO;
 	return ERR_PTR(ret);
-out_kfree_queues:
-	kfree(ctrl->queues);
-out_free_ctrl:
-	kfree(ctrl);
-	return ERR_PTR(ret);
 }
 
 static struct nvmf_transport_ops nvme_rdma_transport = {
@@ -2386,4 +2437,5 @@ static void __exit nvme_rdma_cleanup_module(void)
 
 module_init(nvme_rdma_init_module);
 module_exit(nvme_rdma_cleanup_module);
 
+MODULE_DESCRIPTION("NVMe host RDMA transport driver");
 MODULE_LICENSE("GPL v2");