summaryrefslogtreecommitdiff
path: root/drivers/nvme/host/rdma.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/rdma.c')
-rw-r--r--drivers/nvme/host/rdma.c136
1 files changed, 94 insertions, 42 deletions
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 337a624a537c..c8fd0e8f0237 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -638,6 +638,9 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
+ if (!test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
+ return;
+
mutex_lock(&queue->queue_lock);
if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
__nvme_rdma_stop_queue(queue);
@@ -979,16 +982,18 @@ free_ctrl:
kfree(ctrl);
}
-static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl,
+ int status)
{
+ enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
/* If we are resetting/deleting then do nothing */
- if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
- WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
- ctrl->ctrl.state == NVME_CTRL_LIVE);
+ if (state != NVME_CTRL_CONNECTING) {
+ WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE);
return;
}
- if (nvmf_should_reconnect(&ctrl->ctrl)) {
+ if (nvmf_should_reconnect(&ctrl->ctrl, status)) {
dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
ctrl->ctrl.opts->reconnect_delay);
queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
@@ -1002,6 +1007,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
{
int ret;
bool changed;
+ u16 max_queue_size;
ret = nvme_rdma_configure_admin_queue(ctrl, new);
if (ret)
@@ -1026,11 +1032,16 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
}
- if (ctrl->ctrl.sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE) {
+ if (ctrl->ctrl.max_integrity_segments)
+ max_queue_size = NVME_RDMA_MAX_METADATA_QUEUE_SIZE;
+ else
+ max_queue_size = NVME_RDMA_MAX_QUEUE_SIZE;
+
+ if (ctrl->ctrl.sqsize + 1 > max_queue_size) {
dev_warn(ctrl->ctrl.device,
- "ctrl sqsize %u > max queue size %u, clamping down\n",
- ctrl->ctrl.sqsize + 1, NVME_RDMA_MAX_QUEUE_SIZE);
- ctrl->ctrl.sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1;
+ "ctrl sqsize %u > max queue size %u, clamping down\n",
+ ctrl->ctrl.sqsize + 1, max_queue_size);
+ ctrl->ctrl.sqsize = max_queue_size - 1;
}
if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
@@ -1056,8 +1067,10 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
* unless we're during creation of a new controller to
* avoid races with teardown flow.
*/
- WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
- ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
+ enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
+ WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+ state != NVME_CTRL_DELETING_NOIO);
WARN_ON_ONCE(new);
ret = -EINVAL;
goto destroy_io;
@@ -1077,6 +1090,7 @@ destroy_io:
nvme_rdma_free_io_queues(ctrl);
}
destroy_admin:
+ nvme_stop_keep_alive(&ctrl->ctrl);
nvme_quiesce_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]);
@@ -1091,10 +1105,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvme_rdma_ctrl, reconnect_work);
+ int ret;
++ctrl->ctrl.nr_reconnects;
- if (nvme_rdma_setup_ctrl(ctrl, false))
+ ret = nvme_rdma_setup_ctrl(ctrl, false);
+ if (ret)
goto requeue;
dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
@@ -1105,9 +1121,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
return;
requeue:
- dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
- ctrl->ctrl.nr_reconnects);
- nvme_rdma_reconnect_or_remove(ctrl);
+ dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d/%d\n",
+ ctrl->ctrl.nr_reconnects, ctrl->ctrl.opts->max_reconnects);
+ nvme_rdma_reconnect_or_remove(ctrl, ret);
}
static void nvme_rdma_error_recovery_work(struct work_struct *work)
@@ -1125,12 +1141,14 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */
- WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
- ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
+ enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
+ WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+ state != NVME_CTRL_DELETING_NOIO);
return;
}
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, 0);
}
static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
@@ -1158,7 +1176,7 @@ static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
struct nvme_rdma_queue *queue = wc->qp->qp_context;
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
- if (ctrl->ctrl.state == NVME_CTRL_LIVE)
+ if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE)
dev_info(ctrl->ctrl.device,
"%s for CQE 0x%p failed with status %s (%d)\n",
op, wc->wr_cqe,
@@ -1345,8 +1363,8 @@ static void nvme_rdma_set_sig_domain(struct blk_integrity *bi,
if (control & NVME_RW_PRINFO_PRCHK_REF)
domain->sig.dif.ref_remap = true;
- domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
- domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
+ domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.lbat);
+ domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.lbatm);
domain->sig.dif.app_escape = true;
if (pi_type == NVME_NS_DPS_PI_TYPE3)
domain->sig.dif.ref_escape = true;
@@ -1401,6 +1419,8 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
struct nvme_ns *ns = rq->q->queuedata;
struct bio *bio = rq->bio;
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ u32 xfer_len;
int nr;
req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs);
@@ -1413,8 +1433,7 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
if (unlikely(nr))
goto mr_put;
- nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c,
- req->mr->sig_attrs, ns->pi_type);
+ nvme_rdma_set_sig_attrs(bi, c, req->mr->sig_attrs, ns->head->pi_type);
nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
@@ -1432,7 +1451,11 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
IB_ACCESS_REMOTE_WRITE;
sg->addr = cpu_to_le64(req->mr->iova);
- put_unaligned_le24(req->mr->length, sg->length);
+ xfer_len = req->mr->length;
+ /* Check if PI is added by the HW */
+ if (!pi_count)
+ xfer_len += (xfer_len >> bi->interval_exp) * ns->head->pi_size;
+ put_unaligned_le24(xfer_len, sg->length);
put_unaligned_le32(req->mr->rkey, sg->key);
sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
@@ -1473,7 +1496,7 @@ static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq,
req->metadata_sgl->sg_table.sgl =
(struct scatterlist *)(req->metadata_sgl + 1);
ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table,
- blk_rq_count_integrity_sg(rq->q, rq->bio),
+ rq->nr_integrity_segments,
req->metadata_sgl->sg_table.sgl,
NVME_INLINE_METADATA_SG_CNT);
if (unlikely(ret)) {
@@ -1481,8 +1504,8 @@ static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq,
goto out_unmap_sg;
}
- req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q,
- rq->bio, req->metadata_sgl->sg_table.sgl);
+ req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq,
+ req->metadata_sgl->sg_table.sgl);
*pi_count = ib_dma_map_sg(ibdev,
req->metadata_sgl->sg_table.sgl,
req->metadata_sgl->nents,
@@ -1853,6 +1876,8 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
*/
priv.hrqsize = cpu_to_le16(queue->queue_size);
priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize);
+ /* cntlid should only be set when creating an I/O queue */
+ priv.cntlid = cpu_to_le16(ctrl->ctrl.cntlid);
}
ret = rdma_connect_locked(queue->cm_id, &param);
@@ -1937,11 +1962,15 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_queue *queue = req->queue;
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
+ struct nvme_command *cmd = req->req.cmd;
+ int qid = nvme_rdma_queue_idx(queue);
- dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
- rq->tag, nvme_rdma_queue_idx(queue));
+ dev_warn(ctrl->ctrl.device,
+ "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n",
+ rq->tag, nvme_cid(rq), cmd->common.opcode,
+ nvme_fabrics_opcode_str(qid, cmd), qid);
- if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
+ if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) {
/*
* If we are resetting, connecting or deleting we should
* complete immediately because we may block controller
@@ -2008,7 +2037,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
queue->pi_support &&
(c->common.opcode == nvme_cmd_write ||
c->common.opcode == nvme_cmd_read) &&
- nvme_ns_has_pi(ns))
+ nvme_ns_has_pi(ns->head))
req->use_sig_mr = true;
else
req->use_sig_mr = false;
@@ -2145,6 +2174,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl =
container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
+ int ret;
nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -2155,14 +2185,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
return;
}
- if (nvme_rdma_setup_ctrl(ctrl, false))
+ ret = nvme_rdma_setup_ctrl(ctrl, false);
+ if (ret)
goto out_fail;
return;
out_fail:
++ctrl->ctrl.nr_reconnects;
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, ret);
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -2172,6 +2203,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
+ .subsystem_reset = nvmf_subsystem_reset,
.free_ctrl = nvme_rdma_free_ctrl,
.submit_async_event = nvme_rdma_submit_async_event,
.delete_ctrl = nvme_rdma_delete_ctrl,
@@ -2208,12 +2240,11 @@ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
return found;
}
-static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
+static struct nvme_rdma_ctrl *nvme_rdma_alloc_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
struct nvme_rdma_ctrl *ctrl;
int ret;
- bool changed;
ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
if (!ctrl)
@@ -2275,6 +2306,30 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
if (ret)
goto out_kfree_queues;
+ return ctrl;
+
+out_kfree_queues:
+ kfree(ctrl->queues);
+out_free_ctrl:
+ kfree(ctrl);
+ return ERR_PTR(ret);
+}
+
+static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
+ struct nvmf_ctrl_options *opts)
+{
+ struct nvme_rdma_ctrl *ctrl;
+ bool changed;
+ int ret;
+
+ ctrl = nvme_rdma_alloc_ctrl(dev, opts);
+ if (IS_ERR(ctrl))
+ return ERR_CAST(ctrl);
+
+ ret = nvme_add_ctrl(&ctrl->ctrl);
+ if (ret)
+ goto out_put_ctrl;
+
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
WARN_ON_ONCE(!changed);
@@ -2282,8 +2337,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
if (ret)
goto out_uninit_ctrl;
- dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
- nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr);
+ dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs, hostnqn: %s\n",
+ nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn);
mutex_lock(&nvme_rdma_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
@@ -2293,15 +2348,11 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
out_uninit_ctrl:
nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
nvme_put_ctrl(&ctrl->ctrl);
if (ret > 0)
ret = -EIO;
return ERR_PTR(ret);
-out_kfree_queues:
- kfree(ctrl->queues);
-out_free_ctrl:
- kfree(ctrl);
- return ERR_PTR(ret);
}
static struct nvmf_transport_ops nvme_rdma_transport = {
@@ -2386,4 +2437,5 @@ static void __exit nvme_rdma_cleanup_module(void)
module_init(nvme_rdma_init_module);
module_exit(nvme_rdma_cleanup_module);
+MODULE_DESCRIPTION("NVMe host RDMA transport driver");
MODULE_LICENSE("GPL v2");