summaryrefslogtreecommitdiff
path: root/drivers/nvme/host
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r--drivers/nvme/host/Makefile1
-rw-r--r--drivers/nvme/host/core.c12
-rw-r--r--drivers/nvme/host/fabrics.c18
-rw-r--r--drivers/nvme/host/fc.c58
-rw-r--r--drivers/nvme/host/pci.c48
-rw-r--r--drivers/nvme/host/rdma.c36
6 files changed, 113 insertions, 60 deletions
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index cc0aacb4c8b4..7b96e4588a12 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_NVME_CORE) += nvme-core.o
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index acc816b67582..37f9039bb9ca 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -134,8 +134,6 @@ static inline bool nvme_req_needs_retry(struct request *req)
return false;
if (nvme_req(req)->status & NVME_SC_DNR)
return false;
- if (jiffies - req->start_time >= req->timeout)
- return false;
if (nvme_req(req)->retries >= nvme_max_retries)
return false;
return true;
@@ -1251,6 +1249,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
goto out;
}
+ __nvme_revalidate_disk(disk, id);
nvme_report_ns_ids(ctrl, ns->ns_id, id, eui64, nguid, &uuid);
if (!uuid_equal(&ns->uuid, &uuid) ||
memcmp(&ns->nguid, &nguid, sizeof(ns->nguid)) ||
@@ -2138,7 +2137,7 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
if (a == &dev_attr_uuid.attr) {
- if (uuid_is_null(&ns->uuid) ||
+ if (uuid_is_null(&ns->uuid) &&
!memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
return 0;
}
@@ -2590,7 +2589,7 @@ static void nvme_async_event_work(struct work_struct *work)
container_of(work, struct nvme_ctrl, async_event_work);
spin_lock_irq(&ctrl->lock);
- while (ctrl->event_limit > 0) {
+ while (ctrl->state == NVME_CTRL_LIVE && ctrl->event_limit > 0) {
int aer_idx = --ctrl->event_limit;
spin_unlock_irq(&ctrl->lock);
@@ -2677,7 +2676,8 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
/*FALLTHRU*/
case NVME_SC_ABORT_REQ:
++ctrl->event_limit;
- queue_work(nvme_wq, &ctrl->async_event_work);
+ if (ctrl->state == NVME_CTRL_LIVE)
+ queue_work(nvme_wq, &ctrl->async_event_work);
break;
default:
break;
@@ -2692,7 +2692,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
nvme_queue_scan(ctrl);
break;
case NVME_AER_NOTICE_FW_ACT_STARTING:
- schedule_work(&ctrl->fw_act_work);
+ queue_work(nvme_wq, &ctrl->fw_act_work);
break;
default:
dev_warn(ctrl->device, "async event result %08x\n", result);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 47307752dc65..555c976cc2ee 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -565,6 +565,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->queue_size = NVMF_DEF_QUEUE_SIZE;
opts->nr_io_queues = num_online_cpus();
opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
+ opts->kato = NVME_DEFAULT_KATO;
options = o = kstrdup(buf, GFP_KERNEL);
if (!options)
@@ -655,21 +656,22 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
goto out;
}
- if (opts->discovery_nqn) {
- pr_err("Discovery controllers cannot accept keep_alive_tmo != 0\n");
- ret = -EINVAL;
- goto out;
- }
-
if (token < 0) {
pr_err("Invalid keep_alive_tmo %d\n", token);
ret = -EINVAL;
goto out;
- } else if (token == 0) {
+ } else if (token == 0 && !opts->discovery_nqn) {
/* Allowed for debug */
pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n");
}
opts->kato = token;
+
+ if (opts->discovery_nqn && opts->kato) {
+ pr_err("Discovery controllers cannot accept KATO != 0\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
break;
case NVMF_OPT_CTRL_LOSS_TMO:
if (match_int(args, &token)) {
@@ -762,8 +764,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
uuid_copy(&opts->host->id, &hostid);
out:
- if (!opts->discovery_nqn && !opts->kato)
- opts->kato = NVME_DEFAULT_KATO;
kfree(options);
return ret;
}
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index d2e882c0f496..be49d0f79381 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1376,7 +1376,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
if (atomic_read(&op->state) == FCPOP_STATE_ABORTED)
status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
else if (freq->status)
- status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+ status = cpu_to_le16(NVME_SC_INTERNAL << 1);
/*
* For the linux implementation, if we have an unsuccesful
@@ -1404,7 +1404,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
*/
if (freq->transferred_length !=
be32_to_cpu(op->cmd_iu.data_len)) {
- status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+ status = cpu_to_le16(NVME_SC_INTERNAL << 1);
goto done;
}
result.u64 = 0;
@@ -1421,7 +1421,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
freq->transferred_length ||
op->rsp_iu.status_code ||
sqe->common.command_id != cqe->command_id)) {
- status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+ status = cpu_to_le16(NVME_SC_INTERNAL << 1);
goto done;
}
result = cqe->result;
@@ -1429,7 +1429,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
break;
default:
- status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+ status = cpu_to_le16(NVME_SC_INTERNAL << 1);
goto done;
}
@@ -1989,16 +1989,17 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
* as well as those by FC-NVME spec.
*/
WARN_ON_ONCE(sqe->common.metadata);
- WARN_ON_ONCE(sqe->common.dptr.prp1);
- WARN_ON_ONCE(sqe->common.dptr.prp2);
sqe->common.flags |= NVME_CMD_SGL_METABUF;
/*
- * format SQE DPTR field per FC-NVME rules
- * type=data block descr; subtype=offset;
- * offset is currently 0.
+ * format SQE DPTR field per FC-NVME rules:
+ * type=0x5 Transport SGL Data Block Descriptor
+ * subtype=0xA Transport-specific value
+ * address=0
+ * length=length of the data series
*/
- sqe->rw.dptr.sgl.type = NVME_SGL_FMT_OFFSET;
+ sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
+ NVME_SGL_FMT_TRANSPORT_A;
sqe->rw.dptr.sgl.length = cpu_to_le32(data_len);
sqe->rw.dptr.sgl.addr = 0;
@@ -2544,10 +2545,10 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
nvme_fc_abort_aen_ops(ctrl);
/* wait for all io that had to be aborted */
- spin_lock_irqsave(&ctrl->lock, flags);
+ spin_lock_irq(&ctrl->lock);
wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
ctrl->flags &= ~FCCTRL_TERMIO;
- spin_unlock_irqrestore(&ctrl->lock, flags);
+ spin_unlock_irq(&ctrl->lock);
nvme_fc_term_aen_ops(ctrl);
@@ -2733,7 +2734,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
{
struct nvme_fc_ctrl *ctrl;
unsigned long flags;
- int ret, idx;
+ int ret, idx, retry;
if (!(rport->remoteport.port_role &
(FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
@@ -2759,6 +2760,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->rport = rport;
ctrl->dev = lport->dev;
ctrl->cnum = idx;
+ init_waitqueue_head(&ctrl->ioabort_wait);
get_device(ctrl->dev);
kref_init(&ctrl->ref);
@@ -2824,9 +2826,37 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
spin_unlock_irqrestore(&rport->lock, flags);
- ret = nvme_fc_create_association(ctrl);
+ /*
+ * It's possible that transactions used to create the association
+ * may fail. Examples: CreateAssociation LS or CreateIOConnection
+ * LS gets dropped/corrupted/fails; or a frame gets dropped or a
+ * command times out for one of the actions to init the controller
+ * (Connect, Get/Set_Property, Set_Features, etc). Many of these
+ * transport errors (frame drop, LS failure) inherently must kill
+ * the association. The transport is coded so that any command used
+ * to create the association (prior to a LIVE state transition
+ * while NEW or RECONNECTING) will fail if it completes in error or
+ * times out.
+ *
+ * As such: as the connect request was mostly likely due to a
+ * udev event that discovered the remote port, meaning there is
+ * not an admin or script there to restart if the connect
+ * request fails, retry the initial connection creation up to
+ * three times before giving up and declaring failure.
+ */
+ for (retry = 0; retry < 3; retry++) {
+ ret = nvme_fc_create_association(ctrl);
+ if (!ret)
+ break;
+ }
+
if (ret) {
+ /* couldn't schedule retry - fail out */
+ dev_err(ctrl->ctrl.device,
+ "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);
+
ctrl->ctrl.opts = NULL;
+
/* initiate nvme ctrl ref counting teardown */
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4a2121335f48..3f5a04c586ce 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -24,6 +24,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/once.h>
#include <linux/pci.h>
#include <linux/poison.h>
#include <linux/t10-pi.h>
@@ -93,7 +94,7 @@ struct nvme_dev {
struct mutex shutdown_lock;
bool subsystem;
void __iomem *cmb;
- dma_addr_t cmb_dma_addr;
+ pci_bus_addr_t cmb_bus_addr;
u64 cmb_size;
u32 cmbsz;
u32 cmbloc;
@@ -540,6 +541,20 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
}
#endif
+static void nvme_print_sgl(struct scatterlist *sgl, int nents)
+{
+ int i;
+ struct scatterlist *sg;
+
+ for_each_sg(sgl, sg, nents, i) {
+ dma_addr_t phys = sg_phys(sg);
+ pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
+ "dma_address:%pad dma_length:%d\n",
+ i, &phys, sg->offset, sg->length, &sg_dma_address(sg),
+ sg_dma_len(sg));
+ }
+}
+
static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -622,19 +637,10 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
return BLK_STS_OK;
bad_sgl:
- if (WARN_ONCE(1, "Invalid SGL for payload:%d nents:%d\n",
- blk_rq_payload_bytes(req), iod->nents)) {
- for_each_sg(iod->sg, sg, iod->nents, i) {
- dma_addr_t phys = sg_phys(sg);
- pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
- "dma_address:%pad dma_length:%d\n", i, &phys,
- sg->offset, sg->length,
- &sg_dma_address(sg),
- sg_dma_len(sg));
- }
- }
+ WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents),
+ "Invalid SGL for payload:%d nents:%d\n",
+ blk_rq_payload_bytes(req), iod->nents);
return BLK_STS_IOERR;
-
}
static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
@@ -1220,7 +1226,7 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
dev->ctrl.page_size);
- nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
+ nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
nvmeq->sq_cmds_io = dev->cmb + offset;
} else {
nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
@@ -1313,11 +1319,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
if (result < 0)
goto release_cq;
+ nvme_init_queue(nvmeq, qid);
result = queue_request_irq(nvmeq);
if (result < 0)
goto release_sq;
- nvme_init_queue(nvmeq, qid);
return result;
release_sq:
@@ -1464,6 +1470,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
return result;
nvmeq->cq_vector = 0;
+ nvme_init_queue(nvmeq, 0);
result = queue_request_irq(nvmeq);
if (result) {
nvmeq->cq_vector = -1;
@@ -1520,7 +1527,7 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
resource_size_t bar_size;
struct pci_dev *pdev = to_pci_dev(dev->dev);
void __iomem *cmb;
- dma_addr_t dma_addr;
+ int bar;
dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
if (!(NVME_CMB_SZ(dev->cmbsz)))
@@ -1533,7 +1540,8 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
size = szu * NVME_CMB_SZ(dev->cmbsz);
offset = szu * NVME_CMB_OFST(dev->cmbloc);
- bar_size = pci_resource_len(pdev, NVME_CMB_BIR(dev->cmbloc));
+ bar = NVME_CMB_BIR(dev->cmbloc);
+ bar_size = pci_resource_len(pdev, bar);
if (offset > bar_size)
return NULL;
@@ -1546,12 +1554,11 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
if (size > bar_size - offset)
size = bar_size - offset;
- dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(dev->cmbloc)) + offset;
- cmb = ioremap_wc(dma_addr, size);
+ cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size);
if (!cmb)
return NULL;
- dev->cmb_dma_addr = dma_addr;
+ dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset;
dev->cmb_size = size;
return cmb;
}
@@ -2156,7 +2163,6 @@ static void nvme_reset_work(struct work_struct *work)
if (result)
goto out;
- nvme_init_queue(dev->queues[0], 0);
result = nvme_alloc_admin_tags(dev);
if (result)
goto out;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 58983000964b..0ebb539f3bd3 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -571,6 +571,12 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags))
return;
+ if (nvme_rdma_queue_idx(queue) == 0) {
+ nvme_rdma_free_qe(queue->device->dev,
+ &queue->ctrl->async_event_sqe,
+ sizeof(struct nvme_command), DMA_TO_DEVICE);
+ }
+
nvme_rdma_destroy_queue_ib(queue);
rdma_destroy_id(queue->cm_id);
}
@@ -739,8 +745,6 @@ out:
static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool remove)
{
- nvme_rdma_free_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe,
- sizeof(struct nvme_command), DMA_TO_DEVICE);
nvme_rdma_stop_queue(&ctrl->queues[0]);
if (remove) {
blk_cleanup_queue(ctrl->ctrl.admin_q);
@@ -765,8 +769,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
if (new) {
ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
- if (IS_ERR(ctrl->ctrl.admin_tagset))
+ if (IS_ERR(ctrl->ctrl.admin_tagset)) {
+ error = PTR_ERR(ctrl->ctrl.admin_tagset);
goto out_free_queue;
+ }
ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
if (IS_ERR(ctrl->ctrl.admin_q)) {
@@ -846,8 +852,10 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
if (new) {
ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false);
- if (IS_ERR(ctrl->ctrl.tagset))
+ if (IS_ERR(ctrl->ctrl.tagset)) {
+ ret = PTR_ERR(ctrl->ctrl.tagset);
goto out_free_io_queues;
+ }
ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
if (IS_ERR(ctrl->ctrl.connect_q)) {
@@ -942,7 +950,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
- WARN_ON_ONCE(!changed);
+ if (!changed) {
+ /* state change failure is ok if we're in DELETING state */
+ WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
+ return;
+ }
+
ctrl->ctrl.nr_reconnects = 0;
nvme_start_ctrl(&ctrl->ctrl);
@@ -962,7 +975,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, err_work);
- nvme_stop_ctrl(&ctrl->ctrl);
+ nvme_stop_keep_alive(&ctrl->ctrl);
if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
@@ -1601,12 +1614,15 @@ nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
/*
* reconnecting state means transport disruption, which
* can take a long time and even might fail permanently,
- * so we can't let incoming I/O be requeued forever.
- * fail it fast to allow upper layers a chance to
- * failover.
+ * fail fast to give upper layers a chance to failover.
+ * deleting state means that the ctrl will never accept
+ * commands again, fail it permanently.
*/
- if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
+ if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING ||
+ queue->ctrl->ctrl.state == NVME_CTRL_DELETING) {
+ nvme_req(rq)->status = NVME_SC_ABORT_REQ;
return BLK_STS_IOERR;
+ }
return BLK_STS_RESOURCE; /* try again later */
}
}