From 4e7f0ece41e1be8f876f320a0972a715daec0a50 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Apr 2022 06:53:12 +0200 Subject: nvme: remove a spurious clear of discard_alignment The nvme driver never sets a discard_alignment, so it also doens't need to clear it to zero. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20220418045314.360785-10-hch@lst.de Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b9b0fbde97c8..76a9ccd5d064 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1628,7 +1628,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - queue->limits.discard_alignment = 0; queue->limits.discard_granularity = size; /* If discard is already enabled, don't reset queue limits */ -- cgit v1.2.3 From 1a86924e4f464757546d7f7bdc469be237918395 Mon Sep 17 00:00:00 2001 From: Tom Yan Date: Fri, 29 Apr 2022 12:52:43 +0800 Subject: nvme: fix interpretation of DMRSL DMRSLl is in the unit of logical blocks, while max_discard_sectors is in the unit of "linux sector". Signed-off-by: Tom Yan Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 6 ++++-- drivers/nvme/host/nvme.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 76a9ccd5d064..f41b2b18fad9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1634,6 +1634,9 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) if (queue->limits.max_discard_sectors) return; + if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX)) + ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl); + blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors); blk_queue_max_discard_segments(queue, ctrl->max_discard_segments); @@ -2893,8 +2896,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) if (id->dmrl) ctrl->max_discard_segments = id->dmrl; - if (id->dmrsl) - ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl); + ctrl->dmrsl = le32_to_cpu(id->dmrsl); if (id->wzsl) ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a2b53ca63335..81c4f5379c0c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -284,6 +284,7 @@ struct nvme_ctrl { #endif u16 crdt[3]; u16 oncs; + u32 dmrsl; u16 oacs; u16 sqsize; u32 max_namespaces; -- cgit v1.2.3 From 52fde2c07da606f3f120af4f734eadcfb52b04be Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 4 May 2022 11:43:25 -0700 Subject: nvme: set dma alignment to dword The nvme specification only requires qword alignment for segment descriptors, and the driver already guarantees that. The spec has always allowed user data to be dword aligned, which is what the queue's attribute is for, so relax the alignment requirement to that value. While we could allow byte alignment for some controllers when using SGLs, we still need to support PRP, and that only allows dword. Fixes: 3b2a1ebceba3 ("nvme: set dma alignment to qword") Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f41b2b18fad9..9a6fb071d986 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1773,7 +1773,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); - blk_queue_dma_alignment(q, 7); + blk_queue_dma_alignment(q, 3); blk_queue_write_cache(q, vwc, vwc); } -- cgit v1.2.3 From ca2d89925ae3f3d5c65182ff75e58bc9b484e69c Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 28 Apr 2022 12:19:35 +0300 Subject: nvme: add missing status values to verbose logging Log a few more path related status codes. Signed-off-by: Max Gurtovoy Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/constants.c | 3 +++ include/linux/nvme.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index 7d49eb34b348..465aee42fced 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -155,10 +155,13 @@ static const char * const nvme_statuses[] = { [NVME_SC_COMPARE_FAILED] = "Compare Failure", [NVME_SC_ACCESS_DENIED] = "Access Denied", [NVME_SC_UNWRITTEN_BLOCK] = "Deallocated or Unwritten Logical Block", + [NVME_SC_INTERNAL_PATH_ERROR] = "Internal Pathing Error", [NVME_SC_ANA_PERSISTENT_LOSS] = "Asymmetric Access Persistent Loss", [NVME_SC_ANA_INACCESSIBLE] = "Asymmetric Access Inaccessible", [NVME_SC_ANA_TRANSITION] = "Asymmetric Access Transition", + [NVME_SC_CTRL_PATH_ERROR] = "Controller Pathing Error", [NVME_SC_HOST_PATH_ERROR] = "Host Pathing Error", + [NVME_SC_HOST_ABORTED_CMD] = "Host Aborted Command", }; const unsigned char *nvme_get_error_status_str(u16 status) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index f626a445d1a8..bbabdc7600da 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1679,9 +1679,11 @@ enum { /* * Path-related Errors: */ + NVME_SC_INTERNAL_PATH_ERROR = 0x300, NVME_SC_ANA_PERSISTENT_LOSS = 0x301, NVME_SC_ANA_INACCESSIBLE = 0x302, NVME_SC_ANA_TRANSITION = 0x303, + NVME_SC_CTRL_PATH_ERROR = 0x360, NVME_SC_HOST_PATH_ERROR = 0x370, NVME_SC_HOST_ABORTED_CMD = 0x371, -- cgit v1.2.3 From da3340e77eeb4ced79784eaadbcc529e1ecef673 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 28 Apr 2022 12:15:24 +0300 Subject: nvme: remove unneeded include from constants file No usage of blkdev.h elements. Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/constants.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index 465aee42fced..1dd1d78de295 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -4,7 +4,6 @@ * Copyright (c) 2022, Oracle and/or its affiliates */ -#include #include "nvme.h" #ifdef CONFIG_NVME_VERBOSE_ERRORS -- cgit v1.2.3 From 128126a7943622424350752a71be5bb95e7946db Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 19 Apr 2022 15:53:51 -0700 Subject: nvme: mark internal passthru request RQF_QUIET Most of the internal passthru commands use __nvme_submit_sync_cmd() interface. There are few places we open code the request submission :- 1. nvme_keep_alive_work(struct work_struct *work) 2. nvme_timeout(struct request *req, bool reserved) 3. nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) Mark the internal passthru request quiet so that we can skip the verbose error message from nvme_log_error() in nvme_end_req() completion path, this will be consistent with what we have in __nvme_submit_sync_cmd(). Signed-off-by: Chaitanya Kulkarni Reviewed-by: Alan Adamson Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/pci.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9a6fb071d986..42f9772abc4d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1207,6 +1207,7 @@ static void nvme_keep_alive_work(struct work_struct *work) rq->timeout = ctrl->kato * HZ; rq->end_io_data = ctrl; + rq->rq_flags |= RQF_QUIET; blk_execute_rq_nowait(rq, false, nvme_keep_alive_end_io); } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3aacf1c0d5a5..f326261456ac 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1439,6 +1439,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) nvme_init_request(abort_req, &cmd); abort_req->end_io_data = NULL; + abort_req->rq_flags |= RQF_QUIET; blk_execute_rq_nowait(abort_req, false, abort_endio); /* @@ -2486,6 +2487,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) req->end_io_data = nvmeq; init_completion(&nvmeq->delete_done); + req->rq_flags |= RQF_QUIET; blk_execute_rq_nowait(req, false, opcode == nvme_admin_delete_cq ? nvme_del_cq_end : nvme_del_queue_end); return 0; -- cgit v1.2.3 From da42761181627e9bdc37d18368b827948a583929 Mon Sep 17 00:00:00 2001 From: "Smith, Kyle Miller (Nimble Kernel)" Date: Fri, 22 Apr 2022 14:40:32 +0000 Subject: nvme-pci: fix a NULL pointer dereference in nvme_alloc_admin_tags In nvme_alloc_admin_tags, the admin_q can be set to an error (typically -ENOMEM) if the blk_mq_init_queue call fails to set up the queue, which is checked immediately after the call. However, when we return the error message up the stack, to nvme_reset_work the error takes us to nvme_remove_dead_ctrl() nvme_dev_disable() nvme_suspend_queue(&dev->queues[0]). Here, we only check that the admin_q is non-NULL, rather than not an error or NULL, and begin quiescing a queue that never existed, leading to bad / NULL pointer dereference. Signed-off-by: Kyle Smith Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f326261456ac..e951bd9b8159 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1776,6 +1776,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset); if (IS_ERR(dev->ctrl.admin_q)) { blk_mq_free_tag_set(&dev->admin_tagset); + dev->ctrl.admin_q = NULL; return -ENOMEM; } if (!blk_get_queue(dev->ctrl.admin_q)) { -- cgit v1.2.3 From b98235d3a471e121376bfabce27380dde5add1d9 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Fri, 6 May 2022 12:15:34 +0200 Subject: nvme-pci: harden drive presence detect in nvme_dev_disable() On our ZynqMP system we observe, that a NVMe drive that resets itself while doing a firmware update causes a Kernel crash like this: [ 67.720772] pcieport 0000:02:02.0: pciehp: Slot(2): Link Down [ 67.720783] pcieport 0000:02:02.0: pciehp: Slot(2): Card not present [ 67.720795] nvme 0000:04:00.0: PME# disabled [ 67.720849] Internal error: synchronous external abort: 96000010 [#1] PREEMPT SMP [ 67.720853] nwl-pcie fd0e0000.pcie: Slave error Analysis: When nvme_dev_disable() is called because of this PCIe hotplug event, pci_is_enabled() is still true. And accessing the NVMe drive which is currently not available as it's in reboot process causes this "synchronous external abort" on this ARM64 platform. This patch adds the pci_device_is_present() check as well, which returns false in this "Card not present" hot-plug case. With this change, the NVMe driver does not try to access the NVMe registers any more and the FW update finishes without any problems. Signed-off-by: Stefan Roese Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e951bd9b8159..5a98a7de0964 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2678,7 +2678,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) struct pci_dev *pdev = to_pci_dev(dev->dev); mutex_lock(&dev->shutdown_lock); - if (pci_is_enabled(pdev)) { + if (pci_device_is_present(pdev) && pci_is_enabled(pdev)) { u32 csts = readl(dev->bar + NVME_REG_CSTS); if (dev->ctrl.state == NVME_CTRL_LIVE || -- cgit v1.2.3 From 93ba75c90524618ef2c20979b0e660b9d071f0e6 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 30 Mar 2022 02:40:32 -0700 Subject: nvme-fabrics: add a request timeout helper The RDAMA and TCP transport both complete the timed out request in the same manner and hence code is duplicated. Add and use the helper nvmf_complete_timed_out_request() to remove the duplicate code. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.h | 8 ++++++++ drivers/nvme/host/rdma.c | 5 +---- drivers/nvme/host/tcp.c | 5 +---- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 1e3a09cad961..46d6e194ac2b 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -187,6 +187,14 @@ static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl) return ctrl->subsys->subnqn; } +static inline void nvmf_complete_timed_out_request(struct request *rq) +{ + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { + nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; + blk_mq_complete_request(rq); + } +} + int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val); int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val); int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d9f19d901313..b87c8ae41d9b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2010,10 +2010,7 @@ static void nvme_rdma_complete_timed_out(struct request *rq) struct nvme_rdma_queue *queue = req->queue; nvme_rdma_stop_queue(queue); - if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { - nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; - blk_mq_complete_request(rq); - } + nvmf_complete_timed_out_request(rq); } static enum blk_eh_timer_return diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index ad3a2bf2f1e9..bb67538d241b 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2318,10 +2318,7 @@ static void nvme_tcp_complete_timed_out(struct request *rq) struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); - if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { - nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; - blk_mq_complete_request(rq); - } + nvmf_complete_timed_out_request(rq); } static enum blk_eh_timer_return -- cgit v1.2.3 From 354201c53e61e493017b15327294b0c8ab522d69 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 May 2022 15:09:21 +0200 Subject: nvme: add support for TP4084 - Time-to-Ready Enhancements Add support for using longer timeouts during controller initialization and letting the controller come up with namespaces that are not ready for I/O yet. We skip these not ready namespaces during scanning and only bring them online once anoter scan is kicked off by the AEN that is set when the NRDY bit gets set in the I/O Command Set Independent Identify Namespace Data Structure. This asynchronous probing avoids blocking the kernel boot when controllers take a very long time to recover after unclean shutdowns (up to minutes). Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke --- drivers/nvme/host/constants.c | 1 + drivers/nvme/host/core.c | 76 +++++++++++++++++++++++++++++++++++++++---- include/linux/nvme.h | 31 ++++++++++++++++++ 3 files changed, 102 insertions(+), 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index 1dd1d78de295..4910543f00ff 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -91,6 +91,7 @@ static const char * const nvme_statuses[] = { [NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected", [NVME_SC_CMD_INTERRUPTED] = "Command Interrupted", [NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error", + [NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY] = "Admin Command Media Not Ready", [NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set", [NVME_SC_LBA_RANGE] = "LBA Out of Range", [NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded", diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 42f9772abc4d..faeb03271913 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1427,6 +1427,32 @@ out_free_id: return error; } +static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid, + struct nvme_id_ns_cs_indep **id) +{ + struct nvme_command c = { + .identify.opcode = nvme_admin_identify, + .identify.nsid = cpu_to_le32(nsid), + .identify.cns = NVME_ID_CNS_NS_CS_INDEP, + }; + int ret; + + *id = kmalloc(sizeof(**id), GFP_KERNEL); + if (!*id) + return -ENOMEM; + + ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id)); + if (ret) { + dev_warn(ctrl->device, + "Identify namespace (CS independent) failed (%d)\n", + ret); + kfree(*id); + return ret; + } + + return 0; +} + static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, unsigned int dword11, void *buffer, size_t buflen, u32 *result) { @@ -2103,10 +2129,9 @@ static const struct block_device_operations nvme_bdev_ops = { .pr_ops = &nvme_pr_ops, }; -static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) +static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled) { - unsigned long timeout = - ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; + unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies; u32 csts, bit = enabled ? NVME_CSTS_RDY : 0; int ret; @@ -2119,7 +2144,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) usleep_range(1000, 2000); if (fatal_signal_pending(current)) return -EINTR; - if (time_after(jiffies, timeout)) { + if (time_after(jiffies, timeout_jiffies)) { dev_err(ctrl->device, "Device not ready; aborting %s, CSTS=0x%x\n", enabled ? "initialisation" : "reset", csts); @@ -2150,13 +2175,14 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl) if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) msleep(NVME_QUIRK_DELAY_AMOUNT); - return nvme_wait_ready(ctrl, ctrl->cap, false); + return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false); } EXPORT_SYMBOL_GPL(nvme_disable_ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl) { unsigned dev_page_min; + u32 timeout; int ret; ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap); @@ -2177,6 +2203,27 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) ctrl->ctrl_config = NVME_CC_CSS_CSI; else ctrl->ctrl_config = NVME_CC_CSS_NVM; + + if (ctrl->cap & NVME_CAP_CRMS_CRWMS) { + u32 crto; + + ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto); + if (ret) { + dev_err(ctrl->device, "Reading CRTO failed (%d)\n", + ret); + return ret; + } + + if (ctrl->cap & NVME_CAP_CRMS_CRIMS) { + ctrl->ctrl_config |= NVME_CC_CRIME; + timeout = NVME_CRTO_CRIMT(crto); + } else { + timeout = NVME_CRTO_CRWMT(crto); + } + } else { + timeout = NVME_CAP_TIMEOUT(ctrl->cap); + } + ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT; ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; @@ -2185,7 +2232,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); if (ret) return ret; - return nvme_wait_ready(ctrl, ctrl->cap, true); + return nvme_wait_ready(ctrl, timeout, true); } EXPORT_SYMBOL_GPL(nvme_enable_ctrl); @@ -4092,11 +4139,26 @@ out: static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns_ids ids = { }; + struct nvme_id_ns_cs_indep *id; struct nvme_ns *ns; + bool ready = true; if (nvme_identify_ns_descs(ctrl, nsid, &ids)) return; + /* + * Check if the namespace is ready. If not ignore it, we will get an + * AEN once it becomes ready and restart the scan. + */ + if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) && + !nvme_identify_ns_cs_indep(ctrl, nsid, &id)) { + ready = id->nstat & NVME_NSTAT_NRDY; + kfree(id); + } + + if (!ready) + return; + ns = nvme_find_get_ns(ctrl, nsid); if (ns) { nvme_validate_ns(ns, &ids); @@ -4841,6 +4903,8 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct nvme_id_ns_cs_indep) != + NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 5f6d432fa06a..29ec3e3481ff 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -137,6 +137,7 @@ enum { NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory * Space Control */ + NVME_REG_CRTO = 0x0068, /* Controller Ready Timeouts */ NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */ NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */ NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */ @@ -161,6 +162,9 @@ enum { #define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7) #define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff) +#define NVME_CRTO_CRIMT(crto) ((crto) >> 16) +#define NVME_CRTO_CRWMT(crto) ((crto) & 0xffff) + enum { NVME_CMBSZ_SQS = 1 << 0, NVME_CMBSZ_CQS = 1 << 1, @@ -204,6 +208,7 @@ enum { NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, + NVME_CC_CRIME = 1 << 24, }; enum { @@ -227,6 +232,11 @@ enum { NVME_CAP_CSS_CSI = 1 << 6, }; +enum { + NVME_CAP_CRMS_CRIMS = 1ULL << 59, + NVME_CAP_CRMS_CRWMS = 1ULL << 60, +}; + struct nvme_id_power_state { __le16 max_power; /* centiwatts */ __u8 rsvd2; @@ -414,6 +424,21 @@ struct nvme_id_ns { __u8 vs[3712]; }; +/* I/O Command Set Independent Identify Namespace Data Structure */ +struct nvme_id_ns_cs_indep { + __u8 nsfeat; + __u8 nmic; + __u8 rescap; + __u8 fpi; + __le32 anagrpid; + __u8 nsattr; + __u8 rsvd9; + __le16 nvmsetid; + __le16 endgid; + __u8 nstat; + __u8 rsvd15[4081]; +}; + struct nvme_zns_lbafe { __le64 zsze; __u8 zdes; @@ -478,6 +503,7 @@ enum { NVME_ID_CNS_NS_DESC_LIST = 0x03, NVME_ID_CNS_CS_NS = 0x05, NVME_ID_CNS_CS_CTRL = 0x06, + NVME_ID_CNS_NS_CS_INDEP = 0x08, NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT = 0x11, NVME_ID_CNS_CTRL_NS_LIST = 0x12, @@ -531,6 +557,10 @@ enum { NVME_NS_DPS_PI_TYPE3 = 3, }; +enum { + NVME_NSTAT_NRDY = 1 << 0, +}; + enum { NVME_NVM_NS_16B_GUARD = 0, NVME_NVM_NS_32B_GUARD = 1, @@ -1592,6 +1622,7 @@ enum { NVME_SC_NS_WRITE_PROTECTED = 0x20, NVME_SC_CMD_INTERRUPTED = 0x21, NVME_SC_TRANSIENT_TR_ERR = 0x22, + NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY = 0x24, NVME_SC_INVALID_IO_CMD_SET = 0x2C, NVME_SC_LBA_RANGE = 0x80, -- cgit v1.2.3 From 78288665b5d0154978fed431985310cb4f166836 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 18 May 2022 08:51:38 -0700 Subject: nvme: set non-mdts limits in nvme_scan_work In current implementation we set the non-mdts limits by calling nvme_init_non_mdts_limits() from nvme_init_ctrl_finish(). This also tries to set the limits for the discovery controller which has no I/O queues resulting in the warning message reported by the nvme_log_error() when running blktest nvme/002: - [ 2005.155946] run blktests nvme/002 at 2022-04-09 16:57:47 [ 2005.192223] loop: module loaded [ 2005.196429] nvmet: adding nsid 1 to subsystem blktests-subsystem-0 [ 2005.200334] nvmet: adding nsid 1 to subsystem blktests-subsystem-1 <------------------------------SNIP----------------------------------> [ 2008.958108] nvmet: adding nsid 1 to subsystem blktests-subsystem-997 [ 2008.962082] nvmet: adding nsid 1 to subsystem blktests-subsystem-998 [ 2008.966102] nvmet: adding nsid 1 to subsystem blktests-subsystem-999 [ 2008.973132] nvmet: creating discovery controller 1 for subsystem nqn.2014-08.org.nvmexpress.discovery for NQN testhostnqn. *[ 2008.973196] nvme1: Identify(0x6), Invalid Field in Command (sct 0x0 / sc 0x2) MORE DNR* [ 2008.974595] nvme nvme1: new ctrl: "nqn.2014-08.org.nvmexpress.discovery" [ 2009.103248] nvme nvme1: Removing ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery" Move the call of nvme_init_non_mdts_limits() to nvme_scan_work() after we verify that I/O queues are created since that is a converging point for each transport where these limits are actually used. 1. FC : nvme_fc_create_association() ... nvme_fc_create_io_queues(ctrl); ... nvme_start_ctrl() nvme_scan_queue() nvme_scan_work() 2. PCIe:- nvme_reset_work() ... nvme_setup_io_queues() nvme_create_io_queues() nvme_alloc_queue() ... nvme_start_ctrl() nvme_scan_queue() nvme_scan_work() 3. RDMA :- nvme_rdma_setup_ctrl ... nvme_rdma_configure_io_queues ... nvme_start_ctrl() nvme_scan_queue() nvme_scan_work() 4. TCP :- nvme_tcp_setup_ctrl ... nvme_tcp_configure_io_queues ... nvme_start_ctrl() nvme_scan_queue() nvme_scan_work() * nvme_scan_work() ... nvme_validate_or_alloc_ns() nvme_alloc_ns() nvme_update_ns_info() nvme_update_disk_info() nvme_config_discard() <--- blk_queue_max_write_zeroes_sectors() <--- Signed-off-by: Chaitanya Kulkarni Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index faeb03271913..a7660e4be55b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3129,10 +3129,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) if (ret) return ret; - ret = nvme_init_non_mdts_limits(ctrl); - if (ret < 0) - return ret; - ret = nvme_configure_apst(ctrl); if (ret < 0) return ret; @@ -4301,11 +4297,26 @@ static void nvme_scan_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, scan_work); + int ret; /* No tagset on a live ctrl means IO queues could not created */ if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset) return; + /* + * Identify controller limits can change at controller reset due to + * new firmware download, even though it is not common we cannot ignore + * such scenario. Controller's non-mdts limits are reported in the unit + * of logical blocks that is dependent on the format of attached + * namespace. Hence re-read the limits at the time of ns allocation. + */ + ret = nvme_init_non_mdts_limits(ctrl); + if (ret < 0) { + dev_warn(ctrl->device, + "reading non-mdts-limits failed: %d\n", ret); + return; + } + if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) { dev_info(ctrl->device, "rescanning namespaces.\n"); nvme_clear_changed_ns_log(ctrl); -- cgit v1.2.3