From d5887dc6b6c054d0da3cd053afc15b7be1f45ff6 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Mon, 22 Apr 2024 12:28:23 -0400 Subject: nvme-pci: Add quirk for broken MSIs Sandisk SN530 NVMe drives have broken MSIs. On systems without MSI-X support, all commands time out resulting in the following message: nvme nvme0: I/O tag 12 (100c) QID 0 timeout, completion polled These timeouts cause the boot to take an excessively-long time (over 20 minutes) while the initial command queue is flushed. Address this by adding a quirk for drives with buggy MSIs. The lspci output for this device (recorded on a system with MSI-X support) is: 02:00.0 Non-Volatile memory controller: Sandisk Corp Device 5008 (rev 01) (prog-if 02 [NVM Express]) Subsystem: Sandisk Corp Device 5008 Flags: bus master, fast devsel, latency 0, IRQ 16, NUMA node 0 Memory at f7e00000 (64-bit, non-prefetchable) [size=16K] Memory at f7e04000 (64-bit, non-prefetchable) [size=256] Capabilities: [80] Power Management version 3 Capabilities: [90] MSI: Enable- Count=1/32 Maskable- 64bit+ Capabilities: [b0] MSI-X: Enable+ Count=17 Masked- Capabilities: [c0] Express Endpoint, MSI 00 Capabilities: [100] Advanced Error Reporting Capabilities: [150] Device Serial Number 00-00-00-00-00-00-00-00 Capabilities: [1b8] Latency Tolerance Reporting Capabilities: [300] Secondary PCI Express Capabilities: [900] L1 PM Substates Kernel driver in use: nvme Kernel modules: nvme Cc: Signed-off-by: Sean Anderson Reviewed-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 5 +++++ drivers/nvme/host/pci.c | 14 +++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b564c5f1450c..05532c281177 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -162,6 +162,11 @@ enum nvme_quirks { * Disables simple suspend/resume path. */ NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND = (1 << 20), + + /* + * MSI (but not MSI-X) interrupts are broken and never fire. + */ + NVME_QUIRK_BROKEN_MSI = (1 << 21), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e393f6947ce4..710043086dff 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2224,6 +2224,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) .priv = dev, }; unsigned int irq_queues, poll_queues; + unsigned int flags = PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY; /* * Poll queues don't need interrupts, but we need at least one I/O queue @@ -2247,8 +2248,10 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) irq_queues = 1; if (!(dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR)) irq_queues += (nr_io_queues - poll_queues); - return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues, - PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); + if (dev->ctrl.quirks & NVME_QUIRK_BROKEN_MSI) + flags &= ~PCI_IRQ_MSI; + return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues, flags, + &affd); } static unsigned int nvme_max_io_queues(struct nvme_dev *dev) @@ -2477,6 +2480,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) { int result = -ENOMEM; struct pci_dev *pdev = to_pci_dev(dev->dev); + unsigned int flags = PCI_IRQ_ALL_TYPES; if (pci_enable_device_mem(pdev)) return result; @@ -2493,7 +2497,9 @@ static int nvme_pci_enable(struct nvme_dev *dev) * interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll * adjust this later. */ - result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); + if (dev->ctrl.quirks & NVME_QUIRK_BROKEN_MSI) + flags &= ~PCI_IRQ_MSI; + result = pci_alloc_irq_vectors(pdev, 1, 1, flags); if (result < 0) goto disable; @@ -3390,6 +3396,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | NVME_QUIRK_DISABLE_WRITE_ZEROES| NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x15b7, 0x5008), /* Sandisk SN530 */ + .driver_data = NVME_QUIRK_BROKEN_MSI }, { PCI_DEVICE(0x1987, 0x5012), /* Phison E12 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ -- cgit v1.2.3 From 4b9a89be214235acbff003232baba123c868a25c Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 12 Apr 2024 15:41:54 +0200 Subject: nvmet-auth: return the error code to the nvmet_auth_ctrl_hash() callers If nvmet_auth_ctrl_hash() fails, return the error code to its callers Signed-off-by: Maurizio Lombardi Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index fb518b00f71f..4f08362aee8b 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -480,7 +480,7 @@ out_free_response: nvme_auth_free_key(transformed_key); out_free_tfm: crypto_free_shash(shash_tfm); - return 0; + return ret; } int nvmet_auth_ctrl_exponential(struct nvmet_req *req, -- cgit v1.2.3 From 34cfb09cdc75457a671279165a88a0739a170f07 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 7 May 2024 09:54:10 +0300 Subject: nvmet: make nvmet_wq unbound When deleting many controllers one-by-one, it takes a very long time as these work elements may serialize as they are scheduled on the executing cpu instead of spreading. In general nvmet_wq can definitely be used for long standing work elements so its better to make it unbound regardless. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index e06013c5dace..2fde22323622 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1686,7 +1686,8 @@ static int __init nvmet_init(void) if (!buffered_io_wq) goto out_free_zbd_work_queue; - nvmet_wq = alloc_workqueue("nvmet-wq", WQ_MEM_RECLAIM, 0); + nvmet_wq = alloc_workqueue("nvmet-wq", + WQ_MEM_RECLAIM | WQ_UNBOUND, 0); if (!nvmet_wq) goto out_free_buffered_work_queue; -- cgit v1.2.3 From d15dcd0f1a4753b57e66c64c8dc2a9779ff96aab Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 May 2024 10:43:04 +0300 Subject: nvmet: prevent sprintf() overflow in nvmet_subsys_nsid_exists() The nsid value is a u32 that comes from nvmet_req_find_ns(). It's endian data and we're on an error path and both of those raise red flags. So let's make this safer. 1) Make the buffer large enough for any u32. 2) Remove the unnecessary initialization. 3) Use snprintf() instead of sprintf() for even more safety. 4) The sprintf() function returns the number of bytes printed, not counting the NUL terminator. It is impossible for the return value to be <= 0 so delete that. Fixes: 505363957fad ("nvmet: fix nvme status code when namespace is disabled") Signed-off-by: Dan Carpenter Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/configfs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 89a001101a7f..7fda69395c1e 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -757,10 +757,9 @@ static struct configfs_attribute *nvmet_ns_attrs[] = { bool nvmet_subsys_nsid_exists(struct nvmet_subsys *subsys, u32 nsid) { struct config_item *ns_item; - char name[4] = {}; + char name[12]; - if (sprintf(name, "%u", nsid) <= 0) - return false; + snprintf(name, sizeof(name), "%u", nsid); mutex_lock(&subsys->namespaces_group.cg_subsys->su_mutex); ns_item = config_group_find_item(&subsys->namespaces_group, name); mutex_unlock(&subsys->namespaces_group.cg_subsys->su_mutex); -- cgit v1.2.3 From 73964c1d07c054376f1b32a62548571795159148 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 8 May 2024 10:53:06 +0300 Subject: nvmet-rdma: fix possible bad dereference when freeing rsps It is possible that the host connected and saw a cm established event and started sending nvme capsules on the qp, however the ctrl did not yet see an established event. This is why the rsp_wait_list exists (for async handling of these cmds, we move them to a pending list). Furthermore, it is possible that the ctrl cm times out, resulting in a connect-error cm event. in this case we hit a bad deref [1] because in nvmet_rdma_free_rsps we assume that all the responses are in the free list. We are freeing the cmds array anyways, so don't even bother to remove the rsp from the free_list. It is also guaranteed that we are not racing anything when we are releasing the queue so no other context accessing this array should be running. [1]: -- Workqueue: nvmet-free-wq nvmet_rdma_free_queue_work [nvmet_rdma] [...] pc : nvmet_rdma_free_rsps+0x78/0xb8 [nvmet_rdma] lr : nvmet_rdma_free_queue_work+0x88/0x120 [nvmet_rdma] Call trace: nvmet_rdma_free_rsps+0x78/0xb8 [nvmet_rdma] nvmet_rdma_free_queue_work+0x88/0x120 [nvmet_rdma] process_one_work+0x1ec/0x4a0 worker_thread+0x48/0x490 kthread+0x158/0x160 ret_from_fork+0x10/0x18 -- Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/rdma.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 5b8c63e74639..6e1b4140cde0 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -474,12 +474,8 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue) return 0; out_free: - while (--i >= 0) { - struct nvmet_rdma_rsp *rsp = &queue->rsps[i]; - - list_del(&rsp->free_list); - nvmet_rdma_free_rsp(ndev, rsp); - } + while (--i >= 0) + nvmet_rdma_free_rsp(ndev, &queue->rsps[i]); kfree(queue->rsps); out: return ret; @@ -490,12 +486,8 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue) struct nvmet_rdma_device *ndev = queue->dev; int i, nr_rsps = queue->recv_queue_size * 2; - for (i = 0; i < nr_rsps; i++) { - struct nvmet_rdma_rsp *rsp = &queue->rsps[i]; - - list_del(&rsp->free_list); - nvmet_rdma_free_rsp(ndev, rsp); - } + for (i = 0; i < nr_rsps; i++) + nvmet_rdma_free_rsp(ndev, &queue->rsps[i]); kfree(queue->rsps); } -- cgit v1.2.3