From 27ddb689909cd0bab30524a5f720ae3a3e55acac Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 1 Feb 2017 09:53:15 -0800 Subject: PCI: add an API to get node from vector Next patch will use the API to get the node from vector for nvme device Signed-off-by: Shaohua Li Reviewed-by: Christoph Hellwig Acked-by: Bjorn Helgaas Signed-off-by: Jens Axboe --- drivers/pci/msi.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'drivers') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 980eaf588281..d571bc330686 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1298,6 +1298,22 @@ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr) } EXPORT_SYMBOL(pci_irq_get_affinity); +/** + * pci_irq_get_node - return the numa node of a particular msi vector + * @pdev: PCI device to operate on + * @vec: device-relative interrupt vector index (0-based). + */ +int pci_irq_get_node(struct pci_dev *pdev, int vec) +{ + const struct cpumask *mask; + + mask = pci_irq_get_affinity(pdev, vec); + if (mask) + return local_memory_node(cpu_to_node(cpumask_first(mask))); + return dev_to_node(&pdev->dev); +} +EXPORT_SYMBOL(pci_irq_get_node); + struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) { return to_pci_dev(desc->dev); -- cgit v1.2.3 From d3af3ecdc62c46fa67ce7a681f173acb1d750e33 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 1 Feb 2017 09:53:16 -0800 Subject: nvme: allocate nvme_queue in correct node nvme_queue is per-cpu queue (mostly). Allocating it in node where blk-mq will use it. Signed-off-by: Shaohua Li Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 57a1af52b06e..eee8f8426ff2 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1038,9 +1038,10 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, } static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, - int depth) + int depth, int node) { - struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL); + struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL, + node); if (!nvmeq) return NULL; @@ -1217,7 +1218,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) nvmeq = dev->queues[0]; if (!nvmeq) { - nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH); + nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, + dev_to_node(dev->dev)); if (!nvmeq) return -ENOMEM; } @@ -1309,7 +1311,9 @@ static int nvme_create_io_queues(struct nvme_dev *dev) int ret = 0; for (i = dev->queue_count; i <= dev->max_qid; i++) { - if (!nvme_alloc_queue(dev, i, dev->q_depth)) { + /* vector == qid - 1, match nvme_create_queue */ + if (!nvme_alloc_queue(dev, i, dev->q_depth, + pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) { ret = -ENOMEM; break; } -- cgit v1.2.3 From 6a8a21546507a3ec88e81c2ec927a3fb63efa8ff Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 1 Mar 2017 11:47:22 -0500 Subject: nbd: stop leaking sockets This was introduced in the multi-connection patch, we've been leaking socket's ever since. Fixes: 9561a7a ("nbd: add multi-connection support") cc: stable@vger.kernel.org Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 0bf2b21a62cb..c7e93f62366f 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -689,8 +689,10 @@ static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev) nbd->num_connections) { int i; - for (i = 0; i < nbd->num_connections; i++) + for (i = 0; i < nbd->num_connections; i++) { + sockfd_put(nbd->socks[i]->sock); kfree(nbd->socks[i]); + } kfree(nbd->socks); nbd->socks = NULL; nbd->num_connections = 0; -- cgit v1.2.3 From 302ad8cc09339ea261eef58a8d5f4a116a8ffda5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 1 Mar 2017 14:22:12 -0500 Subject: nvme: Complete all stuck requests If the nvme driver is shutting down its controller, the drievr will not start the queues up again, preventing blk-mq's hot CPU notifier from making forward progress. To fix that, this patch starts a request_queue freeze when the driver resets a controller so no new requests may enter. The driver will wait for frozen after IO queues are restarted to ensure the queue reference can be reinitialized when nvme requests to unfreeze the queues. If the driver is doing a safe shutdown, the driver will wait for the controller to successfully complete all inflight requests so that we don't unnecessarily fail them. Once the controller has been disabled, the queues will be restarted to force remaining entered requests to end in failure so that blk-mq's hot cpu notifier may progress. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 4 ++++ drivers/nvme/host/pci.c | 33 ++++++++++++++++++++++++++++----- 3 files changed, 79 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 25ec4e585220..9b3b57fef446 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2344,6 +2344,53 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_kill_queues); +void nvme_unfreeze(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) + blk_mq_unfreeze_queue(ns->queue); + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_unfreeze); + +void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) { + timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout); + if (timeout <= 0) + break; + } + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout); + +void nvme_wait_freeze(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) + blk_mq_freeze_queue_wait(ns->queue); + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_wait_freeze); + +void nvme_start_freeze(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) + blk_mq_freeze_queue_start(ns->queue); + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_start_freeze); + void nvme_stop_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a3da1e90b99d..2aa20e3e5675 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -294,6 +294,10 @@ void nvme_queue_async_events(struct nvme_ctrl *ctrl); void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); void nvme_kill_queues(struct nvme_ctrl *ctrl); +void nvme_unfreeze(struct nvme_ctrl *ctrl); +void nvme_wait_freeze(struct nvme_ctrl *ctrl); +void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); +void nvme_start_freeze(struct nvme_ctrl *ctrl); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index eee8f8426ff2..26a5fd05fe88 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1675,21 +1675,34 @@ static void nvme_pci_disable(struct nvme_dev *dev) static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { int i, queues; - u32 csts = -1; + bool dead = true; + struct pci_dev *pdev = to_pci_dev(dev->dev); del_timer_sync(&dev->watchdog_timer); mutex_lock(&dev->shutdown_lock); - if (pci_is_enabled(to_pci_dev(dev->dev))) { - nvme_stop_queues(&dev->ctrl); - csts = readl(dev->bar + NVME_REG_CSTS); + if (pci_is_enabled(pdev)) { + u32 csts = readl(dev->bar + NVME_REG_CSTS); + + if (dev->ctrl.state == NVME_CTRL_LIVE) + nvme_start_freeze(&dev->ctrl); + dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) || + pdev->error_state != pci_channel_io_normal); } + /* + * Give the controller a chance to complete all entered requests if + * doing a safe shutdown. + */ + if (!dead && shutdown) + nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); + nvme_stop_queues(&dev->ctrl); + queues = dev->online_queues - 1; for (i = dev->queue_count - 1; i > 0; i--) nvme_suspend_queue(dev->queues[i]); - if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { + if (dead) { /* A device might become IO incapable very soon during * probe, before the admin queue is configured. Thus, * queue_count can be 0 here. @@ -1704,6 +1717,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl); blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl); + + /* + * The driver will not be starting up queues again if shutting down so + * must flush all entered requests to their failed completion to avoid + * deadlocking blk-mq hot-cpu notifier. + */ + if (shutdown) + nvme_start_queues(&dev->ctrl); mutex_unlock(&dev->shutdown_lock); } @@ -1826,7 +1847,9 @@ static void nvme_reset_work(struct work_struct *work) nvme_remove_namespaces(&dev->ctrl); } else { nvme_start_queues(&dev->ctrl); + nvme_wait_freeze(&dev->ctrl); nvme_dev_add(dev); + nvme_unfreeze(&dev->ctrl); } if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { -- cgit v1.2.3 From e02898b423802b1f3a3aaa7f16e896da069ba8f7 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 1 Mar 2017 10:42:38 -0800 Subject: loop: fix LO_FLAGS_PARTSCAN hang loop_reread_partitions() needs to do I/O, but we just froze the queue, so we end up waiting forever. This can easily be reproduced with losetup -P. Fix it by moving the reread to after we unfreeze the queue. Fixes: ecdd09597a57 ("block/loop: fix race between I/O and set_status") Reported-by: Tejun Heo Cc: stable@vger.kernel.org Signed-off-by: Omar Sandoval Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/loop.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 4b52a1690329..132c9f371dce 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1142,13 +1142,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) (info->lo_flags & LO_FLAGS_AUTOCLEAR)) lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; - if ((info->lo_flags & LO_FLAGS_PARTSCAN) && - !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { - lo->lo_flags |= LO_FLAGS_PARTSCAN; - lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - loop_reread_partitions(lo, lo->lo_device); - } - lo->lo_encrypt_key_size = info->lo_encrypt_key_size; lo->lo_init[0] = info->lo_init[0]; lo->lo_init[1] = info->lo_init[1]; @@ -1163,6 +1156,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) exit: blk_mq_unfreeze_queue(lo->lo_queue); + + if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) && + !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { + lo->lo_flags |= LO_FLAGS_PARTSCAN; + lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; + loop_reread_partitions(lo, lo->lo_device); + } + return err; } -- cgit v1.2.3