From f9d03f96b988002027d4b28ea1b7a24729a4c9b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Dec 2016 15:20:32 -0700 Subject: block: improve handling of the magic discard payload Instead of allocating a single unused biovec for discard requests, send them down without any payload. Instead we allow the driver to add a "special" payload using a biovec embedded into struct request (unioned over other fields never used while in the driver), and overloading the number of segments for this case. This has a couple of advantages: - we don't have to allocate the bio_vec - the amount of special casing for discard requests in the block layer is significantly reduced - using this same scheme for other request types is trivial, which will be important for implementing the new WRITE_ZEROES op on devices where it actually requires a payload (e.g. SCSI) - we can get rid of playing games with the request length, as we'll never touch it and completions will work just fine - it will allow us to support ranged discard operations in the future by merging non-contiguous discard bios into a single request - last but not least it removes a lot of code This patch is the common base for my WIP series for ranged discards and to remove discard_zeroes_data in favor of always using REQ_OP_WRITE_ZEROES, so it would be good to get it in quickly. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 17 ++++------------- drivers/nvme/host/nvme.h | 6 ++++-- drivers/nvme/host/pci.c | 27 ++++++++++++++------------- drivers/nvme/host/rdma.c | 13 +++++-------- drivers/nvme/target/loop.c | 4 ++-- 5 files changed, 29 insertions(+), 38 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1b48514fbe99..3b1d6478dcfb 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -239,8 +239,6 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, struct nvme_command *cmnd) { struct nvme_dsm_range *range; - struct page *page; - int offset; unsigned int nr_bytes = blk_rq_bytes(req); range = kmalloc(sizeof(*range), GFP_ATOMIC); @@ -257,17 +255,10 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, cmnd->dsm.nr = 0; cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); - req->completion_data = range; - page = virt_to_page(range); - offset = offset_in_page(range); - blk_add_request_payload(req, page, offset, sizeof(*range)); - - /* - * we set __data_len back to the size of the area to be discarded - * on disk. This allows us to report completion on the full amount - * of blocks described by the request. 
- */ - req->__data_len = nr_bytes; + req->special_vec.bv_page = virt_to_page(range); + req->special_vec.bv_offset = offset_in_page(range); + req->special_vec.bv_len = sizeof(*range); + req->rq_flags |= RQF_SPECIAL_PAYLOAD; return BLK_MQ_RQ_QUEUE_OK; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a3d6ffd874af..bd5321441d12 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -236,8 +236,10 @@ static inline unsigned nvme_map_len(struct request *rq) static inline void nvme_cleanup_cmd(struct request *req) { - if (req_op(req) == REQ_OP_DISCARD) - kfree(req->completion_data); + if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { + kfree(page_address(req->special_vec.bv_page) + + req->special_vec.bv_offset); + } } static inline int nvme_error_status(u16 status) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 82b9b3f1f21d..717d6ea47ee4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -302,14 +302,14 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq, static __le64 **iod_list(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - return (__le64 **)(iod->sg + req->nr_phys_segments); + return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req)); } static int nvme_init_iod(struct request *rq, unsigned size, struct nvme_dev *dev) { struct nvme_iod *iod = blk_mq_rq_to_pdu(rq); - int nseg = rq->nr_phys_segments; + int nseg = blk_rq_nr_phys_segments(rq); if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC); @@ -339,8 +339,6 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req) __le64 **list = iod_list(req); dma_addr_t prp_dma = iod->first_dma; - nvme_cleanup_cmd(req); - if (iod->npages == 0) dma_pool_free(dev->prp_small_pool, list[0], prp_dma); for (i = 0; i < iod->npages; i++) { @@ -510,7 +508,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req, DMA_TO_DEVICE : 
DMA_FROM_DEVICE; int ret = BLK_MQ_RQ_QUEUE_ERROR; - sg_init_table(iod->sg, req->nr_phys_segments); + sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); iod->nents = blk_rq_map_sg(q, req, iod->sg); if (!iod->nents) goto out; @@ -566,6 +564,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) } } + nvme_cleanup_cmd(req); nvme_free_iod(dev, req); } @@ -596,20 +595,20 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, } } - map_len = nvme_map_len(req); - ret = nvme_init_iod(req, map_len, dev); + ret = nvme_setup_cmd(ns, req, &cmnd); if (ret != BLK_MQ_RQ_QUEUE_OK) return ret; - ret = nvme_setup_cmd(ns, req, &cmnd); + map_len = nvme_map_len(req); + ret = nvme_init_iod(req, map_len, dev); if (ret != BLK_MQ_RQ_QUEUE_OK) - goto out; + goto out_free_cmd; - if (req->nr_phys_segments) + if (blk_rq_nr_phys_segments(req)) ret = nvme_map_data(dev, req, map_len, &cmnd); if (ret != BLK_MQ_RQ_QUEUE_OK) - goto out; + goto out_cleanup_iod; blk_mq_start_request(req); @@ -620,14 +619,16 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, else ret = BLK_MQ_RQ_QUEUE_ERROR; spin_unlock_irq(&nvmeq->q_lock); - goto out; + goto out_cleanup_iod; } __nvme_submit_cmd(nvmeq, &cmnd); nvme_process_cq(nvmeq); spin_unlock_irq(&nvmeq->q_lock); return BLK_MQ_RQ_QUEUE_OK; -out: +out_cleanup_iod: nvme_free_iod(dev, req); +out_free_cmd: + nvme_cleanup_cmd(req); return ret; } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b037d0cb2a7e..251101bf982f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -952,8 +952,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; - int nents, count; - int ret; + int count, ret; req->num_sge = 1; req->inline_data = false; @@ -965,16 +964,14 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, return nvme_rdma_set_sg_null(c); 
req->sg_table.sgl = req->first_sgl; - ret = sg_alloc_table_chained(&req->sg_table, rq->nr_phys_segments, - req->sg_table.sgl); + ret = sg_alloc_table_chained(&req->sg_table, + blk_rq_nr_phys_segments(rq), req->sg_table.sgl); if (ret) return -ENOMEM; - nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl); - BUG_ON(nents > rq->nr_phys_segments); - req->nents = nents; + req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl); - count = ib_dma_map_sg(ibdev, req->sg_table.sgl, nents, + count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents, rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); if (unlikely(count <= 0)) { sg_free_table_chained(&req->sg_table, true); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 57ded6b3ed8a..9aaa70071ae5 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -185,13 +185,13 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (blk_rq_bytes(req)) { iod->sg_table.sgl = iod->first_sgl; ret = sg_alloc_table_chained(&iod->sg_table, - req->nr_phys_segments, iod->sg_table.sgl); + blk_rq_nr_phys_segments(req), + iod->sg_table.sgl); if (ret) return BLK_MQ_RQ_QUEUE_BUSY; iod->req.sg = iod->sg_table.sgl; iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl); - BUG_ON(iod->req.sg_cnt > req->nr_phys_segments); } blk_mq_start_request(req); -- cgit v1.2.3