Diffstat (limited to 'drivers/nvme/host/tcp.c')
-rw-r--r--   drivers/nvme/host/tcp.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 47 insertions(+), 17 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c15a92163c1f..7c7c1886642f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -60,6 +60,7 @@ struct nvme_tcp_request {
 enum nvme_tcp_queue_flags {
 	NVME_TCP_Q_ALLOCATED	= 0,
 	NVME_TCP_Q_LIVE		= 1,
+	NVME_TCP_Q_POLLING	= 2,
 };
 
 enum nvme_tcp_recv_state {
@@ -75,6 +76,7 @@ struct nvme_tcp_queue {
 	int			io_cpu;
 
 	spinlock_t		lock;
+	struct mutex		send_mutex;
 	struct list_head	send_list;
 
 	/* recv state */
@@ -131,6 +133,7 @@ static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
 static struct workqueue_struct *nvme_tcp_wq;
 static struct blk_mq_ops nvme_tcp_mq_ops;
 static struct blk_mq_ops nvme_tcp_admin_mq_ops;
+static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
 
 static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
 {
@@ -257,15 +260,29 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
 	}
 }
 
-static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req)
+static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
+		bool sync)
 {
 	struct nvme_tcp_queue *queue = req->queue;
+	bool empty;
 
 	spin_lock(&queue->lock);
+	empty = list_empty(&queue->send_list) && !queue->request;
 	list_add_tail(&req->entry, &queue->send_list);
 	spin_unlock(&queue->lock);
 
-	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+	/*
+	 * if we're the first on the send_list and we can try to send
+	 * directly, otherwise queue io_work. Also, only do that if we
+	 * are on the same cpu, so we don't introduce contention.
+	 */
+	if (queue->io_cpu == smp_processor_id() &&
+	    sync && empty && mutex_trylock(&queue->send_mutex)) {
+		nvme_tcp_try_send(queue);
+		mutex_unlock(&queue->send_mutex);
+	} else {
+		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+	}
 }
 
 static inline struct nvme_tcp_request *
@@ -578,7 +595,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
 	req->state = NVME_TCP_SEND_H2C_PDU;
 	req->offset = 0;
 
-	nvme_tcp_queue_request(req);
+	nvme_tcp_queue_request(req, false);
 
 	return 0;
 }
@@ -794,11 +811,12 @@ static void nvme_tcp_data_ready(struct sock *sk)
 {
 	struct nvme_tcp_queue *queue;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	queue = sk->sk_user_data;
-	if (likely(queue && queue->rd_enabled))
+	if (likely(queue && queue->rd_enabled) &&
+	    !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
 		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 static void nvme_tcp_write_space(struct sock *sk)
@@ -867,7 +885,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 		if (last && !queue->data_digest)
 			flags |= MSG_EOR;
 		else
-			flags |= MSG_MORE;
+			flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
 
 		/* can't zcopy slab pages */
 		if (unlikely(PageSlab(page))) {
@@ -906,11 +924,16 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 	struct nvme_tcp_queue *queue = req->queue;
 	struct nvme_tcp_cmd_pdu *pdu = req->pdu;
 	bool inline_data = nvme_tcp_has_inline_data(req);
-	int flags = MSG_DONTWAIT | (inline_data ? MSG_MORE : MSG_EOR);
 	u8 hdgst = nvme_tcp_hdgst_len(queue);
 	int len = sizeof(*pdu) + hdgst - req->offset;
+	int flags = MSG_DONTWAIT;
 	int ret;
 
+	if (inline_data)
+		flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
+	else
+		flags |= MSG_EOR;
+
 	if (queue->hdr_digest && !req->offset)
 		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
 
@@ -949,7 +972,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
 
 	ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
 			offset_in_page(pdu) + req->offset, len,
-			MSG_DONTWAIT | MSG_MORE);
+			MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
 	if (unlikely(ret <= 0))
 		return ret;
 
@@ -1063,11 +1086,14 @@ static void nvme_tcp_io_work(struct work_struct *w)
 		bool pending = false;
 		int result;
 
-		result = nvme_tcp_try_send(queue);
-		if (result > 0)
-			pending = true;
-		else if (unlikely(result < 0))
-			break;
+		if (mutex_trylock(&queue->send_mutex)) {
+			result = nvme_tcp_try_send(queue);
+			mutex_unlock(&queue->send_mutex);
+			if (result > 0)
+				pending = true;
+			else if (unlikely(result < 0))
+				break;
+		}
 
 		result = nvme_tcp_try_recv(queue);
 		if (result > 0)
@@ -1319,6 +1345,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 	queue->ctrl = ctrl;
 	INIT_LIST_HEAD(&queue->send_list);
 	spin_lock_init(&queue->lock);
+	mutex_init(&queue->send_mutex);
 	INIT_WORK(&queue->io_work, nvme_tcp_io_work);
 	queue->queue_size = queue_size;
 
@@ -1543,6 +1570,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
 		set->reserved_tags = 2; /* connect + keep-alive */
 		set->numa_node = NUMA_NO_NODE;
+		set->flags = BLK_MQ_F_BLOCKING;
 		set->cmd_size = sizeof(struct nvme_tcp_request);
 		set->driver_data = ctrl;
 		set->nr_hw_queues = 1;
@@ -1554,7 +1582,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->queue_depth = nctrl->sqsize + 1;
 		set->reserved_tags = 1; /* fabric connect */
 		set->numa_node = NUMA_NO_NODE;
-		set->flags = BLK_MQ_F_SHOULD_MERGE;
+		set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
 		set->cmd_size = sizeof(struct nvme_tcp_request);
 		set->driver_data = ctrl;
 		set->nr_hw_queues = nctrl->queue_count - 1;
@@ -2113,7 +2141,7 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
 	ctrl->async_req.curr_bio = NULL;
 	ctrl->async_req.data_len = 0;
 
-	nvme_tcp_queue_request(&ctrl->async_req);
+	nvme_tcp_queue_request(&ctrl->async_req, true);
 }
 
 static enum blk_eh_timer_return
@@ -2244,7 +2272,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(rq);
 
-	nvme_tcp_queue_request(req);
+	nvme_tcp_queue_request(req, true);
 
 	return BLK_STS_OK;
 }
@@ -2302,9 +2330,11 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
 	if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
 		return 0;
 
+	set_bit(NVME_TCP_Q_POLLING, &queue->flags);
 	if (sk_can_busy_loop(sk) &&
 	    skb_queue_empty_lockless(&sk->sk_receive_queue))
 		sk_busy_loop(sk, true);
 	nvme_tcp_try_recv(queue);
+	clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
 	return queue->nr_cqe;
 }
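
For readers unfamiliar with the pattern, here is a minimal user-space sketch (not kernel code) of the direct-send fast path the patch adds to nvme_tcp_queue_request(): append to the send list under the list lock, and if the request is first in line and the caller is synchronous, try to take the send mutex and transmit inline rather than bouncing through the io_work workqueue. The kernel spinlock and workqueue are stood in for by a pthread mutex and a stub, the smp_processor_id() affinity check is omitted, and the names (queue_request, try_send, kick_io_work) are invented for the sketch.

	/*
	 * Illustration only: direct-send fast path modeled with pthreads.
	 */
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct req {
		struct req *next;
		int id;
	};

	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;  /* queue->lock */
	static pthread_mutex_t send_mutex = PTHREAD_MUTEX_INITIALIZER; /* queue->send_mutex */
	static struct req *send_head, *send_tail;

	static void try_send(void)	/* stands in for nvme_tcp_try_send() */
	{
		struct req *r;

		pthread_mutex_lock(&list_lock);
		r = send_head;
		if (r) {
			send_head = r->next;
			if (!send_head)
				send_tail = NULL;
		}
		pthread_mutex_unlock(&list_lock);
		if (r)
			printf("sent request %d inline\n", r->id);
	}

	static void kick_io_work(void)	/* stands in for queue_work_on() */
	{
		puts("deferred to io_work");
	}

	static void queue_request(struct req *r, bool sync)
	{
		bool empty;

		pthread_mutex_lock(&list_lock);
		empty = (send_head == NULL);	/* first in line? */
		r->next = NULL;
		if (send_tail)
			send_tail->next = r;
		else
			send_head = r;
		send_tail = r;
		pthread_mutex_unlock(&list_lock);

		/*
		 * Synchronous caller, empty list, and nobody else sending:
		 * transmit from the submitting context and skip the
		 * workqueue round-trip; otherwise defer to io_work.
		 */
		if (sync && empty && pthread_mutex_trylock(&send_mutex) == 0) {
			try_send();
			pthread_mutex_unlock(&send_mutex);
		} else {
			kick_io_work();
		}
	}

	int main(void)
	{
		struct req r = { .next = NULL, .id = 1 };

		queue_request(&r, true);	/* takes the inline fast path */
		return 0;
	}

The trylock is what keeps the fast path safe: io_work now also takes send_mutex around nvme_tcp_try_send(), so the two contexts never write to the socket concurrently, and a contended submitter simply falls back to the workqueue instead of blocking.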
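A second sketch for the send-flag change: the header is sent with MSG_MORE while payload still follows and MSG_EOR once the PDU is complete, mirroring the MSG_MORE | MSG_SENDPAGE_NOTLAST vs. MSG_EOR selection in the hunks above. MSG_SENDPAGE_NOTLAST is kernel-internal (it only matters for kernel_sendpage()), so this user-space version shows MSG_MORE/MSG_EOR only; send_pdu() and fd are invented, with fd assumed to be a connected Linux TCP socket (TCP honors MSG_EOR since v4.7).

	#include <sys/socket.h>
	#include <sys/types.h>

	/*
	 * Send a protocol header, hinting the stack whether payload
	 * follows. MSG_MORE lets TCP coalesce the header with the data
	 * that comes next; MSG_EOR marks the end of the record so the
	 * last segment is not held back for further coalescing.
	 */
	static ssize_t send_pdu(int fd, const void *hdr, size_t hdr_len,
				const void *data, size_t data_len)
	{
		int flags = data_len ? MSG_MORE : MSG_EOR;
		ssize_t ret;

		ret = send(fd, hdr, hdr_len, flags);
		if (ret < 0 || !data_len)
			return ret;

		/* Last fragment of the PDU: no MSG_MORE, push it out. */
		return send(fd, data, data_len, MSG_EOR);
	}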