author     Linus Torvalds <torvalds@linux-foundation.org>  2021-04-29 00:39:37 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>  2021-04-29 00:39:37 +0300
commit     fc0586062816559defb14c947319ef8c4c326fb3 (patch)
tree       5ca73bd1fc9de596a11e6d3549fd8fbf6f87dafc /drivers/nvme/target/tcp.c
parent     6c0029211382011af508273c4fc98a732f841d95 (diff)
parent     8324fbae75ce65fc2eb960a8434799dca48248ac (diff)
download   linux-fc0586062816559defb14c947319ef8c4c326fb3.tar.xz
Merge tag 'for-5.13/drivers-2021-04-27' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
- MD changes via Song:
    - raid5 POWER fix
    - raid1 failure fix
    - UAF fix for md cluster
    - mddev_find_or_alloc() clean up
    - Fix NULL pointer deref with external bitmap
    - Performance improvement for raid10 discard requests
    - Fix missing information in /proc/mdstat
- rsxx const qualifier removal (Arnd)
- Expose allocated brd pages (Calvin)
- rnbd via Gioh Kim:
    - Change maintainer
    - Change domain address of maintainers' email
    - Add polling IO mode and document update
    - Fix a memory leak and some bugs detected by static code analysis tools
    - Code refactoring
- Series of floppy cleanups/fixes (Denis)
- s390 dasd fixes (Julian)
- kerneldoc fixes (Lee)
- null_blk double free (Lv)
- null_blk virtual boundary addition (Max)
- Remove xsysace driver (Michal)
- umem driver removal (Davidlohr)
- ataflop fixes (Dan)
- Revalidate disk removal (Christoph)
- Bounce buffer cleanups (Christoph)
- Mark lightnvm as deprecated (Christoph)
- mtip32xx init cleanups (Shixin)
- Various fixes (Tian, Gustavo, Coly, Yang, Zhang, Zhiqiang)
* tag 'for-5.13/drivers-2021-04-27' of git://git.kernel.dk/linux-block: (143 commits)
async_xor: increase src_offs when dropping destination page
drivers/block/null_blk/main: Fix a double free in null_init.
md/raid1: properly indicate failure when ending a failed write request
md-cluster: fix use-after-free issue when removing rdev
nvme: introduce generic per-namespace chardev
nvme: cleanup nvme_configure_apst
nvme: do not try to reconfigure APST when the controller is not live
nvme: add 'kato' sysfs attribute
nvme: sanitize KATO setting
nvmet: avoid queuing keep-alive timer if it is disabled
brd: expose number of allocated pages in debugfs
ataflop: fix off by one in ataflop_probe()
ataflop: potential out of bounds in do_format()
drbd: Fix fall-through warnings for Clang
block/rnbd: Use strscpy instead of strlcpy
block/rnbd-clt-sysfs: Remove copy buffer overlap in rnbd_clt_get_path_name
block/rnbd-clt: Remove max_segment_size
block/rnbd-clt: Generate kobject_uevent when the rnbd device state changes
block/rnbd-srv: Remove unused arguments of rnbd_srv_rdma_ev
Documentation/ABI/rnbd-clt: Add description for nr_poll_queues
...
Diffstat (limited to 'drivers/nvme/target/tcp.c')
-rw-r--r--  drivers/nvme/target/tcp.c | 79
1 file changed, 67 insertions(+), 12 deletions(-)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index d658c6e8263a..f9f34f6caf5e 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -29,6 +29,16 @@ static int so_priority;
 module_param(so_priority, int, 0644);
 MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority");
 
+/* Define a time period (in usecs) that io_work() shall sample an activated
+ * queue before determining it to be idle. This optional module behavior
+ * can enable NIC solutions that support socket optimized packet processing
+ * using advanced interrupt moderation techniques.
+ */
+static int idle_poll_period_usecs;
+module_param(idle_poll_period_usecs, int, 0644);
+MODULE_PARM_DESC(idle_poll_period_usecs,
+		"nvmet tcp io_work poll till idle time period in usecs");
+
 #define NVMET_TCP_RECV_BUDGET		8
 #define NVMET_TCP_SEND_BUDGET		8
 #define NVMET_TCP_IO_WORK_BUDGET	64
@@ -119,6 +129,8 @@ struct nvmet_tcp_queue {
 	struct ahash_request	*snd_hash;
 	struct ahash_request	*rcv_hash;
 
+	unsigned long		poll_end;
+
 	spinlock_t		state_lock;
 	enum nvmet_tcp_queue_state state;
 
@@ -525,11 +537,36 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req)
 	struct nvmet_tcp_cmd *cmd =
 		container_of(req, struct nvmet_tcp_cmd, req);
 	struct nvmet_tcp_queue	*queue = cmd->queue;
+	struct nvme_sgl_desc *sgl;
+	u32 len;
+
+	if (unlikely(cmd == queue->cmd)) {
+		sgl = &cmd->req.cmd->common.dptr.sgl;
+		len = le32_to_cpu(sgl->length);
+
+		/*
+		 * Wait for inline data before processing the response.
+		 * Avoid using helpers, this might happen before
+		 * nvmet_req_init is completed.
+		 */
+		if (queue->rcv_state == NVMET_TCP_RECV_PDU &&
+		    len && len < cmd->req.port->inline_data_size &&
+		    nvme_is_write(cmd->req.cmd))
+			return;
+	}
 
 	llist_add(&cmd->lentry, &queue->resp_list);
 	queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &cmd->queue->io_work);
 }
 
+static void nvmet_tcp_execute_request(struct nvmet_tcp_cmd *cmd)
+{
+	if (unlikely(cmd->flags & NVMET_TCP_F_INIT_FAILED))
+		nvmet_tcp_queue_response(&cmd->req);
+	else
+		cmd->req.execute(&cmd->req);
+}
+
 static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
 {
 	u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
@@ -961,7 +998,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
 			le32_to_cpu(req->cmd->common.dptr.sgl.length));
 
 		nvmet_tcp_handle_req_failure(queue, queue->cmd, req);
-		return -EAGAIN;
+		return 0;
 	}
 
 	ret = nvmet_tcp_map_data(queue->cmd);
@@ -1104,10 +1141,8 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
 		return 0;
 	}
 
-	if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
-	    cmd->rbytes_done == cmd->req.transfer_len) {
-		cmd->req.execute(&cmd->req);
-	}
+	if (cmd->rbytes_done == cmd->req.transfer_len)
+		nvmet_tcp_execute_request(cmd);
 
 	nvmet_prepare_receive_pdu(queue);
 	return 0;
@@ -1144,9 +1179,9 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
 		goto out;
 	}
 
-	if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
-	    cmd->rbytes_done == cmd->req.transfer_len)
-		cmd->req.execute(&cmd->req);
+	if (cmd->rbytes_done == cmd->req.transfer_len)
+		nvmet_tcp_execute_request(cmd);
+
 	ret = 0;
 out:
 	nvmet_prepare_receive_pdu(queue);
@@ -1216,6 +1251,23 @@ static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
 	spin_unlock(&queue->state_lock);
 }
 
+static inline void nvmet_tcp_arm_queue_deadline(struct nvmet_tcp_queue *queue)
+{
+	queue->poll_end = jiffies + usecs_to_jiffies(idle_poll_period_usecs);
+}
+
+static bool nvmet_tcp_check_queue_deadline(struct nvmet_tcp_queue *queue,
+		int ops)
+{
+	if (!idle_poll_period_usecs)
+		return false;
+
+	if (ops)
+		nvmet_tcp_arm_queue_deadline(queue);
+
+	return !time_after(jiffies, queue->poll_end);
+}
+
 static void nvmet_tcp_io_work(struct work_struct *w)
 {
 	struct nvmet_tcp_queue *queue =
@@ -1241,9 +1293,10 @@ static void nvmet_tcp_io_work(struct work_struct *w)
 	} while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);
 
 	/*
-	 * We exahusted our budget, requeue our selves
+	 * Requeue the worker if idle deadline period is in progress or any
+	 * ops activity was recorded during the do-while loop above.
 	 */
-	if (pending)
+	if (nvmet_tcp_check_queue_deadline(queue, ops) || pending)
 		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
 }
 
@@ -1434,7 +1487,7 @@ static void nvmet_tcp_state_change(struct sock *sk)
 {
 	struct nvmet_tcp_queue *queue;
 
-	write_lock_bh(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	queue = sk->sk_user_data;
 	if (!queue)
 		goto done;
@@ -1452,7 +1505,7 @@ static void nvmet_tcp_state_change(struct sock *sk)
 			queue->idx, sk->sk_state);
 	}
 done:
-	write_unlock_bh(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
@@ -1501,6 +1554,8 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
 		sock->sk->sk_state_change = nvmet_tcp_state_change;
 		queue->write_space = sock->sk->sk_write_space;
 		sock->sk->sk_write_space = nvmet_tcp_write_space;
+		if (idle_poll_period_usecs)
+			nvmet_tcp_arm_queue_deadline(queue);
 
 		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
 	}
 	write_unlock_bh(&sock->sk->sk_callback_lock);
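The headline change in this file is the optional "poll until idle" mode: when idle_poll_period_usecs is non-zero, io_work() keeps rescheduling itself for that window after the last observed activity instead of parking immediately, which suits NICs doing aggressive interrupt moderation. What follows is a minimal standalone sketch of that deadline pattern, not the kernel code itself: jiffies, usecs_to_jiffies() and time_after() are modeled here with CLOCK_MONOTONIC microseconds, and the event loop and run_one_op() are hypothetical stand-ins for the send/recv budget loop in nvmet_tcp_io_work().

    /*
     * Userspace analogue of the nvmet-tcp idle-polling deadline above.
     * Names mirror the patch; the surrounding loop is illustrative only.
     */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    static long idle_poll_period_usecs = 1000;  /* 0 disables idle polling */
    static uint64_t poll_end_us;                /* mirrors queue->poll_end */

    static uint64_t now_us(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000 + (uint64_t)ts.tv_nsec / 1000;
    }

    /* Mirrors nvmet_tcp_arm_queue_deadline(): push the idle deadline out. */
    static void arm_queue_deadline(void)
    {
        poll_end_us = now_us() + (uint64_t)idle_poll_period_usecs;
    }

    /*
     * Mirrors nvmet_tcp_check_queue_deadline(): disabled means never poll
     * idle; any completed ops re-arm the deadline; otherwise keep polling
     * until the window since the last activity has elapsed.
     */
    static bool check_queue_deadline(int ops)
    {
        if (!idle_poll_period_usecs)
            return false;

        if (ops)
            arm_queue_deadline();

        return now_us() <= poll_end_us; /* i.e. !time_after(now, poll_end) */
    }

    /* Hypothetical work source: activity stops after a few passes. */
    static int run_one_op(void)
    {
        static int budget = 3;

        return budget-- > 0;
    }

    int main(void)
    {
        int rounds = 0;

        arm_queue_deadline();
        for (;;) {
            int ops = run_one_op();     /* ops completed this pass */

            rounds++;
            /*
             * The requeue decision at the end of nvmet_tcp_io_work():
             * keep the worker alive while work was done or the idle
             * window is still open.
             */
            if (!check_queue_deadline(ops) && !ops)
                break;
        }
        printf("worker parked after %d passes\n", rounds);
        return 0;
    }

Since the parameter is declared with module_param(..., 0644), it should also be tunable at runtime, presumably via /sys/module/nvmet_tcp/parameters/idle_poll_period_usecs; the default of 0 preserves the old behavior of requeueing only while work is pending.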