author    | Jens Axboe <axboe@kernel.dk> | 2024-02-02 20:20:05 +0300
committer | Jens Axboe <axboe@kernel.dk> | 2024-02-08 23:27:06 +0300
commit    | af5d68f8892f8ee8f137648b79ceb2abc153a19b
tree      | d76f98bfd630ee568e19f16aa2646de2b64dc1df /io_uring/io_uring.c
parent    | 2708af1adc11700c6c3ce4109e3b133079a36a78
download  | linux-af5d68f8892f8ee8f137648b79ceb2abc153a19b.tar.xz
io_uring/sqpoll: manage task_work privately
Decouple from task_work running, and cap the number of entries we process
at a time. If we exceed that number, push the remaining entries to a retry
list that we'll process first next time.
We cap the number of entries to process at 8, which is a fairly arbitrary
choice; we just want enough per-ctx batching here, while not processing
endlessly.
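To make the capped batching concrete, here is a small, self-contained userspace
sketch of the same pattern; the names (struct work, run_capped) are hypothetical
and this is not kernel code. Items are pushed LIFO like the kernel's lock-free
llist (the kernel reverses the list before running it, which the sketch skips),
at most 8 are handled per round, and whatever is left over is retried first on
the next round.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical work item; stands in for an llist-linked task_work entry. */
struct work {
	struct work *next;
	int id;
};

/* Run at most max_entries items, bumping *count; return the unprocessed tail. */
static struct work *run_capped(struct work *node, unsigned int max_entries,
			       unsigned int *count)
{
	while (node && *count < max_entries) {
		struct work *next = node->next;

		printf("handling work item %d\n", node->id);
		free(node);
		(*count)++;
		node = next;
	}
	return node;
}

int main(void)
{
	struct work *pending = NULL, *retry = NULL;
	unsigned int round = 0;

	/* Queue 20 items, pushed LIFO. */
	for (int i = 0; i < 20; i++) {
		struct work *w = malloc(sizeof(*w));

		w->id = i;
		w->next = pending;
		pending = w;
	}

	/* Consume in rounds of at most 8; leftovers are retried first next round. */
	while (pending || retry) {
		unsigned int count = 0;

		retry = run_capped(retry, 8, &count);
		if (count < 8) {
			retry = run_capped(pending, 8, &count);
			pending = NULL;
		}
		printf("round %u ran %u items\n", round++, count);
	}
	return 0;
}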
Since we run PF_IO_WORKER related task_work manually anyway, as the task
never exits to userspace, with this change we no longer need to add an
actual task_work item to the per-process list.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'io_uring/io_uring.c')
-rw-r--r-- | io_uring/io_uring.c | 46
1 file changed, 36 insertions(+), 10 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index bfd2f0fff153..f1e3646c727c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1173,7 +1173,14 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
 	percpu_ref_put(&ctx->refs);
 }
 
-static void handle_tw_list(struct llist_node *node, unsigned int *count)
+/*
+ * Run queued task_work, returning the number of entries processed in *count.
+ * If more entries than max_entries are available, stop processing once this
+ * is reached and return the rest of the list.
+ */
+struct llist_node *io_handle_tw_list(struct llist_node *node,
+				     unsigned int *count,
+				     unsigned int max_entries)
 {
 	struct io_ring_ctx *ctx = NULL;
 	struct io_tw_state ts = { };
@@ -1200,9 +1207,10 @@ static void handle_tw_list(struct llist_node *node, unsigned int *count)
 			ctx = NULL;
 			cond_resched();
 		}
-	} while (node);
+	} while (node && *count < max_entries);
 
 	ctx_flush_and_put(ctx, &ts);
+	return node;
 }
 
 /**
@@ -1247,27 +1255,41 @@ static __cold void io_fallback_tw(struct io_uring_task *tctx, bool sync)
 	}
 }
 
-void tctx_task_work(struct callback_head *cb)
+struct llist_node *tctx_task_work_run(struct io_uring_task *tctx,
+				      unsigned int max_entries,
+				      unsigned int *count)
 {
-	struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
-						  task_work);
 	struct llist_node *node;
-	unsigned int count = 0;
 
 	if (unlikely(current->flags & PF_EXITING)) {
 		io_fallback_tw(tctx, true);
-		return;
+		return NULL;
 	}
 
 	node = llist_del_all(&tctx->task_list);
-	if (node)
-		handle_tw_list(llist_reverse_order(node), &count);
+	if (node) {
+		node = llist_reverse_order(node);
+		node = io_handle_tw_list(node, count, max_entries);
+	}
 
 	/* relaxed read is enough as only the task itself sets ->in_cancel */
 	if (unlikely(atomic_read(&tctx->in_cancel)))
 		io_uring_drop_tctx_refs(current);
 
-	trace_io_uring_task_work_run(tctx, count);
+	trace_io_uring_task_work_run(tctx, *count);
+	return node;
+}
+
+void tctx_task_work(struct callback_head *cb)
+{
+	struct io_uring_task *tctx;
+	struct llist_node *ret;
+	unsigned int count = 0;
+
+	tctx = container_of(cb, struct io_uring_task, task_work);
+	ret = tctx_task_work_run(tctx, UINT_MAX, &count);
+	/* can't happen */
+	WARN_ON_ONCE(ret);
 }
 
 static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
@@ -1350,6 +1372,10 @@ static void io_req_normal_work_add(struct io_kiocb *req)
 	if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
 		atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
 
+	/* SQPOLL doesn't need the task_work added, it'll run it itself */
+	if (ctx->flags & IORING_SETUP_SQPOLL)
+		return;
+
 	if (likely(!task_work_add(req->task, &tctx->task_work,
 				  ctx->notify_method)))
 		return;
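The sqpoll.c half of the change is not part of this diffstat view, so the consumer
of the two new helpers is not shown above. Purely as an illustration of how an
SQPOLL thread could drive them (the function name and the caller-owned retry_list
parameter below are assumptions for the sketch, not code from this commit), it
might look something like:

/*
 * Illustrative sketch only: drive the helpers exported above from an
 * SQPOLL-style loop. Unprocessed entries are handed back to the caller,
 * which keeps them on a private retry list and runs them first next time.
 */
static unsigned int sqpoll_run_task_work(struct llist_node **retry_list,
					 unsigned int max_entries)
{
	struct io_uring_task *tctx = current->io_uring;
	unsigned int count = 0;

	/* Finish last round's leftovers first, still honoring the cap. */
	if (*retry_list) {
		*retry_list = io_handle_tw_list(*retry_list, &count, max_entries);
		if (count >= max_entries)
			return count;
		max_entries -= count;
	}

	/* Then pull newly queued task_work, capped to whatever budget is left. */
	*retry_list = tctx_task_work_run(tctx, max_entries, &count);
	return count;
}

With IORING_SETUP_SQPOLL now returning early in io_req_normal_work_add(), work for
such rings only ever lands on tctx->task_list, which a loop along these lines can
drain without any task_work_add() callback being registered.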