From af5d68f8892f8ee8f137648b79ceb2abc153a19b Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Fri, 2 Feb 2024 10:20:05 -0700
Subject: io_uring/sqpoll: manage task_work privately

Decouple from task_work running, and cap the number of entries we process
at a time. If we exceed that number, push remaining entries to a retry
list that we'll process first next time.

We cap the number of entries to process at 8, which is fairly random. We
just want to get enough per-ctx batching here, while not processing
endlessly.

Since we manually run PF_IO_WORKER related task_work anyway as the task
never exits to userspace, with this we no longer need to add an actual
task_work item to the per-process list.

Signed-off-by: Jens Axboe
---
 io_uring/sqpoll.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'io_uring/sqpoll.c')

diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 65b5dbe3c850..28bf0e085d31 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -18,6 +18,7 @@
 #include "sqpoll.h"
 
 #define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
+#define IORING_TW_CAP_ENTRIES_VALUE 8
 
 enum {
 	IO_SQ_THREAD_SHOULD_STOP = 0,
@@ -219,8 +220,31 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
 	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
 }
 
+/*
+ * Run task_work, processing the retry_list first. The retry_list holds
+ * entries that we passed on in the previous run, if we had more task_work
+ * than we were asked to process. Newly queued task_work isn't run until the
+ * retry list has been fully processed.
+ */
+static unsigned int io_sq_tw(struct llist_node **retry_list, unsigned int max_entries)
+{
+	struct io_uring_task *tctx = current->io_uring;
+	unsigned int count = 0;	/* updated through &count by the helpers below */
+
+	if (*retry_list) {
+		*retry_list = io_handle_tw_list(*retry_list, &count, max_entries);
+		if (count >= max_entries)	/* cap reached on the retry list alone */
+			return count;
+		max_entries -= count;	/* cannot wrap: count < max_entries here */
+	}
+
+	*retry_list = tctx_task_work_run(tctx, max_entries, &count);
+	return count;
+}
+
 static int io_sq_thread(void *data)
 {
+	struct llist_node *retry_list = NULL;
 	struct io_sq_data *sqd = data;
 	struct io_ring_ctx *ctx;
 	unsigned long timeout = 0;
@@ -257,7 +281,7 @@ static int io_sq_thread(void *data)
 			if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list)))
 				sqt_spin = true;
 		}
-		if (io_run_task_work())
+		if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE))
 			sqt_spin = true;
 
 		if (sqt_spin || !time_after(jiffies, timeout)) {
@@ -312,6 +336,9 @@ static int io_sq_thread(void *data)
 		timeout = jiffies + sqd->sq_thread_idle;
 	}
 
+	if (retry_list)
+		io_sq_tw(&retry_list, UINT_MAX);
+
 	io_uring_cancel_generic(true, sqd);
 	sqd->thread = NULL;
 	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
-- 
cgit v1.2.3