summary refs log tree commit diff
diff options
context:
space:
mode:
author: Dylan Yudaken <dylany@fb.com> 2022-06-22 16:40:23 +0300
committer: Jens Axboe <axboe@kernel.dk> 2022-07-25 03:39:15 +0300
commit: f88262e60bb9cb5740891672ce9f405e7f9393e5 (patch)
tree: bca01661645c35ef7958fd5b0a5f9438d9b9b477
parent: c34398a8c018e0d3d2d30b718d03c7290c696f51 (diff)
download: linux-f88262e60bb9cb5740891672ce9f405e7f9393e5.tar.xz
io_uring: lockless task list
With networking use cases we see contention on the spinlock used to protect the task_list when multiple threads try and add completions at once. Instead we can use a lockless list, and assume that the first caller to add to the list is responsible for kicking off task work. Signed-off-by: Dylan Yudaken <dylany@fb.com> Link: https://lore.kernel.org/r/20220622134028.2013417-4-dylany@fb.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--include/linux/io_uring_types.h2
-rw-r--r--io_uring/io_uring.c38
-rw-r--r--io_uring/tctx.c3
-rw-r--r--io_uring/tctx.h6
4 files changed, 14 insertions, 35 deletions
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 5987f8acca38..918165a20053 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -428,7 +428,7 @@ typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked);
struct io_task_work {
union {
- struct io_wq_work_node node;
+ struct llist_node node;
struct llist_node fallback_node;
};
io_req_tw_func_t func;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index bf7ca2b279d3..0124335c6d09 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -986,11 +986,12 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
percpu_ref_put(&ctx->refs);
}
-static void handle_tw_list(struct io_wq_work_node *node,
+
+static void handle_tw_list(struct llist_node *node,
struct io_ring_ctx **ctx, bool *locked)
{
do {
- struct io_wq_work_node *next = node->next;
+ struct llist_node *next = node->next;
struct io_kiocb *req = container_of(node, struct io_kiocb,
io_task_work.node);
@@ -1014,23 +1015,11 @@ void tctx_task_work(struct callback_head *cb)
struct io_ring_ctx *ctx = NULL;
struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
task_work);
+ struct llist_node *node = llist_del_all(&tctx->task_list);
- while (1) {
- struct io_wq_work_node *node;
-
- spin_lock_irq(&tctx->task_lock);
- node = tctx->task_list.first;
- INIT_WQ_LIST(&tctx->task_list);
- if (!node)
- tctx->task_running = false;
- spin_unlock_irq(&tctx->task_lock);
- if (!node)
- break;
+ if (node) {
handle_tw_list(node, &ctx, &uring_locked);
cond_resched();
-
- if (data_race(!tctx->task_list.first) && uring_locked)
- io_submit_flush_completions(ctx);
}
ctx_flush_and_put(ctx, &uring_locked);
@@ -1044,16 +1033,10 @@ void io_req_task_work_add(struct io_kiocb *req)
{
struct io_uring_task *tctx = req->task->io_uring;
struct io_ring_ctx *ctx = req->ctx;
- struct io_wq_work_node *node;
- unsigned long flags;
+ struct llist_node *node;
bool running;
- spin_lock_irqsave(&tctx->task_lock, flags);
- wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
- running = tctx->task_running;
- if (!running)
- tctx->task_running = true;
- spin_unlock_irqrestore(&tctx->task_lock, flags);
+ running = !llist_add(&req->io_task_work.node, &tctx->task_list);
/* task_work already pending, we're done */
if (running)
@@ -1065,11 +1048,8 @@ void io_req_task_work_add(struct io_kiocb *req)
if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
return;
- spin_lock_irqsave(&tctx->task_lock, flags);
- tctx->task_running = false;
- node = tctx->task_list.first;
- INIT_WQ_LIST(&tctx->task_list);
- spin_unlock_irqrestore(&tctx->task_lock, flags);
+
+ node = llist_del_all(&tctx->task_list);
while (node) {
req = container_of(node, struct io_kiocb, io_task_work.node);
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index 7a68ba9beec3..7f97d97fef0a 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -86,8 +86,7 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
atomic_set(&tctx->in_idle, 0);
atomic_set(&tctx->inflight_tracked, 0);
task->io_uring = tctx;
- spin_lock_init(&tctx->task_lock);
- INIT_WQ_LIST(&tctx->task_list);
+ init_llist_head(&tctx->task_list);
init_task_work(&tctx->task_work, tctx_task_work);
return 0;
}
diff --git a/io_uring/tctx.h b/io_uring/tctx.h
index c8566ea5dca4..8a33ff6e5d91 100644
--- a/io_uring/tctx.h
+++ b/io_uring/tctx.h
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/llist.h>
+
/*
* Arbitrary limit, can be raised if need be
*/
@@ -19,9 +21,7 @@ struct io_uring_task {
struct percpu_counter inflight;
struct { /* task_work */
- spinlock_t task_lock;
- bool task_running;
- struct io_wq_work_list task_list;
+ struct llist_head task_list;
struct callback_head task_work;
} ____cacheline_aligned_in_smp;
};