diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-03-10 19:31:29 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-03-10 19:31:29 +0300 |
commit | f331c5de7960d69fc767d2dc08f5f5859ce70061 (patch) | |
tree | 52cc2d35b1de98234bd7ad1ca53180dc7e15c3a5 | |
parent | 49be4fb28109b86a8ffe117415c306389a394cb2 (diff) | |
parent | fa780334a8c392d959ae05eb19f2410b3a1e6cb0 (diff) | |
download | linux-f331c5de7960d69fc767d2dc08f5f5859ce70061.tar.xz |
Merge tag 'io_uring-6.3-2023-03-09' of git://git.kernel.dk/linux
Pull io_uring fixes from Jens Axboe:
- Stop setting PF_NO_SETAFFINITY on io-wq workers.
This has been reported in the past as it confuses some applications:
attempts to affinitize some of their threads will fail with -1/EINVAL.
The most recent report was on cpusets, where enabling them with io-wq
workers active will fail.
Just deal with the mask changing by checking when a worker times out,
and then exit if we have no work pending.
- Fix an issue with passthrough support where we don't properly check
if the file type has pollable uring_cmd support.
- Fix a reported W=1 warning on a variable being set and unused. Add a
special helper for iterating these lists that doesn't save the
previous list element, for callers that never end up using it.
* tag 'io_uring-6.3-2023-03-09' of git://git.kernel.dk/linux:
io_uring: silence variable ‘prev’ set but not used warning
io_uring/uring_cmd: ensure that device supports IOPOLL
io_uring/io-wq: stop setting PF_NO_SETAFFINITY on io-wq workers
-rw-r--r-- | io_uring/io-wq.c | 16 | ||||
-rw-r--r-- | io_uring/io_uring.c | 4 | ||||
-rw-r--r-- | io_uring/slist.h | 5 | ||||
-rw-r--r-- | io_uring/uring_cmd.c | 4 |
4 files changed, 20 insertions, 9 deletions
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 411bb2d1acd4..f81c0a7136a5 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -616,7 +616,7 @@ static int io_wqe_worker(void *data) struct io_wqe_acct *acct = io_wqe_get_acct(worker); struct io_wqe *wqe = worker->wqe; struct io_wq *wq = wqe->wq; - bool last_timeout = false; + bool exit_mask = false, last_timeout = false; char buf[TASK_COMM_LEN]; worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); @@ -632,8 +632,11 @@ static int io_wqe_worker(void *data) io_worker_handle_work(worker); raw_spin_lock(&wqe->lock); - /* timed out, exit unless we're the last worker */ - if (last_timeout && acct->nr_workers > 1) { + /* + * Last sleep timed out. Exit if we're not the last worker, + * or if someone modified our affinity. + */ + if (last_timeout && (exit_mask || acct->nr_workers > 1)) { acct->nr_workers--; raw_spin_unlock(&wqe->lock); __set_current_state(TASK_RUNNING); @@ -652,7 +655,11 @@ static int io_wqe_worker(void *data) continue; break; } - last_timeout = !ret; + if (!ret) { + last_timeout = true; + exit_mask = !cpumask_test_cpu(raw_smp_processor_id(), + wqe->cpu_mask); + } } if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) @@ -704,7 +711,6 @@ static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker, tsk->worker_private = worker; worker->task = tsk; set_cpus_allowed_ptr(tsk, wqe->cpu_mask); - tsk->flags |= PF_NO_SETAFFINITY; raw_spin_lock(&wqe->lock); hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index fd1cc35a1c00..722624b6d0dc 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1499,14 +1499,14 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node) static void __io_submit_flush_completions(struct io_ring_ctx *ctx) __must_hold(&ctx->uring_lock) { - struct io_wq_work_node *node, *prev; struct io_submit_state *state = &ctx->submit_state; + struct io_wq_work_node *node; 
__io_cq_lock(ctx); /* must come first to preserve CQE ordering in failure cases */ if (state->cqes_count) __io_flush_post_cqes(ctx); - wq_list_for_each(node, prev, &state->compl_reqs) { + __wq_list_for_each(node, &state->compl_reqs) { struct io_kiocb *req = container_of(node, struct io_kiocb, comp_list); diff --git a/io_uring/slist.h b/io_uring/slist.h index 7c198a40d5f1..0eb194817242 100644 --- a/io_uring/slist.h +++ b/io_uring/slist.h @@ -3,6 +3,9 @@ #include <linux/io_uring_types.h> +#define __wq_list_for_each(pos, head) \ + for (pos = (head)->first; pos; pos = (pos)->next) + #define wq_list_for_each(pos, prv, head) \ for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next) @@ -113,4 +116,4 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) return container_of(work->list.next, struct io_wq_work, list); } -#endif // INTERNAL_IO_SLIST_H
\ No newline at end of file +#endif // INTERNAL_IO_SLIST_H diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 446a189b78b0..2e4c483075d3 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -108,7 +108,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) struct file *file = req->file; int ret; - if (!req->file->f_op->uring_cmd) + if (!file->f_op->uring_cmd) return -EOPNOTSUPP; ret = security_uring_cmd(ioucmd); @@ -120,6 +120,8 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) if (ctx->flags & IORING_SETUP_CQE32) issue_flags |= IO_URING_F_CQE32; if (ctx->flags & IORING_SETUP_IOPOLL) { + if (!file->f_op->uring_cmd_iopoll) + return -EOPNOTSUPP; issue_flags |= IO_URING_F_IOPOLL; req->iopoll_completed = 0; WRITE_ONCE(ioucmd->cookie, NULL); |