Diffstat (limited to 'io_uring/io_uring.c')
-rw-r--r--  io_uring/io_uring.c  80
1 file changed, 42 insertions(+), 38 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 99a52f34b7d3..8840cf3e20f2 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -176,6 +176,11 @@ static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
}
+static inline unsigned int __io_cqring_events_user(struct io_ring_ctx *ctx)
+{
+ return READ_ONCE(ctx->rings->cq.tail) - READ_ONCE(ctx->rings->cq.head);
+}
+
static bool io_match_linked(struct io_kiocb *head)
{
struct io_kiocb *req;
@@ -1106,6 +1111,8 @@ static void io_req_local_work_add(struct io_kiocb *req)
if (!llist_add(&req->io_task_work.node, &ctx->work_llist))
return;
+ /* need it for the following io_cqring_wake() */
+ smp_mb__after_atomic();
if (unlikely(atomic_read(&req->task->io_uring->in_idle))) {
io_move_task_work_from_local(ctx);
@@ -1117,8 +1124,7 @@ static void io_req_local_work_add(struct io_kiocb *req)
if (ctx->has_evfd)
io_eventfd_signal(ctx);
- io_cqring_wake(ctx);
-
+ __io_cqring_wake(ctx);
}
static inline void __io_req_task_work_add(struct io_kiocb *req, bool allow_local)
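The smp_mb__after_atomic() added in the hunk above orders the llist_add() against the lockless waiter check performed on the wake side. A minimal sketch of that waker/waiter pairing, using only generic kernel primitives; struct example_ctx, example_waker() and example_waiter() are illustration-only names, not io_uring code:

#include <linux/atomic.h>
#include <linux/llist.h>
#include <linux/sched.h>
#include <linux/wait.h>

/* Stand-in for the relevant io_ring_ctx members, for illustration only. */
struct example_ctx {
        struct llist_head work_llist;
        struct wait_queue_head cq_wait;
};

static void example_waker(struct example_ctx *ctx, struct llist_node *node)
{
        llist_add(node, &ctx->work_llist);      /* publish the work item */
        smp_mb__after_atomic();                 /* order the add against the check below */
        if (waitqueue_active(&ctx->cq_wait))    /* lockless check, needs the barrier above */
                wake_up(&ctx->cq_wait);
}

static void example_waiter(struct example_ctx *ctx)
{
        DEFINE_WAIT(wait);

        /* prepare_to_wait() queues us and implies a barrier before the re-check */
        prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE);
        if (llist_empty(&ctx->work_llist))
                schedule();
        finish_wait(&ctx->cq_wait, &wait);
}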
@@ -1172,7 +1178,7 @@ static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
}
}
-int __io_run_local_work(struct io_ring_ctx *ctx, bool locked)
+int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked)
{
struct llist_node *node;
struct llist_node fake;
@@ -1191,7 +1197,7 @@ again:
struct io_kiocb *req = container_of(node, struct io_kiocb,
io_task_work.node);
prefetch(container_of(next, struct io_kiocb, io_task_work.node));
- req->io_task_work.func(req, &locked);
+ req->io_task_work.func(req, locked);
ret++;
node = next;
}
@@ -1207,7 +1213,7 @@ again:
goto again;
}
- if (locked)
+ if (*locked)
io_submit_flush_completions(ctx);
trace_io_uring_local_work_run(ctx, ret, loops);
return ret;
@@ -1224,7 +1230,7 @@ int io_run_local_work(struct io_ring_ctx *ctx)
__set_current_state(TASK_RUNNING);
locked = mutex_trylock(&ctx->uring_lock);
- ret = __io_run_local_work(ctx, locked);
+ ret = __io_run_local_work(ctx, &locked);
if (locked)
mutex_unlock(&ctx->uring_lock);
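The io_run_local_work() change above follows the "locked by pointer" convention of the reworked __io_run_local_work(ctx, bool *locked): the caller trylocks, a callee may take the mutex itself, and the final state is handed back so the lock is released exactly once. A minimal sketch under made-up names (struct demo_ctx, run_one_item(), run_items()):

#include <linux/mutex.h>
#include <linux/types.h>

/* Stand-in context with only the lock that matters for the pattern. */
struct demo_ctx {
        struct mutex uring_lock;
};

static void run_one_item(struct demo_ctx *ctx, bool *locked)
{
        if (!*locked) {
                mutex_lock(&ctx->uring_lock);   /* this item needs the lock */
                *locked = true;                 /* report ownership back to the caller */
        }
        /* ... work that must run under uring_lock ... */
}

static void run_items(struct demo_ctx *ctx)
{
        bool locked = mutex_trylock(&ctx->uring_lock);

        run_one_item(ctx, &locked);
        if (locked)
                mutex_unlock(&ctx->uring_lock); /* single unlock, whichever path locked */
}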
@@ -1445,8 +1451,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
io_task_work_pending(ctx)) {
u32 tail = ctx->cached_cq_tail;
- if (!llist_empty(&ctx->work_llist))
- __io_run_local_work(ctx, true);
+ (void) io_run_local_work_locked(ctx);
if (task_work_pending(current) ||
wq_list_empty(&ctx->iopoll_list)) {
@@ -1586,8 +1591,6 @@ unsigned int io_file_get_flags(struct file *file)
res |= FFS_ISREG;
if (__io_file_supports_nowait(file, mode))
res |= FFS_NOWAIT;
- if (io_file_need_scm(file))
- res |= FFS_SCM;
return res;
}
@@ -1765,7 +1768,7 @@ int io_poll_issue(struct io_kiocb *req, bool *locked)
io_tw_lock(req->ctx, locked);
if (unlikely(req->task->flags & PF_EXITING))
return -EFAULT;
- return io_issue_sqe(req, IO_URING_F_NONBLOCK);
+ return io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_MULTISHOT);
}
struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
@@ -1859,7 +1862,6 @@ inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
/* mask in overlapping REQ_F and FFS bits */
req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT);
io_req_set_rsrc_node(req, ctx, 0);
- WARN_ON_ONCE(file && !test_bit(fd, ctx->file_table.bitmap));
out:
io_ring_submit_unlock(ctx, issue_flags);
return file;
@@ -2318,7 +2320,7 @@ static inline bool io_has_work(struct io_ring_ctx *ctx)
static inline bool io_should_wake(struct io_wait_queue *iowq)
{
struct io_ring_ctx *ctx = iowq->ctx;
- int dist = ctx->cached_cq_tail - (int) iowq->cq_tail;
+ int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail;
/*
* Wake up if we have enough events, or if a timeout occurred since we
@@ -2402,7 +2404,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
return ret;
io_cqring_overflow_flush(ctx);
- if (io_cqring_events(ctx) >= min_events)
+ /* if user messes with these they will just get an early return */
+ if (__io_cqring_events_user(ctx) >= min_events)
return 0;
} while (ret > 0);
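The wait path above now counts ready CQEs from the shared cq.head/cq.tail words instead of the kernel's cached tail, matching what userspace sees; a corrupted tail therefore only causes an early return. For context, a small sketch of the application-side view, assuming liburing (wait_for_completions() is a made-up helper name):

#include <liburing.h>

static int wait_for_completions(struct io_uring *ring, unsigned want)
{
        struct io_uring_cqe *cqe;

        /* io_uring_cq_ready() computes tail - head from the shared ring words */
        if (io_uring_cq_ready(ring) >= want)
                return 0;
        /* otherwise block in the kernel until 'want' completions are posted */
        return io_uring_wait_cqe_nr(ring, &cqe, want);
}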
@@ -2562,18 +2565,14 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
static void io_req_caches_free(struct io_ring_ctx *ctx)
{
- struct io_submit_state *state = &ctx->submit_state;
int nr = 0;
mutex_lock(&ctx->uring_lock);
- io_flush_cached_locked_reqs(ctx, state);
+ io_flush_cached_locked_reqs(ctx, &ctx->submit_state);
while (!io_req_cache_empty(ctx)) {
- struct io_wq_work_node *node;
- struct io_kiocb *req;
+ struct io_kiocb *req = io_alloc_req(ctx);
- node = wq_stack_extract(&state->free_list);
- req = container_of(node, struct io_kiocb, comp_list);
kmem_cache_free(req_cachep, req);
nr++;
}
@@ -2585,12 +2584,6 @@ static void io_req_caches_free(struct io_ring_ctx *ctx)
static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
{
io_sq_thread_finish(ctx);
-
- if (ctx->mm_account) {
- mmdrop(ctx->mm_account);
- ctx->mm_account = NULL;
- }
-
io_rsrc_refs_drop(ctx);
/* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */
io_wait_rsrc_data(ctx->buf_data);
@@ -2631,8 +2624,11 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
}
#endif
WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
- WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots);
+ if (ctx->mm_account) {
+ mmdrop(ctx->mm_account);
+ ctx->mm_account = NULL;
+ }
io_mem_free(ctx->rings);
io_mem_free(ctx->sq_sqes);
@@ -2813,15 +2809,12 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
io_poll_remove_all(ctx, NULL, true);
mutex_unlock(&ctx->uring_lock);
- /* failed during ring init, it couldn't have issued any requests */
- if (ctx->rings) {
+ /*
+ * If we failed setting up the ctx, we might not have any rings
+ * and therefore did not submit any requests
+ */
+ if (ctx->rings)
io_kill_timeouts(ctx, NULL, true);
- /* if we failed setting up the ctx, we might not have any rings */
- io_iopoll_try_reap_events(ctx);
- /* drop cached put refs after potentially doing completions */
- if (current->io_uring)
- io_uring_drop_tctx_refs(current);
- }
INIT_WORK(&ctx->exit_work, io_ring_exit_work);
/*
@@ -3229,8 +3222,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
mutex_unlock(&ctx->uring_lock);
goto out;
}
- if ((flags & IORING_ENTER_GETEVENTS) && ctx->syscall_iopoll)
- goto iopoll_locked;
+ if (flags & IORING_ENTER_GETEVENTS) {
+ if (ctx->syscall_iopoll)
+ goto iopoll_locked;
+ /*
+ * Ignore errors, we'll soon call io_cqring_wait() and
+ * it should handle ownership problems if any.
+ */
+ if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
+ (void)io_run_local_work_locked(ctx);
+ }
mutex_unlock(&ctx->uring_lock);
}
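The new IORING_ENTER_GETEVENTS branch above drains deferred local work before iopoll or cqring_wait, which only matters for rings created with IORING_SETUP_DEFER_TASKRUN. A setup-side sketch, assuming liburing (make_deferred_ring() is a made-up helper name):

#include <liburing.h>

static int make_deferred_ring(struct io_uring *ring)
{
        /*
         * DEFER_TASKRUN requires SINGLE_ISSUER; completion work is then only
         * run when this task enters the kernel, e.g. with IORING_ENTER_GETEVENTS.
         */
        return io_uring_queue_init(64, ring,
                                   IORING_SETUP_SINGLE_ISSUER |
                                   IORING_SETUP_DEFER_TASKRUN);
}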
@@ -3355,7 +3356,7 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
if (fd < 0)
return fd;
- ret = __io_uring_add_tctx_node(ctx, false);
+ ret = __io_uring_add_tctx_node(ctx);
if (ret) {
put_unused_fd(fd);
return ret;
@@ -3890,6 +3891,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
return -ENXIO;
+ if (ctx->submitter_task && ctx->submitter_task != current)
+ return -EEXIST;
+
if (ctx->restricted) {
if (opcode >= IORING_REGISTER_LAST)
return -EINVAL;