summaryrefslogtreecommitdiff
path: root/fs/io_uring.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--fs/io_uring.c64
1 files changed, 42 insertions, 22 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d3ee4fc532fa..e8e769be9ed0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1183,6 +1183,7 @@ static const struct io_op_def io_op_defs[] = {
.unbound_nonreg_file = 1,
.pollout = 1,
.needs_async_setup = 1,
+ .ioprio = 1,
.async_size = sizeof(struct io_async_msghdr),
},
[IORING_OP_RECVMSG] = {
@@ -1191,6 +1192,7 @@ static const struct io_op_def io_op_defs[] = {
.pollin = 1,
.buffer_select = 1,
.needs_async_setup = 1,
+ .ioprio = 1,
.async_size = sizeof(struct io_async_msghdr),
},
[IORING_OP_TIMEOUT] = {
@@ -1266,6 +1268,7 @@ static const struct io_op_def io_op_defs[] = {
.unbound_nonreg_file = 1,
.pollout = 1,
.audit_skip = 1,
+ .ioprio = 1,
},
[IORING_OP_RECV] = {
.needs_file = 1,
@@ -1273,6 +1276,7 @@ static const struct io_op_def io_op_defs[] = {
.pollin = 1,
.buffer_select = 1,
.audit_skip = 1,
+ .ioprio = 1,
},
[IORING_OP_OPENAT2] = {
},
@@ -1734,6 +1738,14 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
return;
/*
+ * READV uses fields in `struct io_rw` (len/addr) to stash the selected
+ * buffer data. However if that buffer is recycled the original request
+ * data stored in addr is lost. Therefore forbid recycling for now.
+ */
+ if (req->opcode == IORING_OP_READV)
+ return;
+
+ /*
* We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
* the flag and hence ensure that bl->head doesn't get incremented.
* If the tail has already been incremented, hang on to it.
@@ -1975,7 +1987,7 @@ static inline void io_req_track_inflight(struct io_kiocb *req)
{
if (!(req->flags & REQ_F_INFLIGHT)) {
req->flags |= REQ_F_INFLIGHT;
- atomic_inc(&current->io_uring->inflight_tracked);
+ atomic_inc(&req->task->io_uring->inflight_tracked);
}
}
@@ -3437,7 +3449,7 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
if (unlikely(res != req->cqe.res)) {
if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
io_rw_should_reissue(req)) {
- req->flags |= REQ_F_REISSUE;
+ req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
return true;
}
req_set_fail(req);
@@ -3487,7 +3499,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
kiocb_end_write(req);
if (unlikely(res != req->cqe.res)) {
if (res == -EAGAIN && io_rw_should_reissue(req)) {
- req->flags |= REQ_F_REISSUE;
+ req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
return;
}
req->cqe.res = res;
@@ -4314,18 +4326,19 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely(ret < 0))
return ret;
} else {
+ rw = req->async_data;
+ s = &rw->s;
+
/*
* Safe and required to re-import if we're using provided
* buffers, as we dropped the selected one before retry.
*/
- if (req->flags & REQ_F_BUFFER_SELECT) {
+ if (io_do_buffer_select(req)) {
ret = io_import_iovec(READ, req, &iovec, s, issue_flags);
if (unlikely(ret < 0))
return ret;
}
- rw = req->async_data;
- s = &rw->s;
/*
* We come here from an earlier attempt, restore our state to
* match in case it doesn't. It's cheap enough that we don't
@@ -5061,7 +5074,7 @@ static int io_uring_cmd_prep(struct io_kiocb *req,
{
struct io_uring_cmd *ioucmd = &req->uring_cmd;
- if (sqe->rw_flags)
+ if (sqe->rw_flags || sqe->__pad1)
return -EINVAL;
ioucmd->cmd = sqe->cmd;
ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
@@ -6075,14 +6088,12 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = &req->sr_msg;
- if (unlikely(sqe->file_index))
- return -EINVAL;
- if (unlikely(sqe->addr2 || sqe->file_index))
+ if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
- sr->flags = READ_ONCE(sqe->addr2);
+ sr->flags = READ_ONCE(sqe->ioprio);
if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
return -EINVAL;
sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
@@ -6313,14 +6324,12 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = &req->sr_msg;
- if (unlikely(sqe->file_index))
- return -EINVAL;
- if (unlikely(sqe->addr2 || sqe->file_index))
+ if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
- sr->flags = READ_ONCE(sqe->addr2);
+ sr->flags = READ_ONCE(sqe->ioprio);
if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
return -EINVAL;
sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
@@ -6954,7 +6963,8 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
io_req_complete_failed(req, ret);
}
-static void __io_poll_execute(struct io_kiocb *req, int mask, __poll_t events)
+static void __io_poll_execute(struct io_kiocb *req, int mask,
+ __poll_t __maybe_unused events)
{
req->cqe.res = mask;
/*
@@ -6963,7 +6973,6 @@ static void __io_poll_execute(struct io_kiocb *req, int mask, __poll_t events)
* CPU. We want to avoid pulling in req->apoll->events for that
* case.
*/
- req->apoll_events = events;
if (req->opcode == IORING_OP_POLL_ADD)
req->io_task_work.func = io_poll_task_func;
else
@@ -7114,6 +7123,8 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
io_init_poll_iocb(poll, mask, io_poll_wake);
poll->file = req->file;
+ req->apoll_events = poll->events;
+
ipt->pt._key = mask;
ipt->req = req;
ipt->error = 0;
@@ -7144,8 +7155,11 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
if (mask) {
/* can't multishot if failed, just queue the event we've got */
- if (unlikely(ipt->error || !ipt->nr_entries))
+ if (unlikely(ipt->error || !ipt->nr_entries)) {
poll->events |= EPOLLONESHOT;
+ req->apoll_events |= EPOLLONESHOT;
+ ipt->error = 0;
+ }
__io_poll_execute(req, mask, poll->events);
return 0;
}
@@ -7207,6 +7221,7 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
mask |= EPOLLEXCLUSIVE;
if (req->flags & REQ_F_POLLED) {
apoll = req->apoll;
+ kfree(apoll->double_poll);
} else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
!list_empty(&ctx->apoll_cache)) {
apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
@@ -7392,7 +7407,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
return -EINVAL;
io_req_set_refcount(req);
- req->apoll_events = poll->events = io_poll_parse_events(sqe, flags);
+ poll->events = io_poll_parse_events(sqe, flags);
return 0;
}
@@ -7405,6 +7420,8 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
ipt.pt._qproc = io_poll_queue_proc;
ret = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events);
+ if (!ret && ipt.error)
+ req_set_fail(req);
ret = ret ?: ipt.error;
if (ret)
__io_req_complete(req, issue_flags, ret, 0);
@@ -7964,6 +7981,9 @@ static int io_files_update_with_index_alloc(struct io_kiocb *req,
struct file *file;
int ret, fd;
+ if (!req->ctx->file_data)
+ return -ENXIO;
+
for (done = 0; done < req->rsrc_update.nr_args; done++) {
if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
ret = -EFAULT;
@@ -12919,7 +12939,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
struct io_uring_buf_ring *br;
struct io_uring_buf_reg reg;
- struct io_buffer_list *bl;
+ struct io_buffer_list *bl, *free_bl = NULL;
struct page **pages;
int nr_pages;
@@ -12951,7 +12971,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (bl->buf_nr_pages || !list_empty(&bl->buf_list))
return -EEXIST;
} else {
- bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+ free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
if (!bl)
return -ENOMEM;
}
@@ -12960,7 +12980,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
struct_size(br, bufs, reg.ring_entries),
&nr_pages);
if (IS_ERR(pages)) {
- kfree(bl);
+ kfree(free_bl);
return PTR_ERR(pages);
}