author     Linus Torvalds <torvalds@linux-foundation.org> 2022-12-13 21:33:08 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org> 2022-12-13 21:33:08 +0300
commit     54e60e505d6144a22c787b5be1fdce996a27be1b (patch)
tree       a0b582fa8d9de216fef4fb6320199bb055125c65 /fs
parent     d523ec4c6af4314575d6ab8b52629ae3e2039a50 (diff)
parent     5d772916855f593672de55c437925daccc8ecd73 (diff)
download   linux-54e60e505d6144a22c787b5be1fdce996a27be1b.tar.xz
Merge tag 'for-6.2/io_uring-2022-12-08' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe:

 - Always ensure proper ordering in case of CQ ring overflow, which
   then means we can remove some work-arounds for that (Dylan)

 - Support completion batching for multishot, greatly increasing the
   efficiency for those (Dylan)

 - Flag epoll/eventfd wakeups done from io_uring, so that we can
   easily tell if we're recursing into io_uring again. Previously,
   this would have resulted in repeated multishot notifications if we
   had a dependency there. That could happen if an eventfd was
   registered as the ring eventfd, and we multishot polled for events
   on it. Or if an io_uring fd was added to epoll, and io_uring had a
   multishot request for the epoll fd (a userspace sketch of this
   scenario follows the shortlog below).

   Test cases here:
   https://git.kernel.dk/cgit/liburing/commit/?id=919755a7d0096fda08fb6d65ac54ad8d0fe027cd

   Previously these got terminated when the CQ ring eventually
   overflowed; now it's handled gracefully (me).

 - Tightening of the IOPOLL based completions (Pavel)

 - Optimizations of the networking zero-copy paths (Pavel)

 - Various tweaks and fixes (Dylan, Pavel)

* tag 'for-6.2/io_uring-2022-12-08' of git://git.kernel.dk/linux: (41 commits)
  io_uring: keep unlock_post inlined in hot path
  io_uring: don't use complete_post in kbuf
  io_uring: spelling fix
  io_uring: remove io_req_complete_post_tw
  io_uring: allow multishot polled reqs to defer completion
  io_uring: remove overflow param from io_post_aux_cqe
  io_uring: add lockdep assertion in io_fill_cqe_aux
  io_uring: make io_fill_cqe_aux static
  io_uring: add io_aux_cqe which allows deferred completion
  io_uring: allow defer completion for aux posted cqes
  io_uring: defer all io_req_complete_failed
  io_uring: always lock in io_apoll_task_func
  io_uring: remove iopoll spinlock
  io_uring: iopoll protect complete_post
  io_uring: inline __io_req_complete_put()
  io_uring: remove io_req_tw_post_queue
  io_uring: use io_req_task_complete() in timeout
  io_uring: hold locks for io_req_complete_failed
  io_uring: add completion locking for iopoll
  io_uring: kill io_cqring_ev_posted() and __io_cq_unlock_post()
  ...
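The recursion scenario in the third bullet is easy to reproduce from
userspace. Below is a minimal C sketch, assuming liburing is available;
it is illustrative only and not one of the linked liburing test cases.
It registers an eventfd as the ring's CQ eventfd and then
multishot-polls that same eventfd, so every posted CQE signals the
eventfd, which completes the multishot poll and posts another CQE;
before this series, that loop only ended once the CQ ring overflowed.

/*
 * Minimal sketch of the eventfd recursion scenario, assuming liburing
 * is installed. Illustrative only; not one of the linked test cases.
 */
#include <sys/eventfd.h>
#include <poll.h>
#include <stdio.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int efd = eventfd(0, 0);

	io_uring_queue_init(8, &ring, 0);

	/* Every CQE posted to this ring now signals efd... */
	io_uring_register_eventfd(&ring, efd);

	/*
	 * ...and this multishot poll posts a CQE whenever efd becomes
	 * readable, closing the loop: CQE -> eventfd -> poll CQE -> ...
	 */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_poll_multishot(sqe, efd, POLLIN);
	io_uring_submit(&ring);

	/* Kick the loop off once from the outside. */
	eventfd_write(efd, 1);

	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
		printf("poll res: %d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	return 0;
}

With the EPOLL_URING_WAKE flag introduced by this merge, the kernel can
tell that such a wakeup originated from io_uring and terminate the
multishot request gracefully instead of spinning until overflow.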
Diffstat (limited to 'fs')
-rw-r--r--   fs/eventfd.c    37
-rw-r--r--   fs/eventpoll.c  18
2 files changed, 31 insertions(+), 24 deletions(-)
diff --git a/fs/eventfd.c b/fs/eventfd.c
index c0ffee99ad23..249ca6c0b784 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -43,21 +43,7 @@ struct eventfd_ctx {
	int id;
};
-/**
- * eventfd_signal - Adds @n to the eventfd counter.
- * @ctx: [in] Pointer to the eventfd context.
- * @n: [in] Value of the counter to be added to the eventfd internal counter.
- * The value cannot be negative.
- *
- * This function is supposed to be called by the kernel in paths that do not
- * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
- * value, and we signal this as overflow condition by returning a EPOLLERR
- * to poll(2).
- *
- * Returns the amount by which the counter was incremented. This will be less
- * than @n if the counter has overflowed.
- */
-__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
+__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask)
{
	unsigned long flags;
@@ -78,12 +64,31 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
		n = ULLONG_MAX - ctx->count;
	ctx->count += n;
	if (waitqueue_active(&ctx->wqh))
-		wake_up_locked_poll(&ctx->wqh, EPOLLIN);
+		wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask);
	current->in_eventfd = 0;
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
	return n;
}
+
+/**
+ * eventfd_signal - Adds @n to the eventfd counter.
+ * @ctx: [in] Pointer to the eventfd context.
+ * @n: [in] Value of the counter to be added to the eventfd internal counter.
+ * The value cannot be negative.
+ *
+ * This function is supposed to be called by the kernel in paths that do not
+ * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
+ * value, and we signal this as overflow condition by returning a EPOLLERR
+ * to poll(2).
+ *
+ * Returns the amount by which the counter was incremented. This will be less
+ * than @n if the counter has overflowed.
+ */
+__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
+{
+	return eventfd_signal_mask(ctx, n, 0);
+}
EXPORT_SYMBOL_GPL(eventfd_signal);
static void eventfd_free_ctx(struct eventfd_ctx *ctx)
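The eventfd.c hunks above split the old entry point in two:
eventfd_signal() keeps its documented behaviour by forwarding a zero
mask, while callers that need to tag their wakeups use
eventfd_signal_mask() with extra poll flags. A hedged sketch of the
intended caller follows; the wrapper name and how the eventfd_ctx is
obtained are hypothetical, since the io_uring call site is outside this
fs/ diff. Only eventfd_signal_mask() and EPOLL_URING_WAKE come from
this merge.

#include <linux/eventfd.h>
#include <linux/eventpoll.h>

/* Hypothetical wrapper; the real caller is on the io_uring side. */
static void sketch_signal_ring_eventfd(struct eventfd_ctx *cq_ev_fd)
{
	/*
	 * The extra mask is ORed into the EPOLLIN wakeup key inside
	 * eventfd_signal_mask(), so poll waiters can tell this wakeup
	 * originated from io_uring and avoid recursing into it.
	 */
	eventfd_signal_mask(cq_ev_fd, 1, EPOLL_URING_WAKE);
}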
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 52954d4637b5..64659b110973 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -491,7 +491,8 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
+static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
+			     unsigned pollflags)
{
	struct eventpoll *ep_src;
	unsigned long flags;
@@ -522,16 +523,17 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
	}
	spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests);
	ep->nests = nests + 1;
-	wake_up_locked_poll(&ep->poll_wait, EPOLLIN);
+	wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags);
	ep->nests = 0;
	spin_unlock_irqrestore(&ep->poll_wait.lock, flags);
}
#else
-static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
+static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
+			     unsigned pollflags)
{
-	wake_up_poll(&ep->poll_wait, EPOLLIN);
+	wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags);
}
#endif
@@ -742,7 +744,7 @@ static void ep_free(struct eventpoll *ep)
	/* We need to release all tasks waiting for these file */
	if (waitqueue_active(&ep->poll_wait))
-		ep_poll_safewake(ep, NULL);
+		ep_poll_safewake(ep, NULL, 0);
	/*
	 * We need to lock this because we could be hit by
@@ -1208,7 +1210,7 @@ out_unlock:
	/* We have to call this outside the lock */
	if (pwake)
-		ep_poll_safewake(ep, epi);
+		ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE);
	if (!(epi->event.events & EPOLLEXCLUSIVE))
		ewake = 1;
@@ -1553,7 +1555,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
	/* We have to call this outside the lock */
	if (pwake)
-		ep_poll_safewake(ep, NULL);
+		ep_poll_safewake(ep, NULL, 0);
	return 0;
}
@@ -1629,7 +1631,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
	/* We have to call this outside the lock */
	if (pwake)
-		ep_poll_safewake(ep, NULL);
+		ep_poll_safewake(ep, NULL, 0);
	return 0;
}
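On the wakeup path shown above, ep_poll_safewake() now ORs the
caller-supplied pollflags into the wakeup key, and ep_poll_callback()
forwards pollflags & EPOLL_URING_WAKE when waking nested waiters, so
the tag survives epoll-in-epoll nesting. Below is a hedged sketch of
how a wait-queue callback can consume the flag; the function is
illustrative only, since the real consumer in this series is io_uring's
poll wake handler, which lives outside fs/ (it downgrades a multishot
poll to one-shot rather than suppressing the wakeup outright).

#include <linux/poll.h>
#include <linux/wait.h>
#include <linux/eventpoll.h>

/* Hypothetical wake callback; not part of this diff. */
static int sketch_poll_wake(struct wait_queue_entry *wait, unsigned mode,
			    int sync, void *key)
{
	__poll_t mask = key_to_poll(key);

	/*
	 * ep_poll_safewake() and eventfd_signal_mask() now carry the
	 * caller-supplied flags in the key, so a wakeup that came from
	 * inside io_uring arrives with EPOLL_URING_WAKE set.
	 */
	if (mask & EPOLL_URING_WAKE)
		return 0;	/* recursive wakeup: don't re-arm/loop */

	/* ...normal wakeup handling, e.g. queue a completion... */
	return 1;
}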