Diffstat (limited to 'meta-openembedded/meta-oe/recipes-devtools/nodejs/nodejs/0002-Revert-io_uring-changes-from-libuv-1.45.0.patch')
-rw-r--r-- | meta-openembedded/meta-oe/recipes-devtools/nodejs/nodejs/0002-Revert-io_uring-changes-from-libuv-1.45.0.patch | 1803
1 file changed, 1803 insertions, 0 deletions
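For context before the patch body: the revert below removes libuv's io_uring fast path, which (as the reverted uv__use_io_uring() further down in the patch shows) was enabled by default and gated at runtime on the UV_USE_IO_URING environment variable. A minimal standalone sketch of that tri-state gate, with illustrative names (use_io_uring, cached) rather than libuv's internal ones:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Tri-state cache, mirroring the reverted uv__use_io_uring():
 * 0 = environment not probed yet, 1 = io_uring enabled, -1 = disabled. */
static int use_io_uring(void) {
  static _Atomic int cached;
  char* val;
  int use;

  use = atomic_load_explicit(&cached, memory_order_relaxed);

  if (use == 0) {
    /* Default is enabled; UV_USE_IO_URING=0 opts out at runtime. */
    val = getenv("UV_USE_IO_URING");
    use = (val == NULL || atoi(val)) ? 1 : -1;
    atomic_store_explicit(&cached, use, memory_order_relaxed);
  }

  return use > 0;
}

int main(void) {
  printf("io_uring %s\n", use_io_uring() ? "enabled" : "disabled");
  return 0;
}

On libuv 1.45.x this gate means UV_USE_IO_URING=0 disables io_uring per process without a rebuild; the recipe here goes further and reverts the code entirely.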
diff --git a/meta-openembedded/meta-oe/recipes-devtools/nodejs/nodejs/0002-Revert-io_uring-changes-from-libuv-1.45.0.patch b/meta-openembedded/meta-oe/recipes-devtools/nodejs/nodejs/0002-Revert-io_uring-changes-from-libuv-1.45.0.patch
new file mode 100644
index 0000000000..77cd53b759
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-devtools/nodejs/nodejs/0002-Revert-io_uring-changes-from-libuv-1.45.0.patch
@@ -0,0 +1,1803 @@
+From 6d2ef4c8ba2304ee4941a6719b3ad9bd63e415a9 Mon Sep 17 00:00:00 2001
+From: Martin Jansa <martin.jansa@gmail.com>
+Date: Wed, 18 Oct 2023 21:09:44 +0200
+Subject: [PATCH] Revert io_uring changes from libuv-1.45.0
+
+This reverts https://github.com/libuv/libuv/pull/3952/commits/26c79a942b92573a1388c0ee8a6ad4397f009318
+
+Included in nodejs-20.3.0 with the libuv upgrade to 1.45.0 in:
+https://github.com/nodejs/node/commit/bfcb3d1d9a876f399013d326bd65804f9eda77e4
+
+Reverted libuv commits:
+Revert "linux: fix WRITEV with lots of bufs using io_uring (#4004)"
+This reverts commit ef6a9a624df0a00687037474025a3608472f722a.
+Revert "linux: work around EOWNERDEAD io_uring kernel bug (#4002)"
+This reverts commit d23a20f62cc50b9fd7694992263f1d296d8f5cb4.
+Revert "unix: handle CQ overflow in iou ring (#3991)"
+This reverts commit 30fc896cc1b5822e9f1eb462587fe4b368a6215c.
+Revert "unix: constrained_memory should return UINT64_MAX (#3753)"
+This reverts commit 6ad347fae4520f39520d34bd7c7f5ddafab13a69.
+Revert "linux: use io_uring to batch epoll_ctl calls (#3979)"
+This reverts commit 6e073ef5daf93b708a654008959b823b58029e88.
+Revert "linux: fix logic bug in sqe ring space check (#3980)"
+This reverts commit f27208224084fc972b9d2802486d97ef31b51a39.
+Revert "src: fix events/events_waiting metrics counter (#3957)"
+This reverts commit e02642cf3b768b2c58a41f97fa38507e032ae415.
+Revert "linux: remove bug workaround for obsolete kernels (#3965)"
+This reverts commit 1c935a34454167b23f8eef7f0f63d7119f0de747.
+Revert "linux: add IORING_OP_CLOSE support (#3964)"
+This reverts commit dfae365f844e127621128a76bce7165e3f99a8d9.
+Revert "linux: add IORING_OP_OPENAT support (#3963)"
+This reverts commit 5ca5e475bb1711e65323ef1594a31818e5a1a9eb.
+Revert "linux: fix academic valgrind warning (#3960)"
+This reverts commit a7ff759ca1deacb2e0e6ae3c2d3dce91cc637dfe.
+Revert "linux: introduce io_uring support (#3952)"
+This reverts commit d2c31f429b87b476a7f1344d145dad4752a406d4.
+
+Dropped deps/uv/docs deps/uv/test changes as these dirs aren't included
+in nodejs tarballs.
+ +Signed-off-by: Martin Jansa <martin.jansa@gmail.com> +--- +Upstream-Status: Inappropriate [OE specific] + + deps/uv/docs/src/fs.rst | 6 - + deps/uv/docs/src/misc.rst | 5 +- + deps/uv/src/threadpool.c | 21 - + deps/uv/src/unix/aix.c | 11 +- + deps/uv/src/unix/fs.c | 57 +- + deps/uv/src/unix/internal.h | 23 - + deps/uv/src/unix/kqueue.c | 10 +- + deps/uv/src/unix/linux.c | 998 +++----------------------- + deps/uv/src/unix/os390.c | 11 +- + deps/uv/src/unix/posix-poll.c | 11 +- + deps/uv/src/uv-common.h | 28 - + deps/uv/src/win/core.c | 20 +- + deps/uv/test/test-fs.c | 33 - + deps/uv/test/test-list.h | 4 - + deps/uv/test/test-metrics.c | 151 ---- + deps/uv/test/test-threadpool-cancel.c | 34 +- + 16 files changed, 138 insertions(+), 1285 deletions(-) + +diff --git a/deps/uv/src/threadpool.c b/deps/uv/src/threadpool.c +index 51962bf0021..a3da53026f9 100644 +--- a/deps/uv/src/threadpool.c ++++ b/deps/uv/src/threadpool.c +@@ -275,13 +275,9 @@ void uv__work_submit(uv_loop_t* loop, + } + + +-/* TODO(bnoordhuis) teach libuv how to cancel file operations +- * that go through io_uring instead of the thread pool. +- */ + static int uv__work_cancel(uv_loop_t* loop, uv_req_t* req, struct uv__work* w) { + int cancelled; + +- uv_once(&once, init_once); /* Ensure |mutex| is initialized. */ + uv_mutex_lock(&mutex); + uv_mutex_lock(&w->loop->wq_mutex); + +@@ -311,15 +307,12 @@ void uv__work_done(uv_async_t* handle) { + QUEUE* q; + QUEUE wq; + int err; +- int nevents; + + loop = container_of(handle, uv_loop_t, wq_async); + uv_mutex_lock(&loop->wq_mutex); + QUEUE_MOVE(&loop->wq, &wq); + uv_mutex_unlock(&loop->wq_mutex); + +- nevents = 0; +- + while (!QUEUE_EMPTY(&wq)) { + q = QUEUE_HEAD(&wq); + QUEUE_REMOVE(q); +@@ -327,20 +320,6 @@ void uv__work_done(uv_async_t* handle) { + w = container_of(q, struct uv__work, wq); + err = (w->work == uv__cancelled) ? UV_ECANCELED : 0; + w->done(w, err); +- nevents++; +- } +- +- /* This check accomplishes 2 things: +- * 1. Even if the queue was empty, the call to uv__work_done() should count +- * as an event. Which will have been added by the event loop when +- * calling this callback. +- * 2. Prevents accidental wrap around in case nevents == 0 events == 0. +- */ +- if (nevents > 1) { +- /* Subtract 1 to counter the call to uv__work_done(). */ +- uv__metrics_inc_events(loop, nevents - 1); +- if (uv__get_internal_fields(loop)->current_timeout == 0) +- uv__metrics_inc_events_waiting(loop, nevents - 1); + } + } + +diff --git a/deps/uv/src/unix/aix.c b/deps/uv/src/unix/aix.c +index f1afbed49ec..b855282ebc8 100644 +--- a/deps/uv/src/unix/aix.c ++++ b/deps/uv/src/unix/aix.c +@@ -131,7 +131,6 @@ int uv__io_check_fd(uv_loop_t* loop, int fd) { + + + void uv__io_poll(uv_loop_t* loop, int timeout) { +- uv__loop_internal_fields_t* lfields; + struct pollfd events[1024]; + struct pollfd pqry; + struct pollfd* pe; +@@ -155,8 +154,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + return; + } + +- lfields = uv__get_internal_fields(loop); +- + while (!QUEUE_EMPTY(&loop->watcher_queue)) { + q = QUEUE_HEAD(&loop->watcher_queue); + QUEUE_REMOVE(q); +@@ -220,7 +217,7 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + base = loop->time; + count = 48; /* Benchmarks suggest this gives the best throughput. 
*/ + +- if (lfields->flags & UV_METRICS_IDLE_TIME) { ++ if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) { + reset_timeout = 1; + user_timeout = timeout; + timeout = 0; +@@ -235,12 +232,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + if (timeout != 0) + uv__metrics_set_provider_entry_time(loop); + +- /* Store the current timeout in a location that's globally accessible so +- * other locations like uv__work_done() can determine whether the queue +- * of events in the callback were waiting when poll was called. +- */ +- lfields->current_timeout = timeout; +- + nfds = pollset_poll(loop->backend_fd, + events, + ARRAY_SIZE(events), +diff --git a/deps/uv/src/unix/fs.c b/deps/uv/src/unix/fs.c +index 00d385c24b7..c696a8d5500 100644 +--- a/deps/uv/src/unix/fs.c ++++ b/deps/uv/src/unix/fs.c +@@ -61,11 +61,10 @@ + #endif + + #if defined(__linux__) +-# include <sys/sendfile.h> +-# include <sys/utsname.h> ++# include "sys/utsname.h" + #endif + +-#if defined(__sun) ++#if defined(__linux__) || defined(__sun) + # include <sys/sendfile.h> + # include <sys/sysmacros.h> + #endif +@@ -1554,7 +1553,26 @@ static int uv__fs_statx(int fd, + return UV_ENOSYS; + } + +- uv__statx_to_stat(&statxbuf, buf); ++ buf->st_dev = makedev(statxbuf.stx_dev_major, statxbuf.stx_dev_minor); ++ buf->st_mode = statxbuf.stx_mode; ++ buf->st_nlink = statxbuf.stx_nlink; ++ buf->st_uid = statxbuf.stx_uid; ++ buf->st_gid = statxbuf.stx_gid; ++ buf->st_rdev = makedev(statxbuf.stx_rdev_major, statxbuf.stx_rdev_minor); ++ buf->st_ino = statxbuf.stx_ino; ++ buf->st_size = statxbuf.stx_size; ++ buf->st_blksize = statxbuf.stx_blksize; ++ buf->st_blocks = statxbuf.stx_blocks; ++ buf->st_atim.tv_sec = statxbuf.stx_atime.tv_sec; ++ buf->st_atim.tv_nsec = statxbuf.stx_atime.tv_nsec; ++ buf->st_mtim.tv_sec = statxbuf.stx_mtime.tv_sec; ++ buf->st_mtim.tv_nsec = statxbuf.stx_mtime.tv_nsec; ++ buf->st_ctim.tv_sec = statxbuf.stx_ctime.tv_sec; ++ buf->st_ctim.tv_nsec = statxbuf.stx_ctime.tv_nsec; ++ buf->st_birthtim.tv_sec = statxbuf.stx_btime.tv_sec; ++ buf->st_birthtim.tv_nsec = statxbuf.stx_btime.tv_nsec; ++ buf->st_flags = 0; ++ buf->st_gen = 0; + + return 0; + #else +@@ -1798,9 +1816,6 @@ int uv_fs_chown(uv_loop_t* loop, + int uv_fs_close(uv_loop_t* loop, uv_fs_t* req, uv_file file, uv_fs_cb cb) { + INIT(CLOSE); + req->file = file; +- if (cb != NULL) +- if (uv__iou_fs_close(loop, req)) +- return 0; + POST; + } + +@@ -1848,9 +1863,6 @@ int uv_fs_lchown(uv_loop_t* loop, + int uv_fs_fdatasync(uv_loop_t* loop, uv_fs_t* req, uv_file file, uv_fs_cb cb) { + INIT(FDATASYNC); + req->file = file; +- if (cb != NULL) +- if (uv__iou_fs_fsync_or_fdatasync(loop, req, /* IORING_FSYNC_DATASYNC */ 1)) +- return 0; + POST; + } + +@@ -1858,9 +1870,6 @@ int uv_fs_fdatasync(uv_loop_t* loop, uv_fs_t* req, uv_file file, uv_fs_cb cb) { + int uv_fs_fstat(uv_loop_t* loop, uv_fs_t* req, uv_file file, uv_fs_cb cb) { + INIT(FSTAT); + req->file = file; +- if (cb != NULL) +- if (uv__iou_fs_statx(loop, req, /* is_fstat */ 1, /* is_lstat */ 0)) +- return 0; + POST; + } + +@@ -1868,9 +1877,6 @@ int uv_fs_fstat(uv_loop_t* loop, uv_fs_t* req, uv_file file, uv_fs_cb cb) { + int uv_fs_fsync(uv_loop_t* loop, uv_fs_t* req, uv_file file, uv_fs_cb cb) { + INIT(FSYNC); + req->file = file; +- if (cb != NULL) +- if (uv__iou_fs_fsync_or_fdatasync(loop, req, /* no flags */ 0)) +- return 0; + POST; + } + +@@ -1917,9 +1923,6 @@ int uv_fs_lutime(uv_loop_t* loop, + int uv_fs_lstat(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_fs_cb cb) { + INIT(LSTAT); + PATH; +- if 
(cb != NULL) +- if (uv__iou_fs_statx(loop, req, /* is_fstat */ 0, /* is_lstat */ 1)) +- return 0; + POST; + } + +@@ -1981,9 +1984,6 @@ int uv_fs_open(uv_loop_t* loop, + PATH; + req->flags = flags; + req->mode = mode; +- if (cb != NULL) +- if (uv__iou_fs_open(loop, req)) +- return 0; + POST; + } + +@@ -2012,11 +2012,6 @@ int uv_fs_read(uv_loop_t* loop, uv_fs_t* req, + memcpy(req->bufs, bufs, nbufs * sizeof(*bufs)); + + req->off = off; +- +- if (cb != NULL) +- if (uv__iou_fs_read_or_write(loop, req, /* is_read */ 1)) +- return 0; +- + POST; + } + +@@ -2124,9 +2119,6 @@ int uv_fs_sendfile(uv_loop_t* loop, + int uv_fs_stat(uv_loop_t* loop, uv_fs_t* req, const char* path, uv_fs_cb cb) { + INIT(STAT); + PATH; +- if (cb != NULL) +- if (uv__iou_fs_statx(loop, req, /* is_fstat */ 0, /* is_lstat */ 0)) +- return 0; + POST; + } + +@@ -2190,11 +2182,6 @@ int uv_fs_write(uv_loop_t* loop, + memcpy(req->bufs, bufs, nbufs * sizeof(*bufs)); + + req->off = off; +- +- if (cb != NULL) +- if (uv__iou_fs_read_or_write(loop, req, /* is_read */ 0)) +- return 0; +- + POST; + } + +diff --git a/deps/uv/src/unix/internal.h b/deps/uv/src/unix/internal.h +index 6c5822e6a0d..d439ae6dd3d 100644 +--- a/deps/uv/src/unix/internal.h ++++ b/deps/uv/src/unix/internal.h +@@ -329,28 +329,6 @@ int uv__random_getentropy(void* buf, size_t buflen); + int uv__random_readpath(const char* path, void* buf, size_t buflen); + int uv__random_sysctl(void* buf, size_t buflen); + +-/* io_uring */ +-#ifdef __linux__ +-int uv__iou_fs_close(uv_loop_t* loop, uv_fs_t* req); +-int uv__iou_fs_fsync_or_fdatasync(uv_loop_t* loop, +- uv_fs_t* req, +- uint32_t fsync_flags); +-int uv__iou_fs_open(uv_loop_t* loop, uv_fs_t* req); +-int uv__iou_fs_read_or_write(uv_loop_t* loop, +- uv_fs_t* req, +- int is_read); +-int uv__iou_fs_statx(uv_loop_t* loop, +- uv_fs_t* req, +- int is_fstat, +- int is_lstat); +-#else +-#define uv__iou_fs_close(loop, req) 0 +-#define uv__iou_fs_fsync_or_fdatasync(loop, req, fsync_flags) 0 +-#define uv__iou_fs_open(loop, req) 0 +-#define uv__iou_fs_read_or_write(loop, req, is_read) 0 +-#define uv__iou_fs_statx(loop, req, is_fstat, is_lstat) 0 +-#endif +- + #if defined(__APPLE__) + int uv___stream_fd(const uv_stream_t* handle); + #define uv__stream_fd(handle) (uv___stream_fd((const uv_stream_t*) (handle))) +@@ -427,7 +405,6 @@ int uv__statx(int dirfd, + int flags, + unsigned int mask, + struct uv__statx* statxbuf); +-void uv__statx_to_stat(const struct uv__statx* statxbuf, uv_stat_t* buf); + ssize_t uv__getrandom(void* buf, size_t buflen, unsigned flags); + #endif + +diff --git a/deps/uv/src/unix/kqueue.c b/deps/uv/src/unix/kqueue.c +index 82916d65933..deb486bae7a 100644 +--- a/deps/uv/src/unix/kqueue.c ++++ b/deps/uv/src/unix/kqueue.c +@@ -127,7 +127,6 @@ static void uv__kqueue_delete(int kqfd, const struct kevent *ev) { + + + void uv__io_poll(uv_loop_t* loop, int timeout) { +- uv__loop_internal_fields_t* lfields; + struct kevent events[1024]; + struct kevent* ev; + struct timespec spec; +@@ -156,7 +155,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + return; + } + +- lfields = uv__get_internal_fields(loop); + nevents = 0; + + while (!QUEUE_EMPTY(&loop->watcher_queue)) { +@@ -224,7 +222,7 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + base = loop->time; + count = 48; /* Benchmarks suggest this gives the best throughput. 
*/ + +- if (lfields->flags & UV_METRICS_IDLE_TIME) { ++ if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) { + reset_timeout = 1; + user_timeout = timeout; + timeout = 0; +@@ -247,12 +245,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + if (pset != NULL) + pthread_sigmask(SIG_BLOCK, pset, NULL); + +- /* Store the current timeout in a location that's globally accessible so +- * other locations like uv__work_done() can determine whether the queue +- * of events in the callback were waiting when poll was called. +- */ +- lfields->current_timeout = timeout; +- + nfds = kevent(loop->backend_fd, + events, + nevents, +diff --git a/deps/uv/src/unix/linux.c b/deps/uv/src/unix/linux.c +index 5f84ad0eea3..343e37f2527 100644 +--- a/deps/uv/src/unix/linux.c ++++ b/deps/uv/src/unix/linux.c +@@ -27,8 +27,6 @@ + #include "internal.h" + + #include <inttypes.h> +-#include <stdatomic.h> +-#include <stddef.h> /* offsetof */ + #include <stdint.h> + #include <stdio.h> + #include <stdlib.h> +@@ -40,29 +38,15 @@ + #include <net/if.h> + #include <sys/epoll.h> + #include <sys/inotify.h> +-#include <sys/mman.h> + #include <sys/param.h> + #include <sys/prctl.h> + #include <sys/stat.h> + #include <sys/syscall.h> + #include <sys/sysinfo.h> +-#include <sys/sysmacros.h> + #include <sys/types.h> + #include <time.h> + #include <unistd.h> + +-#ifndef __NR_io_uring_setup +-# define __NR_io_uring_setup 425 +-#endif +- +-#ifndef __NR_io_uring_enter +-# define __NR_io_uring_enter 426 +-#endif +- +-#ifndef __NR_io_uring_register +-# define __NR_io_uring_register 427 +-#endif +- + #ifndef __NR_copy_file_range + # if defined(__x86_64__) + # define __NR_copy_file_range 326 +@@ -132,129 +116,6 @@ + # include <netpacket/packet.h> + #endif /* HAVE_IFADDRS_H */ + +-enum { +- UV__IORING_SETUP_SQPOLL = 2u, +-}; +- +-enum { +- UV__IORING_FEAT_SINGLE_MMAP = 1u, +- UV__IORING_FEAT_NODROP = 2u, +- UV__IORING_FEAT_RSRC_TAGS = 1024u, /* linux v5.13 */ +-}; +- +-enum { +- UV__IORING_OP_READV = 1, +- UV__IORING_OP_WRITEV = 2, +- UV__IORING_OP_FSYNC = 3, +- UV__IORING_OP_OPENAT = 18, +- UV__IORING_OP_CLOSE = 19, +- UV__IORING_OP_STATX = 21, +- UV__IORING_OP_EPOLL_CTL = 29, +-}; +- +-enum { +- UV__IORING_ENTER_GETEVENTS = 1u, +- UV__IORING_ENTER_SQ_WAKEUP = 2u, +-}; +- +-enum { +- UV__IORING_SQ_NEED_WAKEUP = 1u, +- UV__IORING_SQ_CQ_OVERFLOW = 2u, +-}; +- +-struct uv__io_cqring_offsets { +- uint32_t head; +- uint32_t tail; +- uint32_t ring_mask; +- uint32_t ring_entries; +- uint32_t overflow; +- uint32_t cqes; +- uint64_t reserved0; +- uint64_t reserved1; +-}; +- +-STATIC_ASSERT(40 == sizeof(struct uv__io_cqring_offsets)); +- +-struct uv__io_sqring_offsets { +- uint32_t head; +- uint32_t tail; +- uint32_t ring_mask; +- uint32_t ring_entries; +- uint32_t flags; +- uint32_t dropped; +- uint32_t array; +- uint32_t reserved0; +- uint64_t reserved1; +-}; +- +-STATIC_ASSERT(40 == sizeof(struct uv__io_sqring_offsets)); +- +-struct uv__io_uring_cqe { +- uint64_t user_data; +- int32_t res; +- uint32_t flags; +-}; +- +-STATIC_ASSERT(16 == sizeof(struct uv__io_uring_cqe)); +- +-struct uv__io_uring_sqe { +- uint8_t opcode; +- uint8_t flags; +- uint16_t ioprio; +- int32_t fd; +- union { +- uint64_t off; +- uint64_t addr2; +- }; +- union { +- uint64_t addr; +- }; +- uint32_t len; +- union { +- uint32_t rw_flags; +- uint32_t fsync_flags; +- uint32_t open_flags; +- uint32_t statx_flags; +- }; +- uint64_t user_data; +- union { +- uint16_t buf_index; +- uint64_t pad[3]; +- }; +-}; +- +-STATIC_ASSERT(64 == sizeof(struct uv__io_uring_sqe)); 
+-STATIC_ASSERT(0 == offsetof(struct uv__io_uring_sqe, opcode)); +-STATIC_ASSERT(1 == offsetof(struct uv__io_uring_sqe, flags)); +-STATIC_ASSERT(2 == offsetof(struct uv__io_uring_sqe, ioprio)); +-STATIC_ASSERT(4 == offsetof(struct uv__io_uring_sqe, fd)); +-STATIC_ASSERT(8 == offsetof(struct uv__io_uring_sqe, off)); +-STATIC_ASSERT(16 == offsetof(struct uv__io_uring_sqe, addr)); +-STATIC_ASSERT(24 == offsetof(struct uv__io_uring_sqe, len)); +-STATIC_ASSERT(28 == offsetof(struct uv__io_uring_sqe, rw_flags)); +-STATIC_ASSERT(32 == offsetof(struct uv__io_uring_sqe, user_data)); +-STATIC_ASSERT(40 == offsetof(struct uv__io_uring_sqe, buf_index)); +- +-struct uv__io_uring_params { +- uint32_t sq_entries; +- uint32_t cq_entries; +- uint32_t flags; +- uint32_t sq_thread_cpu; +- uint32_t sq_thread_idle; +- uint32_t features; +- uint32_t reserved[4]; +- struct uv__io_sqring_offsets sq_off; /* 40 bytes */ +- struct uv__io_cqring_offsets cq_off; /* 40 bytes */ +-}; +- +-STATIC_ASSERT(40 + 40 + 40 == sizeof(struct uv__io_uring_params)); +-STATIC_ASSERT(40 == offsetof(struct uv__io_uring_params, sq_off)); +-STATIC_ASSERT(80 == offsetof(struct uv__io_uring_params, cq_off)); +- +-STATIC_ASSERT(EPOLL_CTL_ADD < 4); +-STATIC_ASSERT(EPOLL_CTL_DEL < 4); +-STATIC_ASSERT(EPOLL_CTL_MOD < 4); +- + struct watcher_list { + RB_ENTRY(watcher_list) entry; + QUEUE watchers; +@@ -276,17 +137,6 @@ static int compare_watchers(const struct watcher_list* a, + static void maybe_free_watcher_list(struct watcher_list* w, + uv_loop_t* loop); + +-static void uv__epoll_ctl_flush(int epollfd, +- struct uv__iou* ctl, +- struct epoll_event (*events)[256]); +- +-static void uv__epoll_ctl_prep(int epollfd, +- struct uv__iou* ctl, +- struct epoll_event (*events)[256], +- int op, +- int fd, +- struct epoll_event* e); +- + RB_GENERATE_STATIC(watcher_root, watcher_list, entry, compare_watchers) + + +@@ -356,187 +206,7 @@ ssize_t uv__getrandom(void* buf, size_t buflen, unsigned flags) { + } + + +-int uv__io_uring_setup(int entries, struct uv__io_uring_params* params) { +- return syscall(__NR_io_uring_setup, entries, params); +-} +- +- +-int uv__io_uring_enter(int fd, +- unsigned to_submit, +- unsigned min_complete, +- unsigned flags) { +- /* io_uring_enter used to take a sigset_t but it's unused +- * in newer kernels unless IORING_ENTER_EXT_ARG is set, +- * in which case it takes a struct io_uring_getevents_arg. +- */ +- return syscall(__NR_io_uring_enter, +- fd, +- to_submit, +- min_complete, +- flags, +- NULL, +- 0L); +-} +- +- +-int uv__io_uring_register(int fd, unsigned opcode, void* arg, unsigned nargs) { +- return syscall(__NR_io_uring_register, fd, opcode, arg, nargs); +-} +- +- +-static int uv__use_io_uring(void) { +- /* Ternary: unknown=0, yes=1, no=-1 */ +- static _Atomic int use_io_uring; +- char* val; +- int use; +- +- use = atomic_load_explicit(&use_io_uring, memory_order_relaxed); +- +- if (use == 0) { +- val = getenv("UV_USE_IO_URING"); +- use = val == NULL || atoi(val) ? 
1 : -1; +- atomic_store_explicit(&use_io_uring, use, memory_order_relaxed); +- } +- +- return use > 0; +-} +- +- +-static void uv__iou_init(int epollfd, +- struct uv__iou* iou, +- uint32_t entries, +- uint32_t flags) { +- struct uv__io_uring_params params; +- struct epoll_event e; +- size_t cqlen; +- size_t sqlen; +- size_t maxlen; +- size_t sqelen; +- uint32_t i; +- char* sq; +- char* sqe; +- int ringfd; +- +- sq = MAP_FAILED; +- sqe = MAP_FAILED; +- +- if (!uv__use_io_uring()) +- return; +- +- /* SQPOLL required CAP_SYS_NICE until linux v5.12 relaxed that requirement. +- * Mostly academic because we check for a v5.13 kernel afterwards anyway. +- */ +- memset(¶ms, 0, sizeof(params)); +- params.flags = flags; +- +- if (flags & UV__IORING_SETUP_SQPOLL) +- params.sq_thread_idle = 10; /* milliseconds */ +- +- /* Kernel returns a file descriptor with O_CLOEXEC flag set. */ +- ringfd = uv__io_uring_setup(entries, ¶ms); +- if (ringfd == -1) +- return; +- +- /* IORING_FEAT_RSRC_TAGS is used to detect linux v5.13 but what we're +- * actually detecting is whether IORING_OP_STATX works with SQPOLL. +- */ +- if (!(params.features & UV__IORING_FEAT_RSRC_TAGS)) +- goto fail; +- +- /* Implied by IORING_FEAT_RSRC_TAGS but checked explicitly anyway. */ +- if (!(params.features & UV__IORING_FEAT_SINGLE_MMAP)) +- goto fail; +- +- /* Implied by IORING_FEAT_RSRC_TAGS but checked explicitly anyway. */ +- if (!(params.features & UV__IORING_FEAT_NODROP)) +- goto fail; +- +- sqlen = params.sq_off.array + params.sq_entries * sizeof(uint32_t); +- cqlen = +- params.cq_off.cqes + params.cq_entries * sizeof(struct uv__io_uring_cqe); +- maxlen = sqlen < cqlen ? cqlen : sqlen; +- sqelen = params.sq_entries * sizeof(struct uv__io_uring_sqe); +- +- sq = mmap(0, +- maxlen, +- PROT_READ | PROT_WRITE, +- MAP_SHARED | MAP_POPULATE, +- ringfd, +- 0); /* IORING_OFF_SQ_RING */ +- +- sqe = mmap(0, +- sqelen, +- PROT_READ | PROT_WRITE, +- MAP_SHARED | MAP_POPULATE, +- ringfd, +- 0x10000000ull); /* IORING_OFF_SQES */ +- +- if (sq == MAP_FAILED || sqe == MAP_FAILED) +- goto fail; +- +- if (flags & UV__IORING_SETUP_SQPOLL) { +- /* Only interested in completion events. To get notified when +- * the kernel pulls items from the submission ring, add POLLOUT. +- */ +- memset(&e, 0, sizeof(e)); +- e.events = POLLIN; +- e.data.fd = ringfd; +- +- if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ringfd, &e)) +- goto fail; +- } +- +- iou->sqhead = (uint32_t*) (sq + params.sq_off.head); +- iou->sqtail = (uint32_t*) (sq + params.sq_off.tail); +- iou->sqmask = *(uint32_t*) (sq + params.sq_off.ring_mask); +- iou->sqarray = (uint32_t*) (sq + params.sq_off.array); +- iou->sqflags = (uint32_t*) (sq + params.sq_off.flags); +- iou->cqhead = (uint32_t*) (sq + params.cq_off.head); +- iou->cqtail = (uint32_t*) (sq + params.cq_off.tail); +- iou->cqmask = *(uint32_t*) (sq + params.cq_off.ring_mask); +- iou->sq = sq; +- iou->cqe = sq + params.cq_off.cqes; +- iou->sqe = sqe; +- iou->sqlen = sqlen; +- iou->cqlen = cqlen; +- iou->maxlen = maxlen; +- iou->sqelen = sqelen; +- iou->ringfd = ringfd; +- iou->in_flight = 0; +- +- for (i = 0; i <= iou->sqmask; i++) +- iou->sqarray[i] = i; /* Slot -> sqe identity mapping. 
*/ +- +- return; +- +-fail: +- if (sq != MAP_FAILED) +- munmap(sq, maxlen); +- +- if (sqe != MAP_FAILED) +- munmap(sqe, sqelen); +- +- uv__close(ringfd); +-} +- +- +-static void uv__iou_delete(struct uv__iou* iou) { +- if (iou->ringfd != -1) { +- munmap(iou->sq, iou->maxlen); +- munmap(iou->sqe, iou->sqelen); +- uv__close(iou->ringfd); +- iou->ringfd = -1; +- } +-} +- +- + int uv__platform_loop_init(uv_loop_t* loop) { +- uv__loop_internal_fields_t* lfields; +- +- lfields = uv__get_internal_fields(loop); +- lfields->ctl.ringfd = -1; +- lfields->iou.ringfd = -1; +- + loop->inotify_watchers = NULL; + loop->inotify_fd = -1; + loop->backend_fd = epoll_create1(O_CLOEXEC); +@@ -544,9 +214,6 @@ int uv__platform_loop_init(uv_loop_t* loop) { + if (loop->backend_fd == -1) + return UV__ERR(errno); + +- uv__iou_init(loop->backend_fd, &lfields->iou, 64, UV__IORING_SETUP_SQPOLL); +- uv__iou_init(loop->backend_fd, &lfields->ctl, 256, 0); +- + return 0; + } + +@@ -559,8 +226,6 @@ int uv__io_fork(uv_loop_t* loop) { + + uv__close(loop->backend_fd); + loop->backend_fd = -1; +- +- /* TODO(bnoordhuis) Loses items from the submission and completion rings. */ + uv__platform_loop_delete(loop); + + err = uv__platform_loop_init(loop); +@@ -572,62 +237,42 @@ int uv__io_fork(uv_loop_t* loop) { + + + void uv__platform_loop_delete(uv_loop_t* loop) { +- uv__loop_internal_fields_t* lfields; +- +- lfields = uv__get_internal_fields(loop); +- uv__iou_delete(&lfields->ctl); +- uv__iou_delete(&lfields->iou); +- +- if (loop->inotify_fd != -1) { +- uv__io_stop(loop, &loop->inotify_read_watcher, POLLIN); +- uv__close(loop->inotify_fd); +- loop->inotify_fd = -1; +- } ++ if (loop->inotify_fd == -1) return; ++ uv__io_stop(loop, &loop->inotify_read_watcher, POLLIN); ++ uv__close(loop->inotify_fd); ++ loop->inotify_fd = -1; + } + + +-struct uv__invalidate { +- struct epoll_event (*prep)[256]; +- struct epoll_event* events; +- int nfds; +-}; +- +- + void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) { +- uv__loop_internal_fields_t* lfields; +- struct uv__invalidate* inv; ++ struct epoll_event* events; + struct epoll_event dummy; +- int i; ++ uintptr_t i; ++ uintptr_t nfds; + +- lfields = uv__get_internal_fields(loop); +- inv = lfields->inv; ++ assert(loop->watchers != NULL); ++ assert(fd >= 0); + +- /* Invalidate events with same file descriptor */ +- if (inv != NULL) +- for (i = 0; i < inv->nfds; i++) +- if (inv->events[i].data.fd == fd) +- inv->events[i].data.fd = -1; ++ events = (struct epoll_event*) loop->watchers[loop->nwatchers]; ++ nfds = (uintptr_t) loop->watchers[loop->nwatchers + 1]; ++ if (events != NULL) ++ /* Invalidate events with same file descriptor */ ++ for (i = 0; i < nfds; i++) ++ if (events[i].data.fd == fd) ++ events[i].data.fd = -1; + + /* Remove the file descriptor from the epoll. + * This avoids a problem where the same file description remains open + * in another process, causing repeated junk epoll events. + * + * We pass in a dummy epoll_event, to work around a bug in old kernels. +- * +- * Work around a bug in kernels 3.10 to 3.19 where passing a struct that +- * has the EPOLLWAKEUP flag set generates spurious audit syslog warnings. + */ +- memset(&dummy, 0, sizeof(dummy)); +- +- if (inv == NULL) { ++ if (loop->backend_fd >= 0) { ++ /* Work around a bug in kernels 3.10 to 3.19 where passing a struct that ++ * has the EPOLLWAKEUP flag set generates spurious audit syslog warnings. 
++ */ ++ memset(&dummy, 0, sizeof(dummy)); + epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy); +- } else { +- uv__epoll_ctl_prep(loop->backend_fd, +- &lfields->ctl, +- inv->prep, +- EPOLL_CTL_DEL, +- fd, +- &dummy); + } + } + +@@ -653,454 +298,27 @@ int uv__io_check_fd(uv_loop_t* loop, int fd) { + } + + +-/* Caller must initialize SQE and call uv__iou_submit(). */ +-static struct uv__io_uring_sqe* uv__iou_get_sqe(struct uv__iou* iou, +- uv_loop_t* loop, +- uv_fs_t* req) { +- struct uv__io_uring_sqe* sqe; +- uint32_t head; +- uint32_t tail; +- uint32_t mask; +- uint32_t slot; +- +- if (iou->ringfd == -1) +- return NULL; +- +- head = atomic_load_explicit((_Atomic uint32_t*) iou->sqhead, +- memory_order_acquire); +- tail = *iou->sqtail; +- mask = iou->sqmask; +- +- if ((head & mask) == ((tail + 1) & mask)) +- return NULL; /* No room in ring buffer. TODO(bnoordhuis) maybe flush it? */ +- +- slot = tail & mask; +- sqe = iou->sqe; +- sqe = &sqe[slot]; +- memset(sqe, 0, sizeof(*sqe)); +- sqe->user_data = (uintptr_t) req; +- +- /* Pacify uv_cancel(). */ +- req->work_req.loop = loop; +- req->work_req.work = NULL; +- req->work_req.done = NULL; +- QUEUE_INIT(&req->work_req.wq); +- +- uv__req_register(loop, req); +- iou->in_flight++; +- +- return sqe; +-} +- +- +-static void uv__iou_submit(struct uv__iou* iou) { +- uint32_t flags; +- +- atomic_store_explicit((_Atomic uint32_t*) iou->sqtail, +- *iou->sqtail + 1, +- memory_order_release); +- +- flags = atomic_load_explicit((_Atomic uint32_t*) iou->sqflags, +- memory_order_acquire); +- +- if (flags & UV__IORING_SQ_NEED_WAKEUP) +- if (uv__io_uring_enter(iou->ringfd, 0, 0, UV__IORING_ENTER_SQ_WAKEUP)) +- if (errno != EOWNERDEAD) /* Kernel bug. Harmless, ignore. */ +- perror("libuv: io_uring_enter(wakeup)"); /* Can't happen. */ +-} +- +- +-int uv__iou_fs_close(uv_loop_t* loop, uv_fs_t* req) { +- struct uv__io_uring_sqe* sqe; +- struct uv__iou* iou; +- +- iou = &uv__get_internal_fields(loop)->iou; +- +- sqe = uv__iou_get_sqe(iou, loop, req); +- if (sqe == NULL) +- return 0; +- +- sqe->fd = req->file; +- sqe->opcode = UV__IORING_OP_CLOSE; +- +- uv__iou_submit(iou); +- +- return 1; +-} +- +- +-int uv__iou_fs_fsync_or_fdatasync(uv_loop_t* loop, +- uv_fs_t* req, +- uint32_t fsync_flags) { +- struct uv__io_uring_sqe* sqe; +- struct uv__iou* iou; +- +- iou = &uv__get_internal_fields(loop)->iou; +- +- sqe = uv__iou_get_sqe(iou, loop, req); +- if (sqe == NULL) +- return 0; +- +- /* Little known fact: setting seq->off and seq->len turns +- * it into an asynchronous sync_file_range() operation. +- */ +- sqe->fd = req->file; +- sqe->fsync_flags = fsync_flags; +- sqe->opcode = UV__IORING_OP_FSYNC; +- +- uv__iou_submit(iou); +- +- return 1; +-} +- +- +-int uv__iou_fs_open(uv_loop_t* loop, uv_fs_t* req) { +- struct uv__io_uring_sqe* sqe; +- struct uv__iou* iou; +- +- iou = &uv__get_internal_fields(loop)->iou; +- +- sqe = uv__iou_get_sqe(iou, loop, req); +- if (sqe == NULL) +- return 0; +- +- sqe->addr = (uintptr_t) req->path; +- sqe->fd = AT_FDCWD; +- sqe->len = req->mode; +- sqe->opcode = UV__IORING_OP_OPENAT; +- sqe->open_flags = req->flags | O_CLOEXEC; +- +- uv__iou_submit(iou); +- +- return 1; +-} +- +- +-int uv__iou_fs_read_or_write(uv_loop_t* loop, +- uv_fs_t* req, +- int is_read) { +- struct uv__io_uring_sqe* sqe; +- struct uv__iou* iou; +- +- /* For the moment, if iovcnt is greater than IOV_MAX, fallback to the +- * threadpool. In the future we might take advantage of IOSQE_IO_LINK. 
*/ +- if (req->nbufs > IOV_MAX) +- return 0; +- +- iou = &uv__get_internal_fields(loop)->iou; +- +- sqe = uv__iou_get_sqe(iou, loop, req); +- if (sqe == NULL) +- return 0; +- +- sqe->addr = (uintptr_t) req->bufs; +- sqe->fd = req->file; +- sqe->len = req->nbufs; +- sqe->off = req->off < 0 ? -1 : req->off; +- sqe->opcode = is_read ? UV__IORING_OP_READV : UV__IORING_OP_WRITEV; +- +- uv__iou_submit(iou); +- +- return 1; +-} +- +- +-int uv__iou_fs_statx(uv_loop_t* loop, +- uv_fs_t* req, +- int is_fstat, +- int is_lstat) { +- struct uv__io_uring_sqe* sqe; +- struct uv__statx* statxbuf; +- struct uv__iou* iou; +- +- statxbuf = uv__malloc(sizeof(*statxbuf)); +- if (statxbuf == NULL) +- return 0; +- +- iou = &uv__get_internal_fields(loop)->iou; +- +- sqe = uv__iou_get_sqe(iou, loop, req); +- if (sqe == NULL) { +- uv__free(statxbuf); +- return 0; +- } +- +- req->ptr = statxbuf; +- +- sqe->addr = (uintptr_t) req->path; +- sqe->addr2 = (uintptr_t) statxbuf; +- sqe->fd = AT_FDCWD; +- sqe->len = 0xFFF; /* STATX_BASIC_STATS + STATX_BTIME */ +- sqe->opcode = UV__IORING_OP_STATX; +- +- if (is_fstat) { +- sqe->addr = (uintptr_t) ""; +- sqe->fd = req->file; +- sqe->statx_flags |= 0x1000; /* AT_EMPTY_PATH */ +- } +- +- if (is_lstat) +- sqe->statx_flags |= AT_SYMLINK_NOFOLLOW; +- +- uv__iou_submit(iou); +- +- return 1; +-} +- +- +-void uv__statx_to_stat(const struct uv__statx* statxbuf, uv_stat_t* buf) { +- buf->st_dev = makedev(statxbuf->stx_dev_major, statxbuf->stx_dev_minor); +- buf->st_mode = statxbuf->stx_mode; +- buf->st_nlink = statxbuf->stx_nlink; +- buf->st_uid = statxbuf->stx_uid; +- buf->st_gid = statxbuf->stx_gid; +- buf->st_rdev = makedev(statxbuf->stx_rdev_major, statxbuf->stx_rdev_minor); +- buf->st_ino = statxbuf->stx_ino; +- buf->st_size = statxbuf->stx_size; +- buf->st_blksize = statxbuf->stx_blksize; +- buf->st_blocks = statxbuf->stx_blocks; +- buf->st_atim.tv_sec = statxbuf->stx_atime.tv_sec; +- buf->st_atim.tv_nsec = statxbuf->stx_atime.tv_nsec; +- buf->st_mtim.tv_sec = statxbuf->stx_mtime.tv_sec; +- buf->st_mtim.tv_nsec = statxbuf->stx_mtime.tv_nsec; +- buf->st_ctim.tv_sec = statxbuf->stx_ctime.tv_sec; +- buf->st_ctim.tv_nsec = statxbuf->stx_ctime.tv_nsec; +- buf->st_birthtim.tv_sec = statxbuf->stx_btime.tv_sec; +- buf->st_birthtim.tv_nsec = statxbuf->stx_btime.tv_nsec; +- buf->st_flags = 0; +- buf->st_gen = 0; +-} +- +- +-static void uv__iou_fs_statx_post(uv_fs_t* req) { +- struct uv__statx* statxbuf; +- uv_stat_t* buf; +- +- buf = &req->statbuf; +- statxbuf = req->ptr; +- req->ptr = NULL; +- +- if (req->result == 0) { +- uv__msan_unpoison(statxbuf, sizeof(*statxbuf)); +- uv__statx_to_stat(statxbuf, buf); +- req->ptr = buf; +- } +- +- uv__free(statxbuf); +-} +- +- +-static void uv__poll_io_uring(uv_loop_t* loop, struct uv__iou* iou) { +- struct uv__io_uring_cqe* cqe; +- struct uv__io_uring_cqe* e; +- uv_fs_t* req; +- uint32_t head; +- uint32_t tail; +- uint32_t mask; +- uint32_t i; +- uint32_t flags; +- int nevents; +- int rc; +- +- head = *iou->cqhead; +- tail = atomic_load_explicit((_Atomic uint32_t*) iou->cqtail, +- memory_order_acquire); +- mask = iou->cqmask; +- cqe = iou->cqe; +- nevents = 0; +- +- for (i = head; i != tail; i++) { +- e = &cqe[i & mask]; +- +- req = (uv_fs_t*) (uintptr_t) e->user_data; +- assert(req->type == UV_FS); +- +- uv__req_unregister(loop, req); +- iou->in_flight--; +- +- /* io_uring stores error codes as negative numbers, same as libuv. 
*/ +- req->result = e->res; +- +- switch (req->fs_type) { +- case UV_FS_FSTAT: +- case UV_FS_LSTAT: +- case UV_FS_STAT: +- uv__iou_fs_statx_post(req); +- break; +- default: /* Squelch -Wswitch warnings. */ +- break; +- } +- +- uv__metrics_update_idle_time(loop); +- req->cb(req); +- nevents++; +- } +- +- atomic_store_explicit((_Atomic uint32_t*) iou->cqhead, +- tail, +- memory_order_release); +- +- /* Check whether CQE's overflowed, if so enter the kernel to make them +- * available. Don't grab them immediately but in the next loop iteration to +- * avoid loop starvation. */ +- flags = atomic_load_explicit((_Atomic uint32_t*) iou->sqflags, +- memory_order_acquire); +- +- if (flags & UV__IORING_SQ_CQ_OVERFLOW) { +- do +- rc = uv__io_uring_enter(iou->ringfd, 0, 0, UV__IORING_ENTER_GETEVENTS); +- while (rc == -1 && errno == EINTR); +- +- if (rc < 0) +- perror("libuv: io_uring_enter(getevents)"); /* Can't happen. */ +- } +- +- uv__metrics_inc_events(loop, nevents); +- if (uv__get_internal_fields(loop)->current_timeout == 0) +- uv__metrics_inc_events_waiting(loop, nevents); +-} +- +- +-static void uv__epoll_ctl_prep(int epollfd, +- struct uv__iou* ctl, +- struct epoll_event (*events)[256], +- int op, +- int fd, +- struct epoll_event* e) { +- struct uv__io_uring_sqe* sqe; +- struct epoll_event* pe; +- uint32_t mask; +- uint32_t slot; +- +- if (ctl->ringfd == -1) { +- if (!epoll_ctl(epollfd, op, fd, e)) +- return; +- +- if (op == EPOLL_CTL_DEL) +- return; /* Ignore errors, may be racing with another thread. */ +- +- if (op != EPOLL_CTL_ADD) +- abort(); +- +- if (errno != EEXIST) +- abort(); +- +- /* File descriptor that's been watched before, update event mask. */ +- if (!epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, e)) +- return; +- +- abort(); +- } else { +- mask = ctl->sqmask; +- slot = (*ctl->sqtail)++ & mask; +- +- pe = &(*events)[slot]; +- *pe = *e; +- +- sqe = ctl->sqe; +- sqe = &sqe[slot]; +- +- memset(sqe, 0, sizeof(*sqe)); +- sqe->addr = (uintptr_t) pe; +- sqe->fd = epollfd; +- sqe->len = op; +- sqe->off = fd; +- sqe->opcode = UV__IORING_OP_EPOLL_CTL; +- sqe->user_data = op | slot << 2 | (int64_t) fd << 32; +- +- if ((*ctl->sqhead & mask) == (*ctl->sqtail & mask)) +- uv__epoll_ctl_flush(epollfd, ctl, events); +- } +-} +- +- +-static void uv__epoll_ctl_flush(int epollfd, +- struct uv__iou* ctl, +- struct epoll_event (*events)[256]) { +- struct epoll_event oldevents[256]; +- struct uv__io_uring_cqe* cqe; +- uint32_t oldslot; +- uint32_t slot; +- uint32_t n; +- int fd; +- int op; +- int rc; +- +- STATIC_ASSERT(sizeof(oldevents) == sizeof(*events)); +- assert(ctl->ringfd != -1); +- assert(*ctl->sqhead != *ctl->sqtail); +- +- n = *ctl->sqtail - *ctl->sqhead; +- do +- rc = uv__io_uring_enter(ctl->ringfd, n, n, UV__IORING_ENTER_GETEVENTS); +- while (rc == -1 && errno == EINTR); +- +- if (rc < 0) +- perror("libuv: io_uring_enter(getevents)"); /* Can't happen. */ +- +- if (rc != (int) n) +- abort(); +- +- assert(*ctl->sqhead == *ctl->sqtail); +- +- memcpy(oldevents, *events, sizeof(*events)); +- +- /* Failed submissions are either EPOLL_CTL_DEL commands for file descriptors +- * that have been closed, or EPOLL_CTL_ADD commands for file descriptors +- * that we are already watching. Ignore the former and retry the latter +- * with EPOLL_CTL_MOD. 
+- */ +- while (*ctl->cqhead != *ctl->cqtail) { +- slot = (*ctl->cqhead)++ & ctl->cqmask; +- +- cqe = ctl->cqe; +- cqe = &cqe[slot]; +- +- if (cqe->res == 0) +- continue; +- +- fd = cqe->user_data >> 32; +- op = 3 & cqe->user_data; +- oldslot = 255 & (cqe->user_data >> 2); +- +- if (op == EPOLL_CTL_DEL) +- continue; +- +- if (op != EPOLL_CTL_ADD) +- abort(); +- +- if (cqe->res != -EEXIST) +- abort(); +- +- uv__epoll_ctl_prep(epollfd, +- ctl, +- events, +- EPOLL_CTL_MOD, +- fd, +- &oldevents[oldslot]); +- } +-} +- +- + void uv__io_poll(uv_loop_t* loop, int timeout) { +- uv__loop_internal_fields_t* lfields; ++ /* A bug in kernels < 2.6.37 makes timeouts larger than ~30 minutes ++ * effectively infinite on 32 bits architectures. To avoid blocking ++ * indefinitely, we cap the timeout and poll again if necessary. ++ * ++ * Note that "30 minutes" is a simplification because it depends on ++ * the value of CONFIG_HZ. The magic constant assumes CONFIG_HZ=1200, ++ * that being the largest value I have seen in the wild (and only once.) ++ */ ++ static const int max_safe_timeout = 1789569; + struct epoll_event events[1024]; +- struct epoll_event prep[256]; +- struct uv__invalidate inv; + struct epoll_event* pe; + struct epoll_event e; +- struct uv__iou* ctl; +- struct uv__iou* iou; + int real_timeout; + QUEUE* q; + uv__io_t* w; + sigset_t* sigmask; + sigset_t sigset; + uint64_t base; +- int have_iou_events; + int have_signals; + int nevents; +- int epollfd; + int count; + int nfds; + int fd; +@@ -1109,9 +327,47 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + int user_timeout; + int reset_timeout; + +- lfields = uv__get_internal_fields(loop); +- ctl = &lfields->ctl; +- iou = &lfields->iou; ++ if (loop->nfds == 0) { ++ assert(QUEUE_EMPTY(&loop->watcher_queue)); ++ return; ++ } ++ ++ memset(&e, 0, sizeof(e)); ++ ++ while (!QUEUE_EMPTY(&loop->watcher_queue)) { ++ q = QUEUE_HEAD(&loop->watcher_queue); ++ QUEUE_REMOVE(q); ++ QUEUE_INIT(q); ++ ++ w = QUEUE_DATA(q, uv__io_t, watcher_queue); ++ assert(w->pevents != 0); ++ assert(w->fd >= 0); ++ assert(w->fd < (int) loop->nwatchers); ++ ++ e.events = w->pevents; ++ e.data.fd = w->fd; ++ ++ if (w->events == 0) ++ op = EPOLL_CTL_ADD; ++ else ++ op = EPOLL_CTL_MOD; ++ ++ /* XXX Future optimization: do EPOLL_CTL_MOD lazily if we stop watching ++ * events, skip the syscall and squelch the events after epoll_wait(). ++ */ ++ if (epoll_ctl(loop->backend_fd, op, w->fd, &e)) { ++ if (errno != EEXIST) ++ abort(); ++ ++ assert(op == EPOLL_CTL_ADD); ++ ++ /* We've reactivated a file descriptor that's been watched before. */ ++ if (epoll_ctl(loop->backend_fd, EPOLL_CTL_MOD, w->fd, &e)) ++ abort(); ++ } ++ ++ w->events = w->pevents; ++ } + + sigmask = NULL; + if (loop->flags & UV_LOOP_BLOCK_SIGPROF) { +@@ -1125,7 +381,7 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + count = 48; /* Benchmarks suggest this gives the best throughput. 
*/ + real_timeout = timeout; + +- if (lfields->flags & UV_METRICS_IDLE_TIME) { ++ if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) { + reset_timeout = 1; + user_timeout = timeout; + timeout = 0; +@@ -1134,56 +390,24 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + user_timeout = 0; + } + +- epollfd = loop->backend_fd; +- +- memset(&e, 0, sizeof(e)); +- +- while (!QUEUE_EMPTY(&loop->watcher_queue)) { +- q = QUEUE_HEAD(&loop->watcher_queue); +- w = QUEUE_DATA(q, uv__io_t, watcher_queue); +- QUEUE_REMOVE(q); +- QUEUE_INIT(q); +- +- op = EPOLL_CTL_MOD; +- if (w->events == 0) +- op = EPOLL_CTL_ADD; +- +- w->events = w->pevents; +- e.events = w->pevents; +- e.data.fd = w->fd; +- +- uv__epoll_ctl_prep(epollfd, ctl, &prep, op, w->fd, &e); +- } +- +- inv.events = events; +- inv.prep = &prep; +- inv.nfds = -1; +- + for (;;) { +- if (loop->nfds == 0) +- if (iou->in_flight == 0) +- break; +- +- /* All event mask mutations should be visible to the kernel before +- * we enter epoll_pwait(). +- */ +- if (ctl->ringfd != -1) +- while (*ctl->sqhead != *ctl->sqtail) +- uv__epoll_ctl_flush(epollfd, ctl, &prep); +- + /* Only need to set the provider_entry_time if timeout != 0. The function + * will return early if the loop isn't configured with UV_METRICS_IDLE_TIME. + */ + if (timeout != 0) + uv__metrics_set_provider_entry_time(loop); + +- /* Store the current timeout in a location that's globally accessible so +- * other locations like uv__work_done() can determine whether the queue +- * of events in the callback were waiting when poll was called. ++ /* See the comment for max_safe_timeout for an explanation of why ++ * this is necessary. Executive summary: kernel bug workaround. + */ +- lfields->current_timeout = timeout; ++ if (sizeof(int32_t) == sizeof(long) && timeout >= max_safe_timeout) ++ timeout = max_safe_timeout; + +- nfds = epoll_pwait(epollfd, events, ARRAY_SIZE(events), timeout, sigmask); ++ nfds = epoll_pwait(loop->backend_fd, ++ events, ++ ARRAY_SIZE(events), ++ timeout, ++ sigmask); + + /* Update loop->time unconditionally. It's tempting to skip the update when + * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the +@@ -1203,7 +427,7 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + continue; + + if (timeout == 0) +- break; ++ return; + + /* We may have been inside the system call for longer than |timeout| + * milliseconds so we need to update the timestamp to avoid drift. +@@ -1224,18 +448,27 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + continue; + + if (timeout == 0) +- break; ++ return; + + /* Interrupted by a signal. Update timeout and poll again. */ + goto update_timeout; + } + +- have_iou_events = 0; + have_signals = 0; + nevents = 0; + +- inv.nfds = nfds; +- lfields->inv = &inv; ++ { ++ /* Squelch a -Waddress-of-packed-member warning with gcc >= 9. 
*/ ++ union { ++ struct epoll_event* events; ++ uv__io_t* watchers; ++ } x; ++ ++ x.events = events; ++ assert(loop->watchers != NULL); ++ loop->watchers[loop->nwatchers] = x.watchers; ++ loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds; ++ } + + for (i = 0; i < nfds; i++) { + pe = events + i; +@@ -1245,12 +478,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + if (fd == -1) + continue; + +- if (fd == iou->ringfd) { +- uv__poll_io_uring(loop, iou); +- have_iou_events = 1; +- continue; +- } +- + assert(fd >= 0); + assert((unsigned) fd < loop->nwatchers); + +@@ -1262,7 +489,7 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + * Ignore all errors because we may be racing with another thread + * when the file descriptor is closed. + */ +- uv__epoll_ctl_prep(epollfd, ctl, &prep, EPOLL_CTL_DEL, fd, pe); ++ epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, pe); + continue; + } + +@@ -1319,13 +546,11 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN); + } + +- lfields->inv = NULL; +- +- if (have_iou_events != 0) +- break; /* Event loop should cycle now so don't poll again. */ ++ loop->watchers[loop->nwatchers] = NULL; ++ loop->watchers[loop->nwatchers + 1] = NULL; + + if (have_signals != 0) +- break; /* Event loop should cycle now so don't poll again. */ ++ return; /* Event loop should cycle now so don't poll again. */ + + if (nevents != 0) { + if (nfds == ARRAY_SIZE(events) && --count != 0) { +@@ -1333,11 +558,11 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + timeout = 0; + continue; + } +- break; ++ return; + } + + if (timeout == 0) +- break; ++ return; + + if (timeout == -1) + continue; +@@ -1347,14 +572,10 @@ update_timeout: + + real_timeout -= (loop->time - base); + if (real_timeout <= 0) +- break; ++ return; + + timeout = real_timeout; + } +- +- if (ctl->ringfd != -1) +- while (*ctl->sqhead != *ctl->sqtail) +- uv__epoll_ctl_flush(epollfd, ctl, &prep); + } + + uint64_t uv__hrtime(uv_clocktype_t type) { +@@ -1867,7 +1088,7 @@ static uint64_t uv__read_uint64(const char* filename) { + if (0 == uv__slurp(filename, buf, sizeof(buf))) + if (1 != sscanf(buf, "%" PRIu64, &rc)) + if (0 == strcmp(buf, "max\n")) +- rc = UINT64_MAX; ++ rc = ~0ull; + + return rc; + } +@@ -1903,7 +1124,6 @@ static void uv__get_cgroup1_memory_limits(char buf[static 1024], uint64_t* high, + char filename[4097]; + char* p; + int n; +- uint64_t cgroup1_max; + + /* Find out where the controller is mounted. */ + p = uv__cgroup1_find_memory_controller(buf, &n); +@@ -1920,22 +1140,12 @@ static void uv__get_cgroup1_memory_limits(char buf[static 1024], uint64_t* high, + * as indicated by uv__read_uint64 returning 0. + */ + if (*high != 0 && *max != 0) +- goto update_limits; ++ return; + } + + /* Fall back to the limits of the global memory controller. */ + *high = uv__read_uint64("/sys/fs/cgroup/memory/memory.soft_limit_in_bytes"); + *max = uv__read_uint64("/sys/fs/cgroup/memory/memory.limit_in_bytes"); +- +- /* uv__read_uint64 detects cgroup2's "max", so we need to separately detect +- * cgroup1's maximum value (which is derived from LONG_MAX and PAGE_SIZE). 
+- */ +-update_limits: +- cgroup1_max = LONG_MAX & ~(sysconf(_SC_PAGESIZE) - 1); +- if (*high == cgroup1_max) +- *high = UINT64_MAX; +- if (*max == cgroup1_max) +- *max = UINT64_MAX; + } + + static void uv__get_cgroup2_memory_limits(char buf[static 1024], uint64_t* high, +diff --git a/deps/uv/src/unix/os390.c b/deps/uv/src/unix/os390.c +index a87c2d77faf..3954b2c2753 100644 +--- a/deps/uv/src/unix/os390.c ++++ b/deps/uv/src/unix/os390.c +@@ -808,7 +808,6 @@ static int os390_message_queue_handler(uv__os390_epoll* ep) { + + void uv__io_poll(uv_loop_t* loop, int timeout) { + static const int max_safe_timeout = 1789569; +- uv__loop_internal_fields_t* lfields; + struct epoll_event events[1024]; + struct epoll_event* pe; + struct epoll_event e; +@@ -831,8 +830,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + return; + } + +- lfields = uv__get_internal_fields(loop); +- + while (!QUEUE_EMPTY(&loop->watcher_queue)) { + uv_stream_t* stream; + +@@ -880,7 +877,7 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + int nevents = 0; + have_signals = 0; + +- if (lfields->flags & UV_METRICS_IDLE_TIME) { ++ if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) { + reset_timeout = 1; + user_timeout = timeout; + timeout = 0; +@@ -899,12 +896,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + if (sizeof(int32_t) == sizeof(long) && timeout >= max_safe_timeout) + timeout = max_safe_timeout; + +- /* Store the current timeout in a location that's globally accessible so +- * other locations like uv__work_done() can determine whether the queue +- * of events in the callback were waiting when poll was called. +- */ +- lfields->current_timeout = timeout; +- + nfds = epoll_wait(loop->ep, events, + ARRAY_SIZE(events), timeout); + +diff --git a/deps/uv/src/unix/posix-poll.c b/deps/uv/src/unix/posix-poll.c +index 7e7de86845d..711780ece8d 100644 +--- a/deps/uv/src/unix/posix-poll.c ++++ b/deps/uv/src/unix/posix-poll.c +@@ -132,7 +132,6 @@ static void uv__pollfds_del(uv_loop_t* loop, int fd) { + + + void uv__io_poll(uv_loop_t* loop, int timeout) { +- uv__loop_internal_fields_t* lfields; + sigset_t* pset; + sigset_t set; + uint64_t time_base; +@@ -153,8 +152,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + return; + } + +- lfields = uv__get_internal_fields(loop); +- + /* Take queued watchers and add their fds to our poll fds array. */ + while (!QUEUE_EMPTY(&loop->watcher_queue)) { + q = QUEUE_HEAD(&loop->watcher_queue); +@@ -182,7 +179,7 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + assert(timeout >= -1); + time_base = loop->time; + +- if (lfields->flags & UV_METRICS_IDLE_TIME) { ++ if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) { + reset_timeout = 1; + user_timeout = timeout; + timeout = 0; +@@ -201,12 +198,6 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { + if (timeout != 0) + uv__metrics_set_provider_entry_time(loop); + +- /* Store the current timeout in a location that's globally accessible so +- * other locations like uv__work_done() can determine whether the queue +- * of events in the callback were waiting when poll was called. 
+- */ +- lfields->current_timeout = timeout; +- + if (pset != NULL) + if (pthread_sigmask(SIG_BLOCK, pset, NULL)) + abort(); +diff --git a/deps/uv/src/uv-common.h b/deps/uv/src/uv-common.h +index decde5362c8..2720121addc 100644 +--- a/deps/uv/src/uv-common.h ++++ b/deps/uv/src/uv-common.h +@@ -396,37 +396,9 @@ struct uv__loop_metrics_s { + void uv__metrics_update_idle_time(uv_loop_t* loop); + void uv__metrics_set_provider_entry_time(uv_loop_t* loop); + +-#ifdef __linux__ +-struct uv__iou { +- uint32_t* sqhead; +- uint32_t* sqtail; +- uint32_t* sqarray; +- uint32_t sqmask; +- uint32_t* sqflags; +- uint32_t* cqhead; +- uint32_t* cqtail; +- uint32_t cqmask; +- void* sq; /* pointer to munmap() on event loop teardown */ +- void* cqe; /* pointer to array of struct uv__io_uring_cqe */ +- void* sqe; /* pointer to array of struct uv__io_uring_sqe */ +- size_t sqlen; +- size_t cqlen; +- size_t maxlen; +- size_t sqelen; +- int ringfd; +- uint32_t in_flight; +-}; +-#endif /* __linux__ */ +- + struct uv__loop_internal_fields_s { + unsigned int flags; + uv__loop_metrics_t loop_metrics; +- int current_timeout; +-#ifdef __linux__ +- struct uv__iou ctl; +- struct uv__iou iou; +- void* inv; /* used by uv__platform_invalidate_fd() */ +-#endif /* __linux__ */ + }; + + #endif /* UV_COMMON_H_ */ +diff --git a/deps/uv/src/win/core.c b/deps/uv/src/win/core.c +index 9a3be58849a..e4041ec86a6 100644 +--- a/deps/uv/src/win/core.c ++++ b/deps/uv/src/win/core.c +@@ -424,7 +424,6 @@ int uv_backend_timeout(const uv_loop_t* loop) { + + + static void uv__poll_wine(uv_loop_t* loop, DWORD timeout) { +- uv__loop_internal_fields_t* lfields; + DWORD bytes; + ULONG_PTR key; + OVERLAPPED* overlapped; +@@ -434,10 +433,9 @@ static void uv__poll_wine(uv_loop_t* loop, DWORD timeout) { + uint64_t user_timeout; + int reset_timeout; + +- lfields = uv__get_internal_fields(loop); + timeout_time = loop->time + timeout; + +- if (lfields->flags & UV_METRICS_IDLE_TIME) { ++ if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) { + reset_timeout = 1; + user_timeout = timeout; + timeout = 0; +@@ -452,12 +450,6 @@ static void uv__poll_wine(uv_loop_t* loop, DWORD timeout) { + if (timeout != 0) + uv__metrics_set_provider_entry_time(loop); + +- /* Store the current timeout in a location that's globally accessible so +- * other locations like uv__work_done() can determine whether the queue +- * of events in the callback were waiting when poll was called. 
+- */ +- lfields->current_timeout = timeout; +- + GetQueuedCompletionStatus(loop->iocp, + &bytes, + &key, +@@ -515,7 +507,6 @@ static void uv__poll_wine(uv_loop_t* loop, DWORD timeout) { + + + static void uv__poll(uv_loop_t* loop, DWORD timeout) { +- uv__loop_internal_fields_t* lfields; + BOOL success; + uv_req_t* req; + OVERLAPPED_ENTRY overlappeds[128]; +@@ -527,10 +518,9 @@ static void uv__poll(uv_loop_t* loop, DWORD timeout) { + uint64_t actual_timeout; + int reset_timeout; + +- lfields = uv__get_internal_fields(loop); + timeout_time = loop->time + timeout; + +- if (lfields->flags & UV_METRICS_IDLE_TIME) { ++ if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) { + reset_timeout = 1; + user_timeout = timeout; + timeout = 0; +@@ -547,12 +537,6 @@ static void uv__poll(uv_loop_t* loop, DWORD timeout) { + if (timeout != 0) + uv__metrics_set_provider_entry_time(loop); + +- /* Store the current timeout in a location that's globally accessible so +- * other locations like uv__work_done() can determine whether the queue +- * of events in the callback were waiting when poll was called. +- */ +- lfields->current_timeout = timeout; +- + success = pGetQueuedCompletionStatusEx(loop->iocp, + overlappeds, + ARRAY_SIZE(overlappeds), |