diff options
Diffstat (limited to 'fs/pipe.c')
-rw-r--r-- | fs/pipe.c | 50 |
1 files changed, 32 insertions, 18 deletions
diff --git a/fs/pipe.c b/fs/pipe.c index bfd946a9ad01..6d4342bad9f1 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -32,6 +32,21 @@ #include "internal.h" /* + * New pipe buffers will be restricted to this size while the user is exceeding + * their pipe buffer quota. The general pipe use case needs at least two + * buffers: one for data yet to be read, and one for new data. If this is less + * than two, then a write to a non-empty pipe may block even if the pipe is not + * full. This can occur with GNU make jobserver or similar uses of pipes as + * semaphores: multiple processes may be waiting to write tokens back to the + * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/. + * + * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their + * own risk, namely: pipe writes to non-full pipes may block until the pipe is + * emptied. + */ +#define PIPE_MIN_DEF_BUFFERS 2 + +/* * The max size that a non-root user is allowed to grow the pipe. Can * be set by root in /proc/sys/fs/pipe-max-size */ @@ -348,10 +363,9 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) * _very_ unlikely case that the pipe was full, but we got * no data. */ - if (unlikely(was_full)) { + if (unlikely(was_full)) wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); - } + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); /* * But because we didn't read anything, at this point we can @@ -370,12 +384,11 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) wake_next_reader = false; __pipe_unlock(pipe); - if (was_full) { + if (was_full) wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); - } if (wake_next_reader) wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); if (ret > 0) file_accessed(filp); return ret; @@ -429,14 +442,11 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) #endif /* - * Only wake up if the pipe started out empty, since - * otherwise there should be no readers waiting. - * * If it wasn't empty we try to merge new data into * the last buffer. * * That naturally merges small writes, but it also - * page-aligs the rest of the writes for large writes + * page-aligns the rest of the writes for large writes * spanning multiple pages. */ head = pipe->head; @@ -553,10 +563,9 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * become empty while we dropped the lock. */ __pipe_unlock(pipe); - if (was_empty) { + if (was_empty) wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - } + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); __pipe_lock(pipe); was_empty = pipe_empty(pipe->head, pipe->tail); @@ -575,11 +584,13 @@ out: * This is particularly important for small writes, because of * how (for example) the GNU make jobserver uses small writes to * wake up pending jobs + * + * Epoll nonsensically wants a wakeup whether the pipe + * was already empty or not. */ - if (was_empty) { + if (was_empty || pipe->poll_usage) wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - } + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); if (wake_next_writer) wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) { @@ -639,6 +650,9 @@ pipe_poll(struct file *filp, poll_table *wait) struct pipe_inode_info *pipe = filp->private_data; unsigned int head, tail; + /* Epoll has some historical nasty semantics, this enables them */ + pipe->poll_usage = 1; + /* * Reading pipe state only -- no need for acquiring the semaphore. * @@ -781,8 +795,8 @@ struct pipe_inode_info *alloc_pipe_info(void) user_bufs = account_pipe_buffers(user, 0, pipe_bufs); if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) { - user_bufs = account_pipe_buffers(user, pipe_bufs, 1); - pipe_bufs = 1; + user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS); + pipe_bufs = PIPE_MIN_DEF_BUFFERS; } if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user()) |