From fcb14cb1bdacec5b4374fe161e83fb8208164a85 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 May 2022 14:59:25 -0400 Subject: new iov_iter flavour - ITER_UBUF Equivalent of single-segment iovec. Initialized by iov_iter_ubuf(), checked for by iter_is_ubuf(), otherwise behaves like ITER_IOVEC ones. We are going to expose the things like ->write_iter() et.al. to those in subsequent commits. New predicate (user_backed_iter()) that is true for ITER_IOVEC and ITER_UBUF; places like direct-IO handling should use that for checking that pages we modify after getting them from iov_iter_get_pages() would need to be dirtied. DO NOT assume that replacing iter_is_iovec() with user_backed_iter() will solve all problems - there's code that uses iter_is_iovec() to decide how to poke around in iov_iter guts and for that the predicate replacement obviously won't suffice. Signed-off-by: Al Viro --- lib/iov_iter.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 69 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 0e0be334dbee..b3493d20536e 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -16,6 +16,16 @@ #define PIPE_PARANOIA /* for now */ +/* covers ubuf and kbuf alike */ +#define iterate_buf(i, n, base, len, off, __p, STEP) { \ + size_t __maybe_unused off = 0; \ + len = n; \ + base = __p + i->iov_offset; \ + len -= (STEP); \ + i->iov_offset += len; \ + n = len; \ +} + /* covers iovec and kvec alike */ #define iterate_iovec(i, n, base, len, off, __p, STEP) { \ size_t off = 0; \ @@ -110,7 +120,12 @@ __out: \ if (unlikely(i->count < n)) \ n = i->count; \ if (likely(n)) { \ - if (likely(iter_is_iovec(i))) { \ + if (likely(iter_is_ubuf(i))) { \ + void __user *base; \ + size_t len; \ + iterate_buf(i, n, base, len, off, \ + i->ubuf, (I)) \ + } else if (likely(iter_is_iovec(i))) { \ const struct iovec *iov = i->iov; \ void __user *base; \ size_t len; \ @@ -275,7 +290,11 @@ out: */ size_t 
fault_in_iov_iter_readable(const struct iov_iter *i, size_t size) { - if (iter_is_iovec(i)) { + if (iter_is_ubuf(i)) { + size_t n = min(size, iov_iter_count(i)); + n -= fault_in_readable(i->ubuf + i->iov_offset, n); + return size - n; + } else if (iter_is_iovec(i)) { size_t count = min(size, iov_iter_count(i)); const struct iovec *p; size_t skip; @@ -314,7 +333,11 @@ EXPORT_SYMBOL(fault_in_iov_iter_readable); */ size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size) { - if (iter_is_iovec(i)) { + if (iter_is_ubuf(i)) { + size_t n = min(size, iov_iter_count(i)); + n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n); + return size - n; + } else if (iter_is_iovec(i)) { size_t count = min(size, iov_iter_count(i)); const struct iovec *p; size_t skip; @@ -345,6 +368,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, *i = (struct iov_iter) { .iter_type = ITER_IOVEC, .nofault = false, + .user_backed = true, .data_source = direction, .iov = iov, .nr_segs = nr_segs, @@ -494,7 +518,7 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(iov_iter_is_pipe(i))) return copy_pipe_to_iter(addr, bytes, i); - if (iter_is_iovec(i)) + if (user_backed_iter(i)) might_fault(); iterate_and_advance(i, bytes, base, len, off, copyout(base, addr + off, len), @@ -583,7 +607,7 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(iov_iter_is_pipe(i))) return copy_mc_pipe_to_iter(addr, bytes, i); - if (iter_is_iovec(i)) + if (user_backed_iter(i)) might_fault(); __iterate_and_advance(i, bytes, base, len, off, copyout_mc(base, addr + off, len), @@ -601,7 +625,7 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) WARN_ON(1); return 0; } - if (iter_is_iovec(i)) + if (user_backed_iter(i)) might_fault(); iterate_and_advance(i, bytes, base, len, off, copyin(addr + off, base, len), @@ -894,16 +918,16 @@ void iov_iter_advance(struct iov_iter *i, size_t size) { if 
(unlikely(i->count < size)) size = i->count; - if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) { + if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) { + i->iov_offset += size; + i->count -= size; + } else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) { /* iovec and kvec have identical layouts */ iov_iter_iovec_advance(i, size); } else if (iov_iter_is_bvec(i)) { iov_iter_bvec_advance(i, size); } else if (iov_iter_is_pipe(i)) { pipe_advance(i, size); - } else if (unlikely(iov_iter_is_xarray(i))) { - i->iov_offset += size; - i->count -= size; } else if (iov_iter_is_discard(i)) { i->count -= size; } @@ -950,7 +974,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) return; } unroll -= i->iov_offset; - if (iov_iter_is_xarray(i)) { + if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) { BUG(); /* We should never go beyond the start of the specified * range since we might then be straying into pages that * aren't pinned. @@ -1158,6 +1182,14 @@ static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask, bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, unsigned len_mask) { + if (likely(iter_is_ubuf(i))) { + if (i->count & len_mask) + return false; + if ((unsigned long)(i->ubuf + i->iov_offset) & addr_mask) + return false; + return true; + } + if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return iov_iter_aligned_iovec(i, addr_mask, len_mask); @@ -1233,6 +1265,13 @@ static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i) unsigned long iov_iter_alignment(const struct iov_iter *i) { + if (likely(iter_is_ubuf(i))) { + size_t size = i->count; + if (size) + return ((unsigned long)i->ubuf + i->iov_offset) | size; + return 0; + } + /* iovec and kvec have identical layouts */ if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return iov_iter_alignment_iovec(i); @@ -1263,6 +1302,9 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) size_t size = i->count; unsigned k; + if 
(iter_is_ubuf(i)) + return 0; + if (WARN_ON(!iter_is_iovec(i))) return ~0U; @@ -1385,12 +1427,15 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, return min_t(size_t, nr * PAGE_SIZE - offset, maxsize); } -/* must be done on non-empty ITER_IOVEC one */ +/* must be done on non-empty ITER_UBUF or ITER_IOVEC one */ static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) { size_t skip; long k; + if (iter_is_ubuf(i)) + return (unsigned long)i->ubuf + i->iov_offset; + for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) { size_t len = i->iov[k].iov_len - skip; @@ -1432,7 +1477,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (maxsize > MAX_RW_COUNT) maxsize = MAX_RW_COUNT; - if (likely(iter_is_iovec(i))) { + if (likely(user_backed_iter(i))) { unsigned int gup_flags = 0; unsigned long addr; @@ -1559,7 +1604,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, if (maxsize > MAX_RW_COUNT) maxsize = MAX_RW_COUNT; - if (likely(iter_is_iovec(i))) { + if (likely(user_backed_iter(i))) { unsigned int gup_flags = 0; unsigned long addr; @@ -1715,6 +1760,11 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) { if (unlikely(!i->count)) return 0; + if (likely(iter_is_ubuf(i))) { + unsigned offs = offset_in_page(i->ubuf + i->iov_offset); + int npages = DIV_ROUND_UP(offs + i->count, PAGE_SIZE); + return min(npages, maxpages); + } /* iovec and kvec have identical layouts */ if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return iov_npages(i, maxpages); @@ -1749,17 +1799,16 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) WARN_ON(1); return NULL; } - if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new))) - return NULL; if (iov_iter_is_bvec(new)) return new->bvec = kmemdup(new->bvec, new->nr_segs * sizeof(struct bio_vec), flags); - else + else if (iov_iter_is_kvec(new) || iter_is_iovec(new)) /* iovec and kvec have identical layout */ return new->iov = kmemdup(new->iov, 
new->nr_segs * sizeof(struct iovec), flags); + return NULL; } EXPORT_SYMBOL(dup_iter); @@ -1953,10 +2002,12 @@ EXPORT_SYMBOL(import_single_range); void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) { if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) && - !iov_iter_is_kvec(i)) + !iov_iter_is_kvec(i) && !iter_is_ubuf(i)) return; i->iov_offset = state->iov_offset; i->count = state->count; + if (iter_is_ubuf(i)) + return; /* * For the *vec iters, nr_segs + iov is constant - if we increment * the vec, then we also decrement the nr_segs count. Hence we don't -- cgit v1.2.3 From 2dcedb2a549a4d7430538213b1b28ef7271bc0aa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Jun 2022 10:24:37 -0400 Subject: ITER_PIPE: helper for getting pipe buffer by index pipe_buffer instances of a pipe are organized as a ring buffer, with power-of-2 size. Indices are kept *not* reduced modulo ring size, so the buffer referred to by index N is pipe->bufs[N & (pipe->ring_size - 1)]. Ring size can change over the lifetime of a pipe, but not while the pipe is locked. So for any iov_iter primitives it's a constant. Original conversion of pipes to this layout went overboard trying to microoptimize that - calculating pipe->ring_size - 1, storing it in a local variable and using it throughout the function. In some cases it might be warranted, but most of the time it only obfuscates what's going on in there. Introduce a helper (pipe_buf(pipe, N)) that would encapsulate that and use it in the obvious cases. More will follow...
Reviewed-by: Jeff Layton Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- lib/iov_iter.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index b3493d20536e..048026d5aa0d 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -183,13 +183,18 @@ static int copyin(void *to, const void __user *from, size_t n) return n; } +static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe, + unsigned int slot) +{ + return &pipe->bufs[slot & (pipe->ring_size - 1)]; +} + #ifdef PIPE_PARANOIA static bool sanity(const struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; unsigned int p_head = pipe->head; unsigned int p_tail = pipe->tail; - unsigned int p_mask = pipe->ring_size - 1; unsigned int p_occupancy = pipe_occupancy(p_head, p_tail); unsigned int i_head = i->head; unsigned int idx; @@ -201,7 +206,7 @@ static bool sanity(const struct iov_iter *i) if (unlikely(i_head != p_head - 1)) goto Bad; // must be at the last buffer... - p = &pipe->bufs[i_head & p_mask]; + p = pipe_buf(pipe, i_head); if (unlikely(p->offset + p->len != i->iov_offset)) goto Bad; // ... 
at the end of segment } else { @@ -386,11 +391,10 @@ static inline bool allocated(struct pipe_buffer *buf) static inline void data_start(const struct iov_iter *i, unsigned int *iter_headp, size_t *offp) { - unsigned int p_mask = i->pipe->ring_size - 1; unsigned int iter_head = i->head; size_t off = i->iov_offset; - if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) || + if (off && (!allocated(pipe_buf(i->pipe, iter_head)) || off == PAGE_SIZE)) { iter_head++; off = 0; @@ -1280,10 +1284,9 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) return iov_iter_alignment_bvec(i); if (iov_iter_is_pipe(i)) { - unsigned int p_mask = i->pipe->ring_size - 1; size_t size = i->count; - if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask])) + if (size && i->iov_offset && allocated(pipe_buf(i->pipe, i->head))) return size | i->iov_offset; return size; } -- cgit v1.2.3 From 47b7fcae419dc940e3fb8e58088a5b80ad813bbf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 13 Jun 2022 14:30:15 -0400 Subject: ITER_PIPE: helpers for adding pipe buffers There are only two kinds of pipe_buffer in the area used by ITER_PIPE. 1) anonymous - copy_to_iter() et al. end up creating those and copying data there. They have zero ->offset, and their ->ops points to default_pipe_buf_ops. 2) zero-copy ones - those come from copy_page_to_iter(), and page comes from caller. ->offset is also caller-supplied - it might be non-zero. ->ops points to page_cache_pipe_buf_ops. Move creation and insertion of those into helpers - push_anon(pipe, size) and push_page(pipe, page, offset, size) resp., separating them from the "could we avoid creating a new buffer by merging with the current head?" logic.
Acked-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 88 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 46 insertions(+), 42 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 048026d5aa0d..a5c436e564e8 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -231,15 +231,39 @@ Bad: #define sanity(i) true #endif +static struct page *push_anon(struct pipe_inode_info *pipe, unsigned size) +{ + struct page *page = alloc_page(GFP_USER); + if (page) { + struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++); + *buf = (struct pipe_buffer) { + .ops = &default_pipe_buf_ops, + .page = page, + .offset = 0, + .len = size + }; + } + return page; +} + +static void push_page(struct pipe_inode_info *pipe, struct page *page, + unsigned int offset, unsigned int size) +{ + struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++); + *buf = (struct pipe_buffer) { + .ops = &page_cache_pipe_buf_ops, + .page = page, + .offset = offset, + .len = size + }; + get_page(page); +} + static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; - struct pipe_buffer *buf; - unsigned int p_tail = pipe->tail; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head = i->head; - size_t off; + unsigned int head = pipe->head; if (unlikely(bytes > i->count)) bytes = i->count; @@ -250,32 +274,21 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by if (!sanity(i)) return 0; - off = i->iov_offset; - buf = &pipe->bufs[i_head & p_mask]; - if (off) { - if (offset == off && buf->page == page) { - /* merge with the last one */ + if (offset && i->iov_offset == offset) { // could we merge it? 
+ struct pipe_buffer *buf = pipe_buf(pipe, head - 1); + if (buf->page == page) { buf->len += bytes; i->iov_offset += bytes; - goto out; + i->count -= bytes; + return bytes; } - i_head++; - buf = &pipe->bufs[i_head & p_mask]; } - if (pipe_full(i_head, p_tail, pipe->max_usage)) + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) return 0; - buf->ops = &page_cache_pipe_buf_ops; - buf->flags = 0; - get_page(page); - buf->page = page; - buf->offset = offset; - buf->len = bytes; - - pipe->head = i_head + 1; + push_page(pipe, page, offset, bytes); i->iov_offset = offset + bytes; - i->head = i_head; -out: + i->head = head; i->count -= bytes; return bytes; } @@ -407,8 +420,6 @@ static size_t push_pipe(struct iov_iter *i, size_t size, int *iter_headp, size_t *offp) { struct pipe_inode_info *pipe = i->pipe; - unsigned int p_tail = pipe->tail; - unsigned int p_mask = pipe->ring_size - 1; unsigned int iter_head; size_t off; ssize_t left; @@ -423,30 +434,23 @@ static size_t push_pipe(struct iov_iter *i, size_t size, *iter_headp = iter_head; *offp = off; if (off) { + struct pipe_buffer *buf = pipe_buf(pipe, iter_head); + left -= PAGE_SIZE - off; if (left <= 0) { - pipe->bufs[iter_head & p_mask].len += size; + buf->len += size; return size; } - pipe->bufs[iter_head & p_mask].len = PAGE_SIZE; - iter_head++; + buf->len = PAGE_SIZE; } - while (!pipe_full(iter_head, p_tail, pipe->max_usage)) { - struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask]; - struct page *page = alloc_page(GFP_USER); + while (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + struct page *page = push_anon(pipe, + min_t(ssize_t, left, PAGE_SIZE)); if (!page) break; - buf->ops = &default_pipe_buf_ops; - buf->flags = 0; - buf->page = page; - buf->offset = 0; - buf->len = min_t(ssize_t, left, PAGE_SIZE); - left -= buf->len; - iter_head++; - pipe->head = iter_head; - - if (left == 0) + left -= PAGE_SIZE; + if (left <= 0) return size; } return size - left; -- cgit v1.2.3 From 
8fad7767edcfd3f93e0d9985cb2dc1db270b8719 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Jun 2022 13:53:53 -0400 Subject: ITER_PIPE: allocate buffers as we go in copy-to-pipe primitives New helper: append_pipe(). Extends the last buffer if possible, allocates a new one otherwise. Returns page and offset in it on success, NULL on failure. iov_iter is advanced past the data we've got. Use that instead of push_pipe() in copy-to-pipe primitives; they get simpler that way. Handling of short copy (in "mc" one) is done simply by iov_iter_revert() - iov_iter is in consistent state after that one, so we can use that. [Fix for braino caught by Liu Xinpeng folded in] [another braino fix, this time in copy_pipe_to_iter() and pipe_zero(); caught by testcase from Hugh Dickins] Signed-off-by: Al Viro --- lib/iov_iter.c | 171 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 98 insertions(+), 73 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index a5c436e564e8..e22c272cb420 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -259,6 +259,45 @@ static void push_page(struct pipe_inode_info *pipe, struct page *page, get_page(page); } +static inline bool allocated(struct pipe_buffer *buf) +{ + return buf->ops == &default_pipe_buf_ops; +} + +static struct page *append_pipe(struct iov_iter *i, size_t size, + unsigned int *off) +{ + struct pipe_inode_info *pipe = i->pipe; + size_t offset = i->iov_offset; + struct pipe_buffer *buf; + struct page *page; + + if (offset && offset < PAGE_SIZE) { + // some space in the last buffer; can we add to it? 
+ buf = pipe_buf(pipe, pipe->head - 1); + if (allocated(buf)) { + size = min_t(size_t, size, PAGE_SIZE - offset); + buf->len += size; + i->iov_offset += size; + i->count -= size; + *off = offset; + return buf->page; + } + } + // OK, we need a new buffer + *off = 0; + size = min_t(size_t, size, PAGE_SIZE); + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + return NULL; + page = push_anon(pipe, size); + if (!page) + return NULL; + i->head = pipe->head - 1; + i->iov_offset = size; + i->count -= size; + return page; +} + static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { @@ -396,11 +435,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, } EXPORT_SYMBOL(iov_iter_init); -static inline bool allocated(struct pipe_buffer *buf) -{ - return buf->ops == &default_pipe_buf_ops; -} - static inline void data_start(const struct iov_iter *i, unsigned int *iter_headp, size_t *offp) { @@ -459,28 +493,24 @@ static size_t push_pipe(struct iov_iter *i, size_t size, static size_t copy_pipe_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head; - size_t n, off; + unsigned int off, chunk; - if (!sanity(i)) + if (unlikely(bytes > i->count)) + bytes = i->count; + if (unlikely(!bytes)) return 0; - bytes = n = push_pipe(i, bytes, &i_head, &off); - if (unlikely(!n)) + if (!sanity(i)) return 0; - do { - size_t chunk = min_t(size_t, n, PAGE_SIZE - off); - memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk); - i->head = i_head; - i->iov_offset = off + chunk; - n -= chunk; + + for (size_t n = bytes; n; n -= chunk) { + struct page *page = append_pipe(i, n, &off); + chunk = min_t(size_t, n, PAGE_SIZE - off); + if (!page) + return bytes - n; + memcpy_to_page(page, off, addr, chunk); addr += chunk; - off = 0; - i_head++; - } while (n); - i->count -= bytes; + } return bytes; } @@ -494,31 
+524,32 @@ static __wsum csum_and_memcpy(void *to, const void *from, size_t len, static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, struct iov_iter *i, __wsum *sump) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; __wsum sum = *sump; size_t off = 0; - unsigned int i_head; - size_t r; + unsigned int chunk, r; + + if (unlikely(bytes > i->count)) + bytes = i->count; + if (unlikely(!bytes)) + return 0; if (!sanity(i)) return 0; - bytes = push_pipe(i, bytes, &i_head, &r); while (bytes) { - size_t chunk = min_t(size_t, bytes, PAGE_SIZE - r); - char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); + struct page *page = append_pipe(i, bytes, &r); + char *p; + + if (!page) + break; + chunk = min_t(size_t, bytes, PAGE_SIZE - r); + p = kmap_local_page(page); sum = csum_and_memcpy(p + r, addr + off, chunk, sum, off); kunmap_local(p); - i->head = i_head; - i->iov_offset = r + chunk; - bytes -= chunk; off += chunk; - r = 0; - i_head++; + bytes -= chunk; } *sump = sum; - i->count -= off; return off; } @@ -550,39 +581,36 @@ static int copyout_mc(void __user *to, const void *from, size_t n) static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head; - unsigned int valid = pipe->head; - size_t n, off, xfer = 0; + size_t xfer = 0; + unsigned int off, chunk; + + if (unlikely(bytes > i->count)) + bytes = i->count; + if (unlikely(!bytes)) + return 0; if (!sanity(i)) return 0; - n = push_pipe(i, bytes, &i_head, &off); - while (n) { - size_t chunk = min_t(size_t, n, PAGE_SIZE - off); - char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); + while (bytes) { + struct page *page = append_pipe(i, bytes, &off); unsigned long rem; + char *p; + + if (!page) + break; + chunk = min_t(size_t, bytes, PAGE_SIZE - off); + p = kmap_local_page(page); rem = copy_mc_to_kernel(p + off, addr + 
xfer, chunk); chunk -= rem; kunmap_local(p); - if (chunk) { - i->head = i_head; - i->iov_offset = off + chunk; - xfer += chunk; - valid = i_head + 1; - } + xfer += chunk; + bytes -= chunk; if (rem) { - pipe->bufs[i_head & p_mask].len -= rem; - pipe_discard_from(pipe, valid); + iov_iter_revert(i, rem); break; } - n -= chunk; - off = 0; - i_head++; } - i->count -= xfer; return xfer; } @@ -769,30 +797,27 @@ EXPORT_SYMBOL(copy_page_from_iter); static size_t pipe_zero(size_t bytes, struct iov_iter *i) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head; - size_t n, off; + unsigned int chunk, off; - if (!sanity(i)) + if (unlikely(bytes > i->count)) + bytes = i->count; + if (unlikely(!bytes)) return 0; - bytes = n = push_pipe(i, bytes, &i_head, &off); - if (unlikely(!n)) + if (!sanity(i)) return 0; - do { - size_t chunk = min_t(size_t, n, PAGE_SIZE - off); - char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); + for (size_t n = bytes; n; n -= chunk) { + struct page *page = append_pipe(i, n, &off); + char *p; + + if (!page) + return bytes - n; + chunk = min_t(size_t, n, PAGE_SIZE - off); + p = kmap_local_page(page); memset(p + off, 0, chunk); kunmap_local(p); - i->head = i_head; - i->iov_offset = off + chunk; - n -= chunk; - off = 0; - i_head++; - } while (n); - i->count -= bytes; + } return bytes; } -- cgit v1.2.3 From e3b42964f84c028f352c11269661d47f6ca4ab2e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 11 Jun 2022 02:52:03 -0400 Subject: ITER_PIPE: fold push_pipe() into __pipe_get_pages() Expand the only remaining call of push_pipe() (in __pipe_get_pages()), combine it with the page-collecting loop there. Note that the only reason it's not a loop doing append_pipe() is that append_pipe() is advancing, while iov_iter_get_pages() is not. As soon as it switches to saner semantics, this thing will switch to using append_pipe(). 
Signed-off-by: Al Viro --- lib/iov_iter.c | 80 ++++++++++++++++++---------------------------------------- 1 file changed, 25 insertions(+), 55 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index e22c272cb420..bf600b4fe980 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -450,46 +450,6 @@ static inline void data_start(const struct iov_iter *i, *offp = off; } -static size_t push_pipe(struct iov_iter *i, size_t size, - int *iter_headp, size_t *offp) -{ - struct pipe_inode_info *pipe = i->pipe; - unsigned int iter_head; - size_t off; - ssize_t left; - - if (unlikely(size > i->count)) - size = i->count; - if (unlikely(!size)) - return 0; - - left = size; - data_start(i, &iter_head, &off); - *iter_headp = iter_head; - *offp = off; - if (off) { - struct pipe_buffer *buf = pipe_buf(pipe, iter_head); - - left -= PAGE_SIZE - off; - if (left <= 0) { - buf->len += size; - return size; - } - buf->len = PAGE_SIZE; - } - while (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { - struct page *page = push_anon(pipe, - min_t(ssize_t, left, PAGE_SIZE)); - if (!page) - break; - - left -= PAGE_SIZE; - if (left <= 0) - return size; - } - return size - left; -} - static size_t copy_pipe_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { @@ -1359,23 +1319,33 @@ static inline ssize_t __pipe_get_pages(struct iov_iter *i, size_t maxsize, struct page **pages, int iter_head, - size_t *start) + size_t off) { struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - ssize_t n = push_pipe(i, maxsize, &iter_head, start); - if (!n) - return -EFAULT; + ssize_t left = maxsize; - maxsize = n; - n += *start; - while (n > 0) { - get_page(*pages++ = pipe->bufs[iter_head & p_mask].page); - iter_head++; - n -= PAGE_SIZE; - } + if (off) { + struct pipe_buffer *buf = pipe_buf(pipe, iter_head); - return maxsize; + get_page(*pages++ = buf->page); + left -= PAGE_SIZE - off; + if (left <= 0) { + buf->len += maxsize; + return maxsize; 
+ } + buf->len = PAGE_SIZE; + } + while (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + struct page *page = push_anon(pipe, + min_t(ssize_t, left, PAGE_SIZE)); + if (!page) + break; + get_page(*pages++ = page); + left -= PAGE_SIZE; + if (left <= 0) + return maxsize; + } + return maxsize - left ? : -EFAULT; } static ssize_t pipe_get_pages(struct iov_iter *i, @@ -1393,7 +1363,7 @@ static ssize_t pipe_get_pages(struct iov_iter *i, npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); capacity = min(npages, maxpages) * PAGE_SIZE - *start; - return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start); + return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, *start); } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, @@ -1575,7 +1545,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, p = get_pages_array(npages); if (!p) return -ENOMEM; - n = __pipe_get_pages(i, maxsize, p, iter_head, start); + n = __pipe_get_pages(i, maxsize, p, iter_head, *start); if (n > 0) *pages = p; else -- cgit v1.2.3 From ca591967543ab1af7e6e68bd505ef7869d3f2175 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 16 Jun 2022 14:26:23 -0400 Subject: ITER_PIPE: lose iter_head argument of __pipe_get_pages() it's only used to get to the partial buffer we can add to, and that's always the last one, i.e. pipe->head - 1. 
Signed-off-by: Al Viro --- lib/iov_iter.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index bf600b4fe980..95c56d42505b 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1318,14 +1318,13 @@ EXPORT_SYMBOL(iov_iter_gap_alignment); static inline ssize_t __pipe_get_pages(struct iov_iter *i, size_t maxsize, struct page **pages, - int iter_head, size_t off) { struct pipe_inode_info *pipe = i->pipe; ssize_t left = maxsize; if (off) { - struct pipe_buffer *buf = pipe_buf(pipe, iter_head); + struct pipe_buffer *buf = pipe_buf(pipe, pipe->head - 1); get_page(*pages++ = buf->page); left -= PAGE_SIZE - off; @@ -1363,7 +1362,7 @@ static ssize_t pipe_get_pages(struct iov_iter *i, npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); capacity = min(npages, maxpages) * PAGE_SIZE - *start; - return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, *start); + return __pipe_get_pages(i, min(maxsize, capacity), pages, *start); } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, @@ -1545,7 +1544,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, p = get_pages_array(npages); if (!p) return -ENOMEM; - n = __pipe_get_pages(i, maxsize, p, iter_head, *start); + n = __pipe_get_pages(i, maxsize, p, *start); if (n > 0) *pages = p; else -- cgit v1.2.3 From 2c855de93314e9573f31044976ffd89cb70a2dbd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Jun 2022 16:03:25 -0400 Subject: ITER_PIPE: clean pipe_advance() up instead of setting ->iov_offset for new position and calling pipe_truncate() to adjust ->len of the last buffer and discard everything after it, adjust ->len at the same time we set ->iov_offset and use pipe_discard_from() to deal with buffers past that. 
Signed-off-by: Al Viro --- lib/iov_iter.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 95c56d42505b..402d49688a16 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -845,27 +845,27 @@ static inline void pipe_truncate(struct iov_iter *i) static void pipe_advance(struct iov_iter *i, size_t size) { struct pipe_inode_info *pipe = i->pipe; - if (size) { - struct pipe_buffer *buf; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head = i->head; - size_t off = i->iov_offset, left = size; + unsigned int off = i->iov_offset; + if (!off && !size) { + pipe_discard_from(pipe, i->start_head); // discard everything + return; + } + i->count -= size; + while (1) { + struct pipe_buffer *buf = pipe_buf(pipe, i->head); if (off) /* make it relative to the beginning of buffer */ - left += off - pipe->bufs[i_head & p_mask].offset; - while (1) { - buf = &pipe->bufs[i_head & p_mask]; - if (left <= buf->len) - break; - left -= buf->len; - i_head++; + size += off - buf->offset; + if (size <= buf->len) { + buf->len = size; + i->iov_offset = buf->offset + size; + break; } - i->head = i_head; - i->iov_offset = buf->offset + left; + size -= buf->len; + i->head++; + off = 0; } - i->count -= size; - /* ... and discard everything past that point */ - pipe_truncate(i); + pipe_discard_from(pipe, i->head + 1); // discard everything past this one } static void iov_iter_bvec_advance(struct iov_iter *i, size_t size) -- cgit v1.2.3 From 92acdc4f37207c556baee0ea28ce0823d22b9812 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jun 2022 17:54:35 -0400 Subject: ITER_PIPE: clean iov_iter_revert() Fold pipe_truncate() into it, clean up. We can release buffers in the same loop where we walk backwards to the iterator beginning looking for the place where the new position will be. 
Signed-off-by: Al Viro --- lib/iov_iter.c | 60 ++++++++++++++-------------------------------------------- 1 file changed, 14 insertions(+), 46 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 402d49688a16..c2e08004a1eb 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -816,32 +816,6 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt } EXPORT_SYMBOL(copy_page_from_iter_atomic); -static inline void pipe_truncate(struct iov_iter *i) -{ - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_tail = pipe->tail; - unsigned int p_head = pipe->head; - unsigned int p_mask = pipe->ring_size - 1; - - if (!pipe_empty(p_head, p_tail)) { - struct pipe_buffer *buf; - unsigned int i_head = i->head; - size_t off = i->iov_offset; - - if (off) { - buf = &pipe->bufs[i_head & p_mask]; - buf->len = off - buf->offset; - i_head++; - } - while (p_head != i_head) { - p_head--; - pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]); - } - - pipe->head = p_head; - } -} - static void pipe_advance(struct iov_iter *i, size_t size) { struct pipe_inode_info *pipe = i->pipe; @@ -936,28 +910,22 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) i->count += unroll; if (unlikely(iov_iter_is_pipe(i))) { struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head = i->head; - size_t off = i->iov_offset; - while (1) { - struct pipe_buffer *b = &pipe->bufs[i_head & p_mask]; - size_t n = off - b->offset; - if (unroll < n) { - off -= unroll; - break; - } - unroll -= n; - if (!unroll && i_head == i->start_head) { - off = 0; - break; + unsigned int head = pipe->head; + + while (head > i->start_head) { + struct pipe_buffer *b = pipe_buf(pipe, --head); + if (unroll < b->len) { + b->len -= unroll; + i->iov_offset = b->offset + b->len; + i->head = head; + return; } - i_head--; - b = &pipe->bufs[i_head & p_mask]; - off = b->offset + b->len; + unroll -= b->len; + pipe_buf_release(pipe, b); 
+ pipe->head--; } - i->iov_offset = off; - i->head = i_head; - pipe_truncate(i); + i->iov_offset = 0; + i->head = head; return; } if (unlikely(iov_iter_is_discard(i))) -- cgit v1.2.3 From 10f525a8cd7a525e9fc73288bb35428c9cad5e63 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Jun 2022 02:02:51 -0400 Subject: ITER_PIPE: cache the type of last buffer We often need to find whether the last buffer is anon or not, and currently it's rather clumsy: check if ->iov_offset is non-zero (i.e. that pipe is not empty) if so, get the corresponding pipe_buffer and check its ->ops if it's &default_pipe_buf_ops, we have an anon buffer. Let's replace the use of ->iov_offset (which is nowhere near similar to its role for other flavours) with signed field (->last_offset), with the following rules: empty, no buffers occupied: 0 anon, with bytes up to N-1 filled: N zero-copy, with bytes up to N-1 filled: -N That way abs(i->last_offset) is equal to what used to be in i->iov_offset and empty vs. anon vs. zero-copy can be distinguished by the sign of i->last_offset. Checks for "should we extend the last buffer or should we start a new one?" become easier to follow that way. Note that most of the operations can only be done in a sane state - i.e. when the pipe has nothing past the current position of iterator. About the only thing that could be done outside of that state is iov_iter_advance(), which transitions to the sane state by truncating the pipe. There are only two cases where we leave the sane state: 1) iov_iter_get_pages()/iov_iter_get_pages_alloc(). Will be dealt with later, when we make get_pages advancing - the callers are actually happier that way. 2) iov_iter copied, then something is put into the copy. Since they share the underlying pipe, the original gets behind. When we decide that we are done with the copy (original is not usable until then) we advance the original. 
direct_io used to be done that way; nowadays it operates on the original and we do iov_iter_revert() to discard the excessive data. At the moment there's nothing in the kernel that could do that to ITER_PIPE iterators, so this reason for insane state is theoretical right now. Signed-off-by: Al Viro --- include/linux/uio.h | 5 +++- lib/iov_iter.c | 77 ++++++++++++++++++++++++++--------------------------- 2 files changed, 42 insertions(+), 40 deletions(-) (limited to 'lib') diff --git a/include/linux/uio.h b/include/linux/uio.h index 85bef84fd294..e7fc29b5ad19 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -40,7 +40,10 @@ struct iov_iter { bool nofault; bool data_source; bool user_backed; - size_t iov_offset; + union { + size_t iov_offset; + int last_offset; + }; size_t count; union { const struct iovec *iov; diff --git a/lib/iov_iter.c b/lib/iov_iter.c index c2e08004a1eb..8834f3f61220 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -199,7 +199,7 @@ static bool sanity(const struct iov_iter *i) unsigned int i_head = i->head; unsigned int idx; - if (i->iov_offset) { + if (i->last_offset) { struct pipe_buffer *p; if (unlikely(p_occupancy == 0)) goto Bad; // pipe must be non-empty @@ -207,7 +207,7 @@ static bool sanity(const struct iov_iter *i) goto Bad; // must be at the last buffer... p = pipe_buf(pipe, i_head); - if (unlikely(p->offset + p->len != i->iov_offset)) + if (unlikely(p->offset + p->len != abs(i->last_offset))) goto Bad; // ... 
at the end of segment } else { if (i_head != p_head) @@ -215,7 +215,7 @@ static bool sanity(const struct iov_iter *i) } return true; Bad: - printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset); + printk(KERN_ERR "idx = %d, offset = %d\n", i_head, i->last_offset); printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n", p_head, p_tail, pipe->ring_size); for (idx = 0; idx < pipe->ring_size; idx++) @@ -259,30 +259,31 @@ static void push_page(struct pipe_inode_info *pipe, struct page *page, get_page(page); } -static inline bool allocated(struct pipe_buffer *buf) +static inline int last_offset(const struct pipe_buffer *buf) { - return buf->ops == &default_pipe_buf_ops; + if (buf->ops == &default_pipe_buf_ops) + return buf->len; // buf->offset is 0 for those + else + return -(buf->offset + buf->len); } static struct page *append_pipe(struct iov_iter *i, size_t size, unsigned int *off) { struct pipe_inode_info *pipe = i->pipe; - size_t offset = i->iov_offset; + int offset = i->last_offset; struct pipe_buffer *buf; struct page *page; - if (offset && offset < PAGE_SIZE) { - // some space in the last buffer; can we add to it? 
+ if (offset > 0 && offset < PAGE_SIZE) { + // some space in the last buffer; add to it buf = pipe_buf(pipe, pipe->head - 1); - if (allocated(buf)) { - size = min_t(size_t, size, PAGE_SIZE - offset); - buf->len += size; - i->iov_offset += size; - i->count -= size; - *off = offset; - return buf->page; - } + size = min_t(size_t, size, PAGE_SIZE - offset); + buf->len += size; + i->last_offset += size; + i->count -= size; + *off = offset; + return buf->page; } // OK, we need a new buffer *off = 0; @@ -293,7 +294,7 @@ static struct page *append_pipe(struct iov_iter *i, size_t size, if (!page) return NULL; i->head = pipe->head - 1; - i->iov_offset = size; + i->last_offset = size; i->count -= size; return page; } @@ -313,11 +314,11 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by if (!sanity(i)) return 0; - if (offset && i->iov_offset == offset) { // could we merge it? + if (offset && i->last_offset == -offset) { // could we merge it? struct pipe_buffer *buf = pipe_buf(pipe, head - 1); if (buf->page == page) { buf->len += bytes; - i->iov_offset += bytes; + i->last_offset -= bytes; i->count -= bytes; return bytes; } @@ -326,7 +327,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by return 0; push_page(pipe, page, offset, bytes); - i->iov_offset = offset + bytes; + i->last_offset = -(offset + bytes); i->head = head; i->count -= bytes; return bytes; @@ -438,16 +439,15 @@ EXPORT_SYMBOL(iov_iter_init); static inline void data_start(const struct iov_iter *i, unsigned int *iter_headp, size_t *offp) { - unsigned int iter_head = i->head; - size_t off = i->iov_offset; + int off = i->last_offset; - if (off && (!allocated(pipe_buf(i->pipe, iter_head)) || - off == PAGE_SIZE)) { - iter_head++; - off = 0; + if (off > 0 && off < PAGE_SIZE) { // anon and not full + *iter_headp = i->pipe->head - 1; + *offp = off; + } else { + *iter_headp = i->pipe->head; + *offp = 0; } - *iter_headp = iter_head; - *offp = off; } static 
size_t copy_pipe_to_iter(const void *addr, size_t bytes, @@ -819,7 +819,7 @@ EXPORT_SYMBOL(copy_page_from_iter_atomic); static void pipe_advance(struct iov_iter *i, size_t size) { struct pipe_inode_info *pipe = i->pipe; - unsigned int off = i->iov_offset; + int off = i->last_offset; if (!off && !size) { pipe_discard_from(pipe, i->start_head); // discard everything @@ -829,10 +829,10 @@ static void pipe_advance(struct iov_iter *i, size_t size) while (1) { struct pipe_buffer *buf = pipe_buf(pipe, i->head); if (off) /* make it relative to the beginning of buffer */ - size += off - buf->offset; + size += abs(off) - buf->offset; if (size <= buf->len) { buf->len = size; - i->iov_offset = buf->offset + size; + i->last_offset = last_offset(buf); break; } size -= buf->len; @@ -916,7 +916,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) struct pipe_buffer *b = pipe_buf(pipe, --head); if (unroll < b->len) { b->len -= unroll; - i->iov_offset = b->offset + b->len; + i->last_offset = last_offset(b); i->head = head; return; } @@ -924,7 +924,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) pipe_buf_release(pipe, b); pipe->head--; } - i->iov_offset = 0; + i->last_offset = 0; i->head = head; return; } @@ -1027,7 +1027,7 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, .pipe = pipe, .head = pipe->head, .start_head = pipe->head, - .iov_offset = 0, + .last_offset = 0, .count = count }; } @@ -1158,13 +1158,12 @@ bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, return iov_iter_aligned_bvec(i, addr_mask, len_mask); if (iov_iter_is_pipe(i)) { - unsigned int p_mask = i->pipe->ring_size - 1; size_t size = i->count; if (size & len_mask) return false; - if (size && allocated(&i->pipe->bufs[i->head & p_mask])) { - if (i->iov_offset & addr_mask) + if (size && i->last_offset > 0) { + if (i->last_offset & addr_mask) return false; } @@ -1243,8 +1242,8 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) if (iov_iter_is_pipe(i)) 
{ size_t size = i->count; - if (size && i->iov_offset && allocated(pipe_buf(i->pipe, i->head))) - return size | i->iov_offset; + if (size && i->last_offset > 0) + return size | i->last_offset; return size; } -- cgit v1.2.3 From 12d426ab64a1c75f1b2ee5c33e933a4c16004049 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Jun 2022 09:44:38 -0400 Subject: ITER_PIPE: fold data_start() and pipe_space_for_user() together All their callers are next to each other; all of them want the total amount of pages and, possibly, the offset in the partial final buffer. Combine into a new helper (pipe_npages()), fix the bogosity in pipe_space_for_user(), while we are at it. Signed-off-by: Al Viro --- include/linux/pipe_fs_i.h | 20 -------------------- lib/iov_iter.c | 44 +++++++++++++++++++------------------------- 2 files changed, 19 insertions(+), 45 deletions(-) (limited to 'lib') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 4ea496924106..6cb65df3e3ba 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -156,26 +156,6 @@ static inline bool pipe_full(unsigned int head, unsigned int tail, return pipe_occupancy(head, tail) >= limit; } -/** - * pipe_space_for_user - Return number of slots available to userspace - * @head: The pipe ring head pointer - * @tail: The pipe ring tail pointer - * @pipe: The pipe info structure - */ -static inline unsigned int pipe_space_for_user(unsigned int head, unsigned int tail, - struct pipe_inode_info *pipe) -{ - unsigned int p_occupancy, p_space; - - p_occupancy = pipe_occupancy(head, tail); - if (p_occupancy >= pipe->max_usage) - return 0; - p_space = pipe->ring_size - p_occupancy; - if (p_space > pipe->max_usage) - p_space = pipe->max_usage; - return p_space; -} - /** * pipe_buf_get - get a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8834f3f61220..12dda1013bea 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -436,18 
+436,20 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, } EXPORT_SYMBOL(iov_iter_init); -static inline void data_start(const struct iov_iter *i, - unsigned int *iter_headp, size_t *offp) +// returns the offset in partial buffer (if any) +static inline unsigned int pipe_npages(const struct iov_iter *i, int *npages) { + struct pipe_inode_info *pipe = i->pipe; + int used = pipe->head - pipe->tail; int off = i->last_offset; + *npages = max((int)pipe->max_usage - used, 0); + if (off > 0 && off < PAGE_SIZE) { // anon and not full - *iter_headp = i->pipe->head - 1; - *offp = off; - } else { - *iter_headp = i->pipe->head; - *offp = 0; + (*npages)++; + return off; } + return 0; } static size_t copy_pipe_to_iter(const void *addr, size_t bytes, @@ -1318,18 +1320,16 @@ static ssize_t pipe_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start) { - unsigned int iter_head, npages; + unsigned int npages, off; size_t capacity; if (!sanity(i)) return -EFAULT; - data_start(i, &iter_head, start); - /* Amount of free space: some of this one + all after this one */ - npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); - capacity = min(npages, maxpages) * PAGE_SIZE - *start; + *start = off = pipe_npages(i, &npages); + capacity = min(npages, maxpages) * PAGE_SIZE - off; - return __pipe_get_pages(i, min(maxsize, capacity), pages, *start); + return __pipe_get_pages(i, min(maxsize, capacity), pages, off); } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, @@ -1494,24 +1494,22 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, size_t *start) { struct page **p; - unsigned int iter_head, npages; + unsigned int npages, off; ssize_t n; if (!sanity(i)) return -EFAULT; - data_start(i, &iter_head, start); - /* Amount of free space: some of this one + all after this one */ - npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); - n = npages * PAGE_SIZE - *start; + *start = 
off = pipe_npages(i, &npages); + n = npages * PAGE_SIZE - off; if (maxsize > n) maxsize = n; else - npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); + npages = DIV_ROUND_UP(maxsize + off, PAGE_SIZE); p = get_pages_array(npages); if (!p) return -ENOMEM; - n = __pipe_get_pages(i, maxsize, p, *start); + n = __pipe_get_pages(i, maxsize, p, off); if (n > 0) *pages = p; else @@ -1739,16 +1737,12 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) if (iov_iter_is_bvec(i)) return bvec_npages(i, maxpages); if (iov_iter_is_pipe(i)) { - unsigned int iter_head; int npages; - size_t off; if (!sanity(i)) return 0; - data_start(i, &iter_head, &off); - /* some of this one + all after this one */ - npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); + pipe_npages(i, &npages); return min(npages, maxpages); } if (iov_iter_is_xarray(i)) { -- cgit v1.2.3 From 91329559eb07c9b12c7ce80e893ad39579c40aa2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jun 2022 20:38:20 -0400 Subject: iov_iter_get_pages_alloc(): lift freeing pages array on failure exits into wrapper Incidentally, ITER_XARRAY did *not* free the sucker in case when iter_xarray_populate_pages() returned 0... 
Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 12dda1013bea..e14749711e34 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1506,15 +1506,10 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, maxsize = n; else npages = DIV_ROUND_UP(maxsize + off, PAGE_SIZE); - p = get_pages_array(npages); + *pages = p = get_pages_array(npages); if (!p) return -ENOMEM; - n = __pipe_get_pages(i, maxsize, p, off); - if (n > 0) - *pages = p; - else - kvfree(p); - return n; + return __pipe_get_pages(i, maxsize, p, off); } static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, @@ -1544,10 +1539,9 @@ static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, count++; } - p = get_pages_array(count); + *pages = p = get_pages_array(count); if (!p) return -ENOMEM; - *pages = p; nr = iter_xarray_populate_pages(p, i->xarray, index, count); if (nr == 0) @@ -1556,7 +1550,7 @@ static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, return min_t(size_t, nr * PAGE_SIZE - offset, maxsize); } -ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, +static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start) { @@ -1583,16 +1577,12 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, *start = addr % PAGE_SIZE; addr &= PAGE_MASK; n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); - p = get_pages_array(n); + *pages = p = get_pages_array(n); if (!p) return -ENOMEM; res = get_user_pages_fast(addr, n, gup_flags, p); - if (unlikely(res <= 0)) { - kvfree(p); - *pages = NULL; + if (unlikely(res <= 0)) return res; - } - *pages = p; return min_t(size_t, maxsize, res * PAGE_SIZE - *start); } if (iov_iter_is_bvec(i)) { @@ -1613,6 +1603,22 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, return iter_xarray_get_pages_alloc(i, pages, maxsize, start); 
return -EFAULT; } + +ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *start) +{ + ssize_t len; + + *pages = NULL; + + len = __iov_iter_get_pages_alloc(i, pages, maxsize, start); + if (len <= 0) { + kvfree(*pages); + *pages = NULL; + } + return len; +} EXPORT_SYMBOL(iov_iter_get_pages_alloc); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, -- cgit v1.2.3 From c81ce28df500b04444ef97dc82a7b0299ce717e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Jun 2022 15:15:14 -0400 Subject: iov_iter_get_pages(): sanity-check arguments zero maxpages is bogus, but best treated as "just return 0"; NULL pages, OTOH, should be treated as a hard bug. get rid of now completely useless checks in xarray_get_pages{,_alloc}(). Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index e14749711e34..7d3158d1f8ea 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1368,9 +1368,6 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, size_t size = maxsize; loff_t pos; - if (!size || !maxpages) - return 0; - pos = i->xarray_start + i->iov_offset; index = pos >> PAGE_SHIFT; offset = pos & ~PAGE_MASK; @@ -1440,10 +1437,11 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (maxsize > i->count) maxsize = i->count; - if (!maxsize) + if (!maxsize || !maxpages) return 0; if (maxsize > MAX_RW_COUNT) maxsize = MAX_RW_COUNT; + BUG_ON(!pages); if (likely(user_backed_iter(i))) { unsigned int gup_flags = 0; @@ -1522,9 +1520,6 @@ static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, size_t size = maxsize; loff_t pos; - if (!size) - return 0; - pos = i->xarray_start + i->iov_offset; index = pos >> PAGE_SHIFT; offset = pos & ~PAGE_MASK; -- cgit v1.2.3 From acbdeb8320b0a470bef1b6c0105d8c2bbc4c4ba0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Jun 2022 13:35:35 -0400 
Subject: unify pipe_get_pages() and pipe_get_pages_alloc() The differences between those two are * pipe_get_pages() gets a non-NULL struct page ** value pointing to preallocated array + array size. * pipe_get_pages_alloc() gets an address of struct page ** variable that contains NULL, allocates the array and (on success) stores its address in that variable. Not hard to combine - always pass struct page ***, have the previous pipe_get_pages_alloc() caller pass ~0U as cap for array size. Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 49 +++++++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 7d3158d1f8ea..916c628f80a0 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1284,6 +1284,11 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) } EXPORT_SYMBOL(iov_iter_gap_alignment); +static struct page **get_pages_array(size_t n) +{ + return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL); +} + static inline ssize_t __pipe_get_pages(struct iov_iter *i, size_t maxsize, struct page **pages, @@ -1317,10 +1322,11 @@ static inline ssize_t __pipe_get_pages(struct iov_iter *i, } static ssize_t pipe_get_pages(struct iov_iter *i, - struct page **pages, size_t maxsize, unsigned maxpages, + struct page ***pages, size_t maxsize, unsigned maxpages, size_t *start) { unsigned int npages, off; + struct page **p; size_t capacity; if (!sanity(i)) @@ -1328,8 +1334,15 @@ static ssize_t pipe_get_pages(struct iov_iter *i, *start = off = pipe_npages(i, &npages); capacity = min(npages, maxpages) * PAGE_SIZE - off; + maxsize = min(maxsize, capacity); + p = *pages; + if (!p) { + *pages = p = get_pages_array(DIV_ROUND_UP(maxsize + off, PAGE_SIZE)); + if (!p) + return -ENOMEM; + } - return __pipe_get_pages(i, min(maxsize, capacity), pages, off); + return __pipe_get_pages(i, maxsize, p, off); } static ssize_t iter_xarray_populate_pages(struct page 
**pages, struct xarray *xa, @@ -1475,41 +1488,13 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, return min_t(size_t, maxsize, n * PAGE_SIZE - *start); } if (iov_iter_is_pipe(i)) - return pipe_get_pages(i, pages, maxsize, maxpages, start); + return pipe_get_pages(i, &pages, maxsize, maxpages, start); if (iov_iter_is_xarray(i)) return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); return -EFAULT; } EXPORT_SYMBOL(iov_iter_get_pages); -static struct page **get_pages_array(size_t n) -{ - return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL); -} - -static ssize_t pipe_get_pages_alloc(struct iov_iter *i, - struct page ***pages, size_t maxsize, - size_t *start) -{ - struct page **p; - unsigned int npages, off; - ssize_t n; - - if (!sanity(i)) - return -EFAULT; - - *start = off = pipe_npages(i, &npages); - n = npages * PAGE_SIZE - off; - if (maxsize > n) - maxsize = n; - else - npages = DIV_ROUND_UP(maxsize + off, PAGE_SIZE); - *pages = p = get_pages_array(npages); - if (!p) - return -ENOMEM; - return __pipe_get_pages(i, maxsize, p, off); -} - static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *_start_offset) @@ -1593,7 +1578,7 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, return min_t(size_t, maxsize, n * PAGE_SIZE - *start); } if (iov_iter_is_pipe(i)) - return pipe_get_pages_alloc(i, pages, maxsize, start); + return pipe_get_pages(i, pages, maxsize, ~0U, start); if (iov_iter_is_xarray(i)) return iter_xarray_get_pages_alloc(i, pages, maxsize, start); return -EFAULT; -- cgit v1.2.3 From 68fe506f3731ecf7881de9512cc5f4c14fd13f3a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Jun 2022 13:48:03 -0400 Subject: unify xarray_get_pages() and xarray_get_pages_alloc() same as for pipes Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 49 ++++++++++--------------------------------------- 1 file changed, 10 insertions(+), 39 deletions(-) (limited to 'lib') diff 
--git a/lib/iov_iter.c b/lib/iov_iter.c index 916c628f80a0..6904a1cc36f2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1373,7 +1373,7 @@ static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa } static ssize_t iter_xarray_get_pages(struct iov_iter *i, - struct page **pages, size_t maxsize, + struct page ***pages, size_t maxsize, unsigned maxpages, size_t *_start_offset) { unsigned nr, offset; @@ -1398,7 +1398,13 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, if (count > maxpages) count = maxpages; - nr = iter_xarray_populate_pages(pages, i->xarray, index, count); + if (!*pages) { + *pages = get_pages_array(count); + if (!*pages) + return -ENOMEM; + } + + nr = iter_xarray_populate_pages(*pages, i->xarray, index, count); if (nr == 0) return 0; @@ -1490,46 +1496,11 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (iov_iter_is_pipe(i)) return pipe_get_pages(i, &pages, maxsize, maxpages, start); if (iov_iter_is_xarray(i)) - return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); + return iter_xarray_get_pages(i, &pages, maxsize, maxpages, start); return -EFAULT; } EXPORT_SYMBOL(iov_iter_get_pages); -static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, - struct page ***pages, size_t maxsize, - size_t *_start_offset) -{ - struct page **p; - unsigned nr, offset; - pgoff_t index, count; - size_t size = maxsize; - loff_t pos; - - pos = i->xarray_start + i->iov_offset; - index = pos >> PAGE_SHIFT; - offset = pos & ~PAGE_MASK; - *_start_offset = offset; - - count = 1; - if (size > PAGE_SIZE - offset) { - size -= PAGE_SIZE - offset; - count += size >> PAGE_SHIFT; - size &= ~PAGE_MASK; - if (size) - count++; - } - - *pages = p = get_pages_array(count); - if (!p) - return -ENOMEM; - - nr = iter_xarray_populate_pages(p, i->xarray, index, count); - if (nr == 0) - return 0; - - return min_t(size_t, nr * PAGE_SIZE - offset, maxsize); -} - static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, struct page 
***pages, size_t maxsize, size_t *start) @@ -1580,7 +1551,7 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, if (iov_iter_is_pipe(i)) return pipe_get_pages(i, pages, maxsize, ~0U, start); if (iov_iter_is_xarray(i)) - return iter_xarray_get_pages_alloc(i, pages, maxsize, start); + return iter_xarray_get_pages(i, pages, maxsize, ~0U, start); return -EFAULT; } -- cgit v1.2.3 From 451c0ba9475ebdce36249c5c769efa5d580d1d83 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Jun 2022 13:54:15 -0400 Subject: unify the rest of iov_iter_get_pages()/iov_iter_get_pages_alloc() guts same as for pipes and xarrays; after that iov_iter_get_pages() becomes a wrapper for __iov_iter_get_pages_alloc(). Signed-off-by: Al Viro --- lib/iov_iter.c | 86 ++++++++++++++++++---------------------------------------- 1 file changed, 27 insertions(+), 59 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 6904a1cc36f2..8c6cdc1cf832 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1448,19 +1448,18 @@ static struct page *first_bvec_segment(const struct iov_iter *i, return page; } -ssize_t iov_iter_get_pages(struct iov_iter *i, - struct page **pages, size_t maxsize, unsigned maxpages, - size_t *start) +static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, + unsigned int maxpages, size_t *start) { int n, res; if (maxsize > i->count) maxsize = i->count; - if (!maxsize || !maxpages) + if (!maxsize) return 0; if (maxsize > MAX_RW_COUNT) maxsize = MAX_RW_COUNT; - BUG_ON(!pages); if (likely(user_backed_iter(i))) { unsigned int gup_flags = 0; @@ -1477,83 +1476,52 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); if (n > maxpages) n = maxpages; - res = get_user_pages_fast(addr, n, gup_flags, pages); + if (!*pages) { + *pages = get_pages_array(n); + if (!*pages) + return -ENOMEM; + } + res = get_user_pages_fast(addr, n, gup_flags, *pages); if (unlikely(res <= 0)) 
return res; return min_t(size_t, maxsize, res * PAGE_SIZE - *start); } if (iov_iter_is_bvec(i)) { + struct page **p; struct page *page; page = first_bvec_segment(i, &maxsize, start); n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); if (n > maxpages) n = maxpages; + p = *pages; + if (!p) { + *pages = p = get_pages_array(n); + if (!p) + return -ENOMEM; + } for (int k = 0; k < n; k++) - get_page(*pages++ = page++); + get_page(*p++ = page++); return min_t(size_t, maxsize, n * PAGE_SIZE - *start); } if (iov_iter_is_pipe(i)) - return pipe_get_pages(i, &pages, maxsize, maxpages, start); + return pipe_get_pages(i, pages, maxsize, maxpages, start); if (iov_iter_is_xarray(i)) - return iter_xarray_get_pages(i, &pages, maxsize, maxpages, start); + return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); return -EFAULT; } -EXPORT_SYMBOL(iov_iter_get_pages); -static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, - struct page ***pages, size_t maxsize, +ssize_t iov_iter_get_pages(struct iov_iter *i, + struct page **pages, size_t maxsize, unsigned maxpages, size_t *start) { - struct page **p; - int n, res; - - if (maxsize > i->count) - maxsize = i->count; - if (!maxsize) + if (!maxpages) return 0; - if (maxsize > MAX_RW_COUNT) - maxsize = MAX_RW_COUNT; - - if (likely(user_backed_iter(i))) { - unsigned int gup_flags = 0; - unsigned long addr; - - if (iov_iter_rw(i) != WRITE) - gup_flags |= FOLL_WRITE; - if (i->nofault) - gup_flags |= FOLL_NOFAULT; - - addr = first_iovec_segment(i, &maxsize); - *start = addr % PAGE_SIZE; - addr &= PAGE_MASK; - n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); - *pages = p = get_pages_array(n); - if (!p) - return -ENOMEM; - res = get_user_pages_fast(addr, n, gup_flags, p); - if (unlikely(res <= 0)) - return res; - return min_t(size_t, maxsize, res * PAGE_SIZE - *start); - } - if (iov_iter_is_bvec(i)) { - struct page *page; + BUG_ON(!pages); - page = first_bvec_segment(i, &maxsize, start); - n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); - 
*pages = p = get_pages_array(n); - if (!p) - return -ENOMEM; - for (int k = 0; k < n; k++) - get_page(*p++ = page++); - return min_t(size_t, maxsize, n * PAGE_SIZE - *start); - } - if (iov_iter_is_pipe(i)) - return pipe_get_pages(i, pages, maxsize, ~0U, start); - if (iov_iter_is_xarray(i)) - return iter_xarray_get_pages(i, pages, maxsize, ~0U, start); - return -EFAULT; + return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, start); } +EXPORT_SYMBOL(iov_iter_get_pages); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, @@ -1563,7 +1531,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, *pages = NULL; - len = __iov_iter_get_pages_alloc(i, pages, maxsize, start); + len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start); if (len <= 0) { kvfree(*pages); *pages = NULL; -- cgit v1.2.3 From 0aa4fc32f54028f6fbb35bf71df4b0d86ff1662b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jun 2022 20:30:35 -0400 Subject: ITER_XARRAY: don't open-code DIV_ROUND_UP() Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8c6cdc1cf832..c78129e709f2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1386,15 +1386,7 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, offset = pos & ~PAGE_MASK; *_start_offset = offset; - count = 1; - if (size > PAGE_SIZE - offset) { - size -= PAGE_SIZE - offset; - count += size >> PAGE_SHIFT; - size &= ~PAGE_MASK; - if (size) - count++; - } - + count = DIV_ROUND_UP(size + offset, PAGE_SIZE); if (count > maxpages) count = maxpages; -- cgit v1.2.3 From 8520008417c581c4c22e39597f92b9814ae34c31 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Jun 2022 14:30:39 -0400 Subject: fold __pipe_get_pages() into pipe_get_pages() ... and don't mangle maxsize there - turn the loop into counting one instead. 
Easier to see that we won't run out of array that way. Note that special treatment of the partial buffer in that thing is an artifact of the non-advancing semantics of iov_iter_get_pages() - if not for that, it would be append_pipe(), same as the body of the loop that follows it. IOW, once we make iov_iter_get_pages() advancing, the whole thing will turn into: calculate how many pages do we want; allocate an array (if needed); call append_pipe() that many times. Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 75 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 38 insertions(+), 37 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index c78129e709f2..a9446efac70d 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1289,60 +1289,61 @@ static struct page **get_pages_array(size_t n) return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL); } -static inline ssize_t __pipe_get_pages(struct iov_iter *i, - size_t maxsize, - struct page **pages, - size_t off) -{ - struct pipe_inode_info *pipe = i->pipe; - ssize_t left = maxsize; - - if (off) { - struct pipe_buffer *buf = pipe_buf(pipe, pipe->head - 1); - - get_page(*pages++ = buf->page); - left -= PAGE_SIZE - off; - if (left <= 0) { - buf->len += maxsize; - return maxsize; - } - buf->len = PAGE_SIZE; - } - while (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { - struct page *page = push_anon(pipe, - min_t(ssize_t, left, PAGE_SIZE)); - if (!page) - break; - get_page(*pages++ = page); - left -= PAGE_SIZE; - if (left <= 0) - return maxsize; - } - return maxsize - left ?
: -EFAULT; -} - static ssize_t pipe_get_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned maxpages, size_t *start) { + struct pipe_inode_info *pipe = i->pipe; unsigned int npages, off; struct page **p; - size_t capacity; + ssize_t left; + int count; if (!sanity(i)) return -EFAULT; *start = off = pipe_npages(i, &npages); - capacity = min(npages, maxpages) * PAGE_SIZE - off; - maxsize = min(maxsize, capacity); + count = DIV_ROUND_UP(maxsize + off, PAGE_SIZE); + if (count > npages) + count = npages; + if (count > maxpages) + count = maxpages; p = *pages; if (!p) { - *pages = p = get_pages_array(DIV_ROUND_UP(maxsize + off, PAGE_SIZE)); + *pages = p = get_pages_array(count); if (!p) return -ENOMEM; } - return __pipe_get_pages(i, maxsize, p, off); + left = maxsize; + npages = 0; + if (off) { + struct pipe_buffer *buf = pipe_buf(pipe, pipe->head - 1); + + get_page(*p++ = buf->page); + left -= PAGE_SIZE - off; + if (left <= 0) { + buf->len += maxsize; + return maxsize; + } + buf->len = PAGE_SIZE; + npages = 1; + } + for ( ; npages < count; npages++) { + struct page *page; + unsigned int size = min_t(ssize_t, left, PAGE_SIZE); + + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + break; + page = push_anon(pipe, size); + if (!page) + break; + get_page(*p++ = page); + left -= size; + } + if (!npages) + return -EFAULT; + return maxsize - left; } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, -- cgit v1.2.3 From 3cf42da327f26eb4461864dd64812345b37f4fd9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Jun 2022 14:45:41 -0400 Subject: iov_iter: saner helper for page array allocation All call sites of get_pages_array() are essentially identical now. Replace with common helper... Returns number of slots available in resulting array or 0 on OOM; it's up to the caller to make sure it doesn't ask for a zero-entry array (i.e. neither maxpages nor size are allowed to be zero).
Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- lib/iov_iter.c | 77 ++++++++++++++++++++++++---------------------------------- 1 file changed, 32 insertions(+), 45 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index a9446efac70d..f003a20d8683 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1284,9 +1284,20 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) } EXPORT_SYMBOL(iov_iter_gap_alignment); -static struct page **get_pages_array(size_t n) +static int want_pages_array(struct page ***res, size_t size, + size_t start, unsigned int maxpages) { - return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL); + unsigned int count = DIV_ROUND_UP(size + start, PAGE_SIZE); + + if (count > maxpages) + count = maxpages; + WARN_ON(!count); // caller should've prevented that + if (!*res) { + *res = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL); + if (!*res) + return 0; + } + return count; } static ssize_t pipe_get_pages(struct iov_iter *i, @@ -1294,27 +1305,20 @@ static ssize_t pipe_get_pages(struct iov_iter *i, size_t *start) { struct pipe_inode_info *pipe = i->pipe; - unsigned int npages, off; + unsigned int npages, off, count; struct page **p; ssize_t left; - int count; if (!sanity(i)) return -EFAULT; *start = off = pipe_npages(i, &npages); - count = DIV_ROUND_UP(maxsize + off, PAGE_SIZE); - if (count > npages) - count = npages; - if (count > maxpages) - count = maxpages; + if (!npages) + return -EFAULT; + count = want_pages_array(pages, maxsize, off, min(npages, maxpages)); + if (!count) + return -ENOMEM; p = *pages; - if (!p) { - *pages = p = get_pages_array(count); - if (!p) - return -ENOMEM; - } - left = maxsize; npages = 0; if (off) { @@ -1377,9 +1381,8 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned maxpages, size_t *_start_offset) { - unsigned nr, offset; - pgoff_t index, count; - size_t size = maxsize; + unsigned nr, offset, count; + pgoff_t 
index; loff_t pos; pos = i->xarray_start + i->iov_offset; @@ -1387,16 +1390,9 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, offset = pos & ~PAGE_MASK; *_start_offset = offset; - count = DIV_ROUND_UP(size + offset, PAGE_SIZE); - if (count > maxpages) - count = maxpages; - - if (!*pages) { - *pages = get_pages_array(count); - if (!*pages) - return -ENOMEM; - } - + count = want_pages_array(pages, maxsize, offset, maxpages); + if (!count) + return -ENOMEM; nr = iter_xarray_populate_pages(*pages, i->xarray, index, count); if (nr == 0) return 0; @@ -1445,7 +1441,7 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, size_t *start) { - int n, res; + unsigned int n; if (maxsize > i->count) maxsize = i->count; @@ -1457,6 +1453,7 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, if (likely(user_backed_iter(i))) { unsigned int gup_flags = 0; unsigned long addr; + int res; if (iov_iter_rw(i) != WRITE) gup_flags |= FOLL_WRITE; @@ -1466,14 +1463,9 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, addr = first_iovec_segment(i, &maxsize); *start = addr % PAGE_SIZE; addr &= PAGE_MASK; - n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); - if (n > maxpages) - n = maxpages; - if (!*pages) { - *pages = get_pages_array(n); - if (!*pages) - return -ENOMEM; - } + n = want_pages_array(pages, maxsize, *start, maxpages); + if (!n) + return -ENOMEM; res = get_user_pages_fast(addr, n, gup_flags, *pages); if (unlikely(res <= 0)) return res; @@ -1484,15 +1476,10 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, struct page *page; page = first_bvec_segment(i, &maxsize, start); - n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); - if (n > maxpages) - n = maxpages; + n = want_pages_array(pages, maxsize, *start, maxpages); + if (!n) + return -ENOMEM; p = *pages; - if (!p) { - *pages = p = get_pages_array(n); - if (!p) - return -ENOMEM; - } for (int k = 0; k < n; k++) 
get_page(*p++ = page++); return min_t(size_t, maxsize, n * PAGE_SIZE - *start); -- cgit v1.2.3 From eba2d3d798295dc43cae8fade102f9d083a2a741 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jun 2022 13:05:12 -0400 Subject: get rid of non-advancing variants mechanical change; will be further massaged in subsequent commits Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/uio.h | 24 ++---------------------- lib/iov_iter.c | 27 ++++++++++++++++++--------- 2 files changed, 20 insertions(+), 31 deletions(-) (limited to 'lib') diff --git a/include/linux/uio.h b/include/linux/uio.h index b70d28693400..5896af36199c 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -247,9 +247,9 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t start, size_t count); -ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, +ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); -ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, +ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state); @@ -351,24 +351,4 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, }; } -static inline ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, - size_t maxsize, unsigned maxpages, size_t *start) -{ - ssize_t res = iov_iter_get_pages(i, pages, maxsize, maxpages, start); - - if (res >= 0) - iov_iter_advance(i, res); - return res; -} - -static inline ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, - size_t maxsize, size_t 
*start) -{ - ssize_t res = iov_iter_get_pages_alloc(i, pages, maxsize, start); - - if (res >= 0) - iov_iter_advance(i, res); - return res; -} - #endif diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f003a20d8683..c48c83602aae 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1328,6 +1328,7 @@ static ssize_t pipe_get_pages(struct iov_iter *i, left -= PAGE_SIZE - off; if (left <= 0) { buf->len += maxsize; + iov_iter_advance(i, maxsize); return maxsize; } buf->len = PAGE_SIZE; @@ -1347,7 +1348,9 @@ static ssize_t pipe_get_pages(struct iov_iter *i, } if (!npages) return -EFAULT; - return maxsize - left; + maxsize -= left; + iov_iter_advance(i, maxsize); + return maxsize; } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, @@ -1397,7 +1400,9 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, if (nr == 0) return 0; - return min_t(size_t, nr * PAGE_SIZE - offset, maxsize); + maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize); + iov_iter_advance(i, maxsize); + return maxsize; } /* must be done on non-empty ITER_UBUF or ITER_IOVEC one */ @@ -1469,7 +1474,9 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, res = get_user_pages_fast(addr, n, gup_flags, *pages); if (unlikely(res <= 0)) return res; - return min_t(size_t, maxsize, res * PAGE_SIZE - *start); + maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - *start); + iov_iter_advance(i, maxsize); + return maxsize; } if (iov_iter_is_bvec(i)) { struct page **p; @@ -1481,8 +1488,10 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, return -ENOMEM; p = *pages; for (int k = 0; k < n; k++) - get_page(*p++ = page++); - return min_t(size_t, maxsize, n * PAGE_SIZE - *start); + get_page(p[k] = page + k); + maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start); + iov_iter_advance(i, maxsize); + return maxsize; } if (iov_iter_is_pipe(i)) return pipe_get_pages(i, pages, maxsize, maxpages, start); @@ -1491,7 +1500,7 @@ static ssize_t 
__iov_iter_get_pages_alloc(struct iov_iter *i, return -EFAULT; } -ssize_t iov_iter_get_pages(struct iov_iter *i, +ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start) { @@ -1501,9 +1510,9 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, start); } -EXPORT_SYMBOL(iov_iter_get_pages); +EXPORT_SYMBOL(iov_iter_get_pages2); -ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, +ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start) { @@ -1518,7 +1527,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, } return len; } -EXPORT_SYMBOL(iov_iter_get_pages_alloc); +EXPORT_SYMBOL(iov_iter_get_pages_alloc2); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) -- cgit v1.2.3 From 746de1f86fcd33464acac047f111eea877f2f7a0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Jun 2022 16:38:53 -0400 Subject: pipe_get_pages(): switch to append_pipe() now that we are advancing the iterator, there's no need to treat the first page separately - just call append_pipe() in a loop. 
Signed-off-by: Al Viro --- lib/iov_iter.c | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index c48c83602aae..415d51bbc727 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1304,10 +1304,9 @@ static ssize_t pipe_get_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned maxpages, size_t *start) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int npages, off, count; + unsigned int npages, count, off, chunk; struct page **p; - ssize_t left; + size_t left; if (!sanity(i)) return -EFAULT; @@ -1319,38 +1318,16 @@ static ssize_t pipe_get_pages(struct iov_iter *i, if (!count) return -ENOMEM; p = *pages; - left = maxsize; - npages = 0; - if (off) { - struct pipe_buffer *buf = pipe_buf(pipe, pipe->head - 1); - - get_page(*p++ = buf->page); - left -= PAGE_SIZE - off; - if (left <= 0) { - buf->len += maxsize; - iov_iter_advance(i, maxsize); - return maxsize; - } - buf->len = PAGE_SIZE; - npages = 1; - } - for ( ; npages < count; npages++) { - struct page *page; - unsigned int size = min_t(ssize_t, left, PAGE_SIZE); - - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) - break; - page = push_anon(pipe, size); + for (npages = 0, left = maxsize ; npages < count; npages++, left -= chunk) { + struct page *page = append_pipe(i, left, &off); if (!page) break; + chunk = min_t(size_t, left, PAGE_SIZE - off); get_page(*p++ = page); - left -= size; } if (!npages) return -EFAULT; - maxsize -= left; - iov_iter_advance(i, maxsize); - return maxsize; + return maxsize - left; } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, -- cgit v1.2.3 From 310d9d5a5009a93377200b98daa2d84aa2bd8160 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 11 Jun 2022 04:04:33 -0400 Subject: expand those iov_iter_advance()... 
Signed-off-by: Al Viro --- lib/iov_iter.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 415d51bbc727..46ec07886d7b 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1378,7 +1378,8 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, return 0; maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize); - iov_iter_advance(i, maxsize); + i->iov_offset += maxsize; + i->count -= maxsize; return maxsize; } @@ -1467,7 +1468,13 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, for (int k = 0; k < n; k++) get_page(p[k] = page + k); maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start); - iov_iter_advance(i, maxsize); + i->count -= maxsize; + i->iov_offset += maxsize; + if (i->iov_offset == i->bvec->bv_len) { + i->iov_offset = 0; + i->bvec++; + i->nr_segs--; + } return maxsize; } if (iov_iter_is_pipe(i)) -- cgit v1.2.3 From f0f6b614f83dbae99d283b7b12ab5dd2e04df979 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 23 Jun 2022 17:21:37 -0400 Subject: copy_page_to_iter(): don't split high-order page in case of ITER_PIPE ... just shove it into one pipe_buffer. 
Signed-off-by: Al Viro --- lib/iov_iter.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 46ec07886d7b..4e3696d349a4 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -706,30 +706,21 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) return false; } -static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes, - struct iov_iter *i) -{ - if (unlikely(iov_iter_is_pipe(i))) { - return copy_page_to_iter_pipe(page, offset, bytes, i); - } else { - void *kaddr = kmap_local_page(page); - size_t wanted = _copy_to_iter(kaddr + offset, bytes, i); - kunmap_local(kaddr); - return wanted; - } -} - size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t res = 0; if (unlikely(!page_copy_sane(page, offset, bytes))) return 0; + if (unlikely(iov_iter_is_pipe(i))) + return copy_page_to_iter_pipe(page, offset, bytes, i); page += offset / PAGE_SIZE; // first subpage offset %= PAGE_SIZE; while (1) { - size_t n = __copy_page_to_iter(page, offset, - min(bytes, (size_t)PAGE_SIZE - offset), i); + void *kaddr = kmap_local_page(page); + size_t n = min(bytes, (size_t)PAGE_SIZE - offset); + n = _copy_to_iter(kaddr + offset, n, i); + kunmap_local(kaddr); res += n; bytes -= n; if (!bytes || !n) -- cgit v1.2.3 From c03f05f183cd15f4259684ab658fbc3d23797d99 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 29 Jul 2022 12:54:53 -0400 Subject: fix copy_page_from_iter() for compound destinations had been broken for ITER_BVEC et.al. since ever (OK, v3.17 when ITER_BVEC had first appeared)... 
Signed-off-by: Al Viro --- lib/iov_iter.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 4e3696d349a4..4b7fce72e3e5 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -738,13 +738,27 @@ EXPORT_SYMBOL(copy_page_to_iter); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { - if (page_copy_sane(page, offset, bytes)) { + size_t res = 0; + if (!page_copy_sane(page, offset, bytes)) + return 0; + page += offset / PAGE_SIZE; // first subpage + offset %= PAGE_SIZE; + while (1) { void *kaddr = kmap_local_page(page); - size_t wanted = _copy_from_iter(kaddr + offset, bytes, i); + size_t n = min(bytes, (size_t)PAGE_SIZE - offset); + n = _copy_from_iter(kaddr + offset, n, i); kunmap_local(kaddr); - return wanted; + res += n; + bytes -= n; + if (!bytes || !n) + break; + offset += n; + if (offset == PAGE_SIZE) { + page++; + offset = 0; + } } - return 0; + return res; } EXPORT_SYMBOL(copy_page_from_iter); -- cgit v1.2.3