diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 162 |
1 files changed, 147 insertions, 15 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 2ebcf500871d..2723104cc06a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -42,6 +42,8 @@ #include <linux/ramfs.h> #include <linux/page_idle.h> #include <linux/migrate.h> +#include <linux/pipe_fs_i.h> +#include <linux/splice.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include "internal.h" @@ -2436,21 +2438,19 @@ static int filemap_read_folio(struct file *file, filler_t filler, } static bool filemap_range_uptodate(struct address_space *mapping, - loff_t pos, struct iov_iter *iter, struct folio *folio) + loff_t pos, size_t count, struct folio *folio, + bool need_uptodate) { - int count; - if (folio_test_uptodate(folio)) return true; /* pipes can't handle partially uptodate pages */ - if (iov_iter_is_pipe(iter)) + if (need_uptodate) return false; if (!mapping->a_ops->is_partially_uptodate) return false; if (mapping->host->i_blkbits >= folio_shift(folio)) return false; - count = iter->count; if (folio_pos(folio) > pos) { count -= folio_pos(folio) - pos; pos = 0; @@ -2462,8 +2462,8 @@ static bool filemap_range_uptodate(struct address_space *mapping, } static int filemap_update_page(struct kiocb *iocb, - struct address_space *mapping, struct iov_iter *iter, - struct folio *folio) + struct address_space *mapping, size_t count, + struct folio *folio, bool need_uptodate) { int error; @@ -2497,7 +2497,8 @@ static int filemap_update_page(struct kiocb *iocb, goto unlock; error = 0; - if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, folio)) + if (filemap_range_uptodate(mapping, iocb->ki_pos, count, folio, + need_uptodate)) goto unlock; error = -EAGAIN; @@ -2573,8 +2574,8 @@ static int filemap_readahead(struct kiocb *iocb, struct file *file, return 0; } -static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter, - struct folio_batch *fbatch) +static int filemap_get_pages(struct kiocb *iocb, size_t count, + struct folio_batch *fbatch, bool need_uptodate) { struct file *filp = iocb->ki_filp; struct address_space *mapping = filp->f_mapping; @@ -2584,18 +2585,19 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter, struct folio *folio; int err = 0; - last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE); + /* "last_index" is the index of the page beyond the end of the read */ + last_index = DIV_ROUND_UP(iocb->ki_pos + count, PAGE_SIZE); retry: if (fatal_signal_pending(current)) return -EINTR; - filemap_get_read_batch(mapping, index, last_index, fbatch); + filemap_get_read_batch(mapping, index, last_index - 1, fbatch); if (!folio_batch_count(fbatch)) { if (iocb->ki_flags & IOCB_NOIO) return -EAGAIN; page_cache_sync_readahead(mapping, ra, filp, index, last_index - index); - filemap_get_read_batch(mapping, index, last_index, fbatch); + filemap_get_read_batch(mapping, index, last_index - 1, fbatch); } if (!folio_batch_count(fbatch)) { if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ)) @@ -2617,7 +2619,8 @@ retry: if ((iocb->ki_flags & IOCB_WAITQ) && folio_batch_count(fbatch) > 1) iocb->ki_flags |= IOCB_NOWAIT; - err = filemap_update_page(iocb, mapping, iter, folio); + err = filemap_update_page(iocb, mapping, count, folio, + need_uptodate); if (err) goto err; } @@ -2687,7 +2690,8 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(iocb->ki_pos >= i_size_read(inode))) break; - error = filemap_get_pages(iocb, iter, &fbatch); + error = filemap_get_pages(iocb, iter->count, &fbatch, + iov_iter_is_pipe(iter)); if (error < 0) break; @@ -2837,6 +2841,134 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) } EXPORT_SYMBOL(generic_file_read_iter); +/* + * Splice subpages from a folio into a pipe. + */ +size_t splice_folio_into_pipe(struct pipe_inode_info *pipe, + struct folio *folio, loff_t fpos, size_t size) +{ + struct page *page; + size_t spliced = 0, offset = offset_in_folio(folio, fpos); + + page = folio_page(folio, offset / PAGE_SIZE); + size = min(size, folio_size(folio) - offset); + offset %= PAGE_SIZE; + + while (spliced < size && + !pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + struct pipe_buffer *buf = pipe_head_buf(pipe); + size_t part = min_t(size_t, PAGE_SIZE - offset, size - spliced); + + *buf = (struct pipe_buffer) { + .ops = &page_cache_pipe_buf_ops, + .page = page, + .offset = offset, + .len = part, + }; + folio_get(folio); + pipe->head++; + page++; + spliced += part; + offset = 0; + } + + return spliced; +} + +/* + * Splice folios from the pagecache of a buffered (ie. non-O_DIRECT) file into + * a pipe. + */ +ssize_t filemap_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags) +{ + struct folio_batch fbatch; + struct kiocb iocb; + size_t total_spliced = 0, used, npages; + loff_t isize, end_offset; + bool writably_mapped; + int i, error = 0; + + init_sync_kiocb(&iocb, in); + iocb.ki_pos = *ppos; + + /* Work out how much data we can actually add into the pipe */ + used = pipe_occupancy(pipe->head, pipe->tail); + npages = max_t(ssize_t, pipe->max_usage - used, 0); + len = min_t(size_t, len, npages * PAGE_SIZE); + + folio_batch_init(&fbatch); + + do { + cond_resched(); + + if (*ppos >= i_size_read(file_inode(in))) + break; + + iocb.ki_pos = *ppos; + error = filemap_get_pages(&iocb, len, &fbatch, true); + if (error < 0) + break; + + /* + * i_size must be checked after we know the pages are Uptodate. + * + * Checking i_size after the check allows us to calculate + * the correct value for "nr", which means the zero-filled + * part of the page is not copied back to userspace (unless + * another truncate extends the file - this is desired though). + */ + isize = i_size_read(file_inode(in)); + if (unlikely(*ppos >= isize)) + break; + end_offset = min_t(loff_t, isize, *ppos + len); + + /* + * Once we start copying data, we don't want to be touching any + * cachelines that might be contended: + */ + writably_mapped = mapping_writably_mapped(in->f_mapping); + + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i]; + size_t n; + + if (folio_pos(folio) >= end_offset) + goto out; + folio_mark_accessed(folio); + + /* + * If users can be writing to this folio using arbitrary + * virtual addresses, take care of potential aliasing + * before reading the folio on the kernel side. + */ + if (writably_mapped) + flush_dcache_folio(folio); + + n = min_t(loff_t, len, isize - *ppos); + n = splice_folio_into_pipe(pipe, folio, *ppos, n); + if (!n) + goto out; + len -= n; + total_spliced += n; + *ppos += n; + in->f_ra.prev_pos = *ppos; + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + goto out; + } + + folio_batch_release(&fbatch); + } while (len); + +out: + folio_batch_release(&fbatch); + file_accessed(in); + + return total_spliced ? total_spliced : error; +} +EXPORT_SYMBOL(filemap_splice_read); + static inline loff_t folio_seek_hole_data(struct xa_state *xas, struct address_space *mapping, struct folio *folio, loff_t start, loff_t end, bool seek_data) |