summaryrefslogtreecommitdiff
path: root/mm/filemap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--mm/filemap.c609
1 files changed, 343 insertions, 266 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 0b2067b3c328..39bb88140680 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -204,9 +204,9 @@ static void unaccount_page_cache_page(struct address_space *mapping,
if (PageSwapBacked(page)) {
__mod_lruvec_page_state(page, NR_SHMEM, -nr);
if (PageTransHuge(page))
- __dec_node_page_state(page, NR_SHMEM_THPS);
+ __dec_lruvec_page_state(page, NR_SHMEM_THPS);
} else if (PageTransHuge(page)) {
- __dec_node_page_state(page, NR_FILE_THPS);
+ __dec_lruvec_page_state(page, NR_FILE_THPS);
filemap_nr_thps_dec(mapping);
}
@@ -1583,19 +1583,20 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
else
wait_on_page_locked(page);
return 0;
- } else {
- if (flags & FAULT_FLAG_KILLABLE) {
- int ret;
+ }
+ if (flags & FAULT_FLAG_KILLABLE) {
+ int ret;
- ret = __lock_page_killable(page);
- if (ret) {
- mmap_read_unlock(mm);
- return 0;
- }
- } else
- __lock_page(page);
- return 1;
+ ret = __lock_page_killable(page);
+ if (ret) {
+ mmap_read_unlock(mm);
+ return 0;
+ }
+ } else {
+ __lock_page(page);
}
+ return 1;
+
}
/**
@@ -2166,6 +2167,259 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra)
ra->ra_pages /= 4;
}
+static int lock_page_for_iocb(struct kiocb *iocb, struct page *page)
+{
+ if (iocb->ki_flags & IOCB_WAITQ)
+ return lock_page_async(page, iocb->ki_waitq);
+ else if (iocb->ki_flags & IOCB_NOWAIT)
+ return trylock_page(page) ? 0 : -EAGAIN;
+ else
+ return lock_page_killable(page);
+}
+
+static struct page *
+generic_file_buffered_read_readpage(struct kiocb *iocb,
+ struct file *filp,
+ struct address_space *mapping,
+ struct page *page)
+{
+ struct file_ra_state *ra = &filp->f_ra;
+ int error;
+
+ if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
+ unlock_page(page);
+ put_page(page);
+ return ERR_PTR(-EAGAIN);
+ }
+
+ /*
+ * A previous I/O error may have been due to temporary
+ * failures, eg. multipath errors.
+ * PG_error will be set again if readpage fails.
+ */
+ ClearPageError(page);
+ /* Start the actual read. The read will unlock the page. */
+ error = mapping->a_ops->readpage(filp, page);
+
+ if (unlikely(error)) {
+ put_page(page);
+ return error != AOP_TRUNCATED_PAGE ? ERR_PTR(error) : NULL;
+ }
+
+ if (!PageUptodate(page)) {
+ error = lock_page_for_iocb(iocb, page);
+ if (unlikely(error)) {
+ put_page(page);
+ return ERR_PTR(error);
+ }
+ if (!PageUptodate(page)) {
+ if (page->mapping == NULL) {
+ /*
+ * invalidate_mapping_pages got it
+ */
+ unlock_page(page);
+ put_page(page);
+ return NULL;
+ }
+ unlock_page(page);
+ shrink_readahead_size_eio(ra);
+ put_page(page);
+ return ERR_PTR(-EIO);
+ }
+ unlock_page(page);
+ }
+
+ return page;
+}
+
+static struct page *
+generic_file_buffered_read_pagenotuptodate(struct kiocb *iocb,
+ struct file *filp,
+ struct iov_iter *iter,
+ struct page *page,
+ loff_t pos, loff_t count)
+{
+ struct address_space *mapping = filp->f_mapping;
+ struct inode *inode = mapping->host;
+ int error;
+
+ /*
+ * See comment in do_read_cache_page on why
+ * wait_on_page_locked is used to avoid unnecessarily
+ * serialisations and why it's safe.
+ */
+ if (iocb->ki_flags & IOCB_WAITQ) {
+ error = wait_on_page_locked_async(page,
+ iocb->ki_waitq);
+ } else {
+ error = wait_on_page_locked_killable(page);
+ }
+ if (unlikely(error)) {
+ put_page(page);
+ return ERR_PTR(error);
+ }
+ if (PageUptodate(page))
+ return page;
+
+ if (inode->i_blkbits == PAGE_SHIFT ||
+ !mapping->a_ops->is_partially_uptodate)
+ goto page_not_up_to_date;
+ /* pipes can't handle partially uptodate pages */
+ if (unlikely(iov_iter_is_pipe(iter)))
+ goto page_not_up_to_date;
+ if (!trylock_page(page))
+ goto page_not_up_to_date;
+ /* Did it get truncated before we got the lock? */
+ if (!page->mapping)
+ goto page_not_up_to_date_locked;
+ if (!mapping->a_ops->is_partially_uptodate(page,
+ pos & ~PAGE_MASK, count))
+ goto page_not_up_to_date_locked;
+ unlock_page(page);
+ return page;
+
+page_not_up_to_date:
+ /* Get exclusive access to the page ... */
+ error = lock_page_for_iocb(iocb, page);
+ if (unlikely(error)) {
+ put_page(page);
+ return ERR_PTR(error);
+ }
+
+page_not_up_to_date_locked:
+ /* Did it get truncated before we got the lock? */
+ if (!page->mapping) {
+ unlock_page(page);
+ put_page(page);
+ return NULL;
+ }
+
+ /* Did somebody else fill it already? */
+ if (PageUptodate(page)) {
+ unlock_page(page);
+ return page;
+ }
+
+ return generic_file_buffered_read_readpage(iocb, filp, mapping, page);
+}
+
+static struct page *
+generic_file_buffered_read_no_cached_page(struct kiocb *iocb,
+ struct iov_iter *iter)
+{
+ struct file *filp = iocb->ki_filp;
+ struct address_space *mapping = filp->f_mapping;
+ pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
+ struct page *page;
+ int error;
+
+ if (iocb->ki_flags & IOCB_NOIO)
+ return ERR_PTR(-EAGAIN);
+
+ /*
+ * Ok, it wasn't cached, so we need to create a new
+ * page..
+ */
+ page = page_cache_alloc(mapping);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ error = add_to_page_cache_lru(page, mapping, index,
+ mapping_gfp_constraint(mapping, GFP_KERNEL));
+ if (error) {
+ put_page(page);
+ return error != -EEXIST ? ERR_PTR(error) : NULL;
+ }
+
+ return generic_file_buffered_read_readpage(iocb, filp, mapping, page);
+}
+
+static int generic_file_buffered_read_get_pages(struct kiocb *iocb,
+ struct iov_iter *iter,
+ struct page **pages,
+ unsigned int nr)
+{
+ struct file *filp = iocb->ki_filp;
+ struct address_space *mapping = filp->f_mapping;
+ struct file_ra_state *ra = &filp->f_ra;
+ pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
+ pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
+ int i, j, nr_got, err = 0;
+
+ nr = min_t(unsigned long, last_index - index, nr);
+find_page:
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
+ nr_got = find_get_pages_contig(mapping, index, nr, pages);
+ if (nr_got)
+ goto got_pages;
+
+ if (iocb->ki_flags & IOCB_NOIO)
+ return -EAGAIN;
+
+ page_cache_sync_readahead(mapping, ra, filp, index, last_index - index);
+
+ nr_got = find_get_pages_contig(mapping, index, nr, pages);
+ if (nr_got)
+ goto got_pages;
+
+ pages[0] = generic_file_buffered_read_no_cached_page(iocb, iter);
+ err = PTR_ERR_OR_ZERO(pages[0]);
+ if (!IS_ERR_OR_NULL(pages[0]))
+ nr_got = 1;
+got_pages:
+ for (i = 0; i < nr_got; i++) {
+ struct page *page = pages[i];
+ pgoff_t pg_index = index + i;
+ loff_t pg_pos = max(iocb->ki_pos,
+ (loff_t) pg_index << PAGE_SHIFT);
+ loff_t pg_count = iocb->ki_pos + iter->count - pg_pos;
+
+ if (PageReadahead(page)) {
+ if (iocb->ki_flags & IOCB_NOIO) {
+ for (j = i; j < nr_got; j++)
+ put_page(pages[j]);
+ nr_got = i;
+ err = -EAGAIN;
+ break;
+ }
+ page_cache_async_readahead(mapping, ra, filp, page,
+ pg_index, last_index - pg_index);
+ }
+
+ if (!PageUptodate(page)) {
+ if ((iocb->ki_flags & IOCB_NOWAIT) ||
+ ((iocb->ki_flags & IOCB_WAITQ) && i)) {
+ for (j = i; j < nr_got; j++)
+ put_page(pages[j]);
+ nr_got = i;
+ err = -EAGAIN;
+ break;
+ }
+
+ page = generic_file_buffered_read_pagenotuptodate(iocb,
+ filp, iter, page, pg_pos, pg_count);
+ if (IS_ERR_OR_NULL(page)) {
+ for (j = i + 1; j < nr_got; j++)
+ put_page(pages[j]);
+ nr_got = i;
+ err = PTR_ERR_OR_ZERO(page);
+ break;
+ }
+ }
+ }
+
+ if (likely(nr_got))
+ return nr_got;
+ if (err)
+ return err;
+ /*
+ * No pages and no error means we raced and should retry:
+ */
+ goto find_page;
+}
+
/**
* generic_file_buffered_read - generic file read routine
* @iocb: the iocb to read
@@ -2186,294 +2440,117 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
struct iov_iter *iter, ssize_t written)
{
struct file *filp = iocb->ki_filp;
+ struct file_ra_state *ra = &filp->f_ra;
struct address_space *mapping = filp->f_mapping;
struct inode *inode = mapping->host;
- struct file_ra_state *ra = &filp->f_ra;
- loff_t *ppos = &iocb->ki_pos;
- pgoff_t index;
- pgoff_t last_index;
- pgoff_t prev_index;
- unsigned long offset; /* offset into pagecache page */
- unsigned int prev_offset;
- int error = 0;
-
- if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
+ struct page *pages_onstack[PAGEVEC_SIZE], **pages = NULL;
+ unsigned int nr_pages = min_t(unsigned int, 512,
+ ((iocb->ki_pos + iter->count + PAGE_SIZE - 1) >> PAGE_SHIFT) -
+ (iocb->ki_pos >> PAGE_SHIFT));
+ int i, pg_nr, error = 0;
+ bool writably_mapped;
+ loff_t isize, end_offset;
+
+ if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes))
return 0;
iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
- index = *ppos >> PAGE_SHIFT;
- prev_index = ra->prev_pos >> PAGE_SHIFT;
- prev_offset = ra->prev_pos & (PAGE_SIZE-1);
- last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
- offset = *ppos & ~PAGE_MASK;
-
- /*
- * If we've already successfully copied some data, then we
- * can no longer safely return -EIOCBQUEUED. Hence mark
- * an async read NOWAIT at that point.
- */
- if (written && (iocb->ki_flags & IOCB_WAITQ))
- iocb->ki_flags |= IOCB_NOWAIT;
+ if (nr_pages > ARRAY_SIZE(pages_onstack))
+ pages = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL);
- for (;;) {
- struct page *page;
- pgoff_t end_index;
- loff_t isize;
- unsigned long nr, ret;
+ if (!pages) {
+ pages = pages_onstack;
+ nr_pages = min_t(unsigned int, nr_pages, ARRAY_SIZE(pages_onstack));
+ }
+ do {
cond_resched();
-find_page:
- if (fatal_signal_pending(current)) {
- error = -EINTR;
- goto out;
- }
- page = find_get_page(mapping, index);
- if (!page) {
- if (iocb->ki_flags & IOCB_NOIO)
- goto would_block;
- page_cache_sync_readahead(mapping,
- ra, filp,
- index, last_index - index);
- page = find_get_page(mapping, index);
- if (unlikely(page == NULL))
- goto no_cached_page;
- }
- if (PageReadahead(page)) {
- if (iocb->ki_flags & IOCB_NOIO) {
- put_page(page);
- goto out;
- }
- page_cache_async_readahead(mapping,
- ra, filp, page,
- index, last_index - index);
- }
- if (!PageUptodate(page)) {
- /*
- * See comment in do_read_cache_page on why
- * wait_on_page_locked is used to avoid unnecessarily
- * serialisations and why it's safe.
- */
- if (iocb->ki_flags & IOCB_WAITQ) {
- if (written) {
- put_page(page);
- goto out;
- }
- error = wait_on_page_locked_async(page,
- iocb->ki_waitq);
- } else {
- if (iocb->ki_flags & IOCB_NOWAIT) {
- put_page(page);
- goto would_block;
- }
- error = wait_on_page_locked_killable(page);
- }
- if (unlikely(error))
- goto readpage_error;
- if (PageUptodate(page))
- goto page_ok;
-
- if (inode->i_blkbits == PAGE_SHIFT ||
- !mapping->a_ops->is_partially_uptodate)
- goto page_not_up_to_date;
- /* pipes can't handle partially uptodate pages */
- if (unlikely(iov_iter_is_pipe(iter)))
- goto page_not_up_to_date;
- if (!trylock_page(page))
- goto page_not_up_to_date;
- /* Did it get truncated before we got the lock? */
- if (!page->mapping)
- goto page_not_up_to_date_locked;
- if (!mapping->a_ops->is_partially_uptodate(page,
- offset, iter->count))
- goto page_not_up_to_date_locked;
- unlock_page(page);
+ /*
+ * If we've already successfully copied some data, then we
+ * can no longer safely return -EIOCBQUEUED. Hence mark
+ * an async read NOWAIT at that point.
+ */
+ if ((iocb->ki_flags & IOCB_WAITQ) && written)
+ iocb->ki_flags |= IOCB_NOWAIT;
+
+ i = 0;
+ pg_nr = generic_file_buffered_read_get_pages(iocb, iter,
+ pages, nr_pages);
+ if (pg_nr < 0) {
+ error = pg_nr;
+ break;
}
-page_ok:
+
/*
- * i_size must be checked after we know the page is Uptodate.
+ * i_size must be checked after we know the pages are Uptodate.
*
* Checking i_size after the check allows us to calculate
* the correct value for "nr", which means the zero-filled
* part of the page is not copied back to userspace (unless
* another truncate extends the file - this is desired though).
*/
-
isize = i_size_read(inode);
- end_index = (isize - 1) >> PAGE_SHIFT;
- if (unlikely(!isize || index > end_index)) {
- put_page(page);
- goto out;
- }
+ if (unlikely(iocb->ki_pos >= isize))
+ goto put_pages;
- /* nr is the maximum number of bytes to copy from this page */
- nr = PAGE_SIZE;
- if (index == end_index) {
- nr = ((isize - 1) & ~PAGE_MASK) + 1;
- if (nr <= offset) {
- put_page(page);
- goto out;
- }
- }
- nr = nr - offset;
+ end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
- /* If users can be writing to this page using arbitrary
- * virtual addresses, take care about potential aliasing
- * before reading the page on the kernel side.
- */
- if (mapping_writably_mapped(mapping))
- flush_dcache_page(page);
+ while ((iocb->ki_pos >> PAGE_SHIFT) + pg_nr >
+ (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT)
+ put_page(pages[--pg_nr]);
/*
- * When a sequential read accesses a page several times,
- * only mark it as accessed the first time.
+ * Once we start copying data, we don't want to be touching any
+ * cachelines that might be contended:
*/
- if (prev_index != index || offset != prev_offset)
- mark_page_accessed(page);
- prev_index = index;
+ writably_mapped = mapping_writably_mapped(mapping);
/*
- * Ok, we have the page, and it's up-to-date, so
- * now we can copy it to user space...
+ * When a sequential read accesses a page several times, only
+ * mark it as accessed the first time.
*/
+ if (iocb->ki_pos >> PAGE_SHIFT !=
+ ra->prev_pos >> PAGE_SHIFT)
+ mark_page_accessed(pages[0]);
+ for (i = 1; i < pg_nr; i++)
+ mark_page_accessed(pages[i]);
+
+ for (i = 0; i < pg_nr; i++) {
+ unsigned int offset = iocb->ki_pos & ~PAGE_MASK;
+ unsigned int bytes = min_t(loff_t, end_offset - iocb->ki_pos,
+ PAGE_SIZE - offset);
+ unsigned int copied;
- ret = copy_page_to_iter(page, offset, nr, iter);
- offset += ret;
- index += offset >> PAGE_SHIFT;
- offset &= ~PAGE_MASK;
- prev_offset = offset;
-
- put_page(page);
- written += ret;
- if (!iov_iter_count(iter))
- goto out;
- if (ret < nr) {
- error = -EFAULT;
- goto out;
- }
- continue;
-
-page_not_up_to_date:
- /* Get exclusive access to the page ... */
- if (iocb->ki_flags & IOCB_WAITQ) {
- if (written) {
- put_page(page);
- goto out;
- }
- error = lock_page_async(page, iocb->ki_waitq);
- } else {
- error = lock_page_killable(page);
- }
- if (unlikely(error))
- goto readpage_error;
-
-page_not_up_to_date_locked:
- /* Did it get truncated before we got the lock? */
- if (!page->mapping) {
- unlock_page(page);
- put_page(page);
- continue;
- }
-
- /* Did somebody else fill it already? */
- if (PageUptodate(page)) {
- unlock_page(page);
- goto page_ok;
- }
-
-readpage:
- if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
- unlock_page(page);
- put_page(page);
- goto would_block;
- }
- /*
- * A previous I/O error may have been due to temporary
- * failures, eg. multipath errors.
- * PG_error will be set again if readpage fails.
- */
- ClearPageError(page);
- /* Start the actual read. The read will unlock the page. */
- error = mapping->a_ops->readpage(filp, page);
+ /*
+ * If users can be writing to this page using arbitrary
+ * virtual addresses, take care about potential aliasing
+ * before reading the page on the kernel side.
+ */
+ if (writably_mapped)
+ flush_dcache_page(pages[i]);
- if (unlikely(error)) {
- if (error == AOP_TRUNCATED_PAGE) {
- put_page(page);
- error = 0;
- goto find_page;
- }
- goto readpage_error;
- }
+ copied = copy_page_to_iter(pages[i], offset, bytes, iter);
- if (!PageUptodate(page)) {
- if (iocb->ki_flags & IOCB_WAITQ) {
- if (written) {
- put_page(page);
- goto out;
- }
- error = lock_page_async(page, iocb->ki_waitq);
- } else {
- error = lock_page_killable(page);
- }
+ written += copied;
+ iocb->ki_pos += copied;
+ ra->prev_pos = iocb->ki_pos;
- if (unlikely(error))
- goto readpage_error;
- if (!PageUptodate(page)) {
- if (page->mapping == NULL) {
- /*
- * invalidate_mapping_pages got it
- */
- unlock_page(page);
- put_page(page);
- goto find_page;
- }
- unlock_page(page);
- shrink_readahead_size_eio(ra);
- error = -EIO;
- goto readpage_error;
+ if (copied < bytes) {
+ error = -EFAULT;
+ break;
}
- unlock_page(page);
}
+put_pages:
+ for (i = 0; i < pg_nr; i++)
+ put_page(pages[i]);
+ } while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
- goto page_ok;
-
-readpage_error:
- /* UHHUH! A synchronous read error occurred. Report it */
- put_page(page);
- goto out;
-
-no_cached_page:
- /*
- * Ok, it wasn't cached, so we need to create a new
- * page..
- */
- page = page_cache_alloc(mapping);
- if (!page) {
- error = -ENOMEM;
- goto out;
- }
- error = add_to_page_cache_lru(page, mapping, index,
- mapping_gfp_constraint(mapping, GFP_KERNEL));
- if (error) {
- put_page(page);
- if (error == -EEXIST) {
- error = 0;
- goto find_page;
- }
- goto out;
- }
- goto readpage;
- }
+ file_accessed(filp);
-would_block:
- error = -EAGAIN;
-out:
- ra->prev_pos = prev_index;
- ra->prev_pos <<= PAGE_SHIFT;
- ra->prev_pos |= prev_offset;
+ if (pages != pages_onstack)
+ kfree(pages);
- *ppos = ((loff_t)index << PAGE_SHIFT) + offset;
- file_accessed(filp);
return written ? written : error;
}
EXPORT_SYMBOL_GPL(generic_file_buffered_read);