summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/write.c116
-rw-r--r--fs/aio.c2
-rw-r--r--fs/btrfs/extent_io.c57
-rw-r--r--fs/buffer.c54
-rw-r--r--fs/ceph/addr.c58
-rw-r--r--fs/cifs/file.c118
-rw-r--r--fs/coredump.c8
-rw-r--r--fs/cramfs/inode.c4
-rw-r--r--fs/erofs/data.c2
-rw-r--r--fs/exec.c20
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/inode.c69
-rw-r--r--fs/ext4/move_extent.c46
-rw-r--r--fs/ext4/super.c6
-rw-r--r--fs/f2fs/checkpoint.c49
-rw-r--r--fs/f2fs/data.c84
-rw-r--r--fs/f2fs/node.c72
-rw-r--r--fs/fs-writeback.c10
-rw-r--r--fs/fuse/dax.c2
-rw-r--r--fs/fuse/file.c18
-rw-r--r--fs/gfs2/aops.c64
-rw-r--r--fs/gfs2/glops.c2
-rw-r--r--fs/gfs2/log.c6
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/hugetlbfs/inode.c72
-rw-r--r--fs/iomap/buffered-io.c5
-rw-r--r--fs/jbd2/commit.c33
-rw-r--r--fs/jbd2/journal.c3
-rw-r--r--fs/mpage.c73
-rw-r--r--fs/nfs/write.c5
-rw-r--r--fs/nilfs2/btnode.c2
-rw-r--r--fs/nilfs2/btree.c16
-rw-r--r--fs/nilfs2/gcinode.c2
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/page.c59
-rw-r--r--fs/nilfs2/segment.c46
-rw-r--r--fs/ntfs3/inode.c33
-rw-r--r--fs/ocfs2/journal.c16
-rw-r--r--fs/orangefs/file.c3
-rw-r--r--fs/orangefs/inode.c23
-rw-r--r--fs/proc/task_mmu.c31
-rw-r--r--fs/proc/task_nommu.c2
-rw-r--r--fs/proc/vmcore.c3
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/reiserfs/journal.c4
-rw-r--r--fs/reiserfs/tail_conversion.c2
-rw-r--r--fs/romfs/mmap-nommu.c2
-rw-r--r--fs/udf/inode.c3
-rw-r--r--fs/userfaultfd.c115
-rw-r--r--fs/xfs/xfs_file.c2
50 files changed, 726 insertions, 706 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 2d17891b618e..571f3b9a417e 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -704,85 +704,87 @@ static int afs_writepages_region(struct address_space *mapping,
bool max_one_loop)
{
struct folio *folio;
- struct page *head_page;
+ struct folio_batch fbatch;
ssize_t ret;
+ unsigned int i;
int n, skips = 0;
_enter("%llx,%llx,", start, end);
+ folio_batch_init(&fbatch);
do {
pgoff_t index = start / PAGE_SIZE;
- n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE,
- PAGECACHE_TAG_DIRTY, 1, &head_page);
+ n = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
+ PAGECACHE_TAG_DIRTY, &fbatch);
+
if (!n)
break;
+ for (i = 0; i < n; i++) {
+ folio = fbatch.folios[i];
+ start = folio_pos(folio); /* May regress with THPs */
- folio = page_folio(head_page);
- start = folio_pos(folio); /* May regress with THPs */
-
- _debug("wback %lx", folio_index(folio));
+ _debug("wback %lx", folio_index(folio));
- /* At this point we hold neither the i_pages lock nor the
- * page lock: the page may be truncated or invalidated
- * (changing page->mapping to NULL), or even swizzled
- * back from swapper_space to tmpfs file mapping
- */
- if (wbc->sync_mode != WB_SYNC_NONE) {
- ret = folio_lock_killable(folio);
- if (ret < 0) {
- folio_put(folio);
- return ret;
- }
- } else {
- if (!folio_trylock(folio)) {
- folio_put(folio);
- return 0;
+ /* At this point we hold neither the i_pages lock nor the
+ * page lock: the page may be truncated or invalidated
+ * (changing page->mapping to NULL), or even swizzled
+ * back from swapper_space to tmpfs file mapping
+ */
+ if (wbc->sync_mode != WB_SYNC_NONE) {
+ ret = folio_lock_killable(folio);
+ if (ret < 0) {
+ folio_batch_release(&fbatch);
+ return ret;
+ }
+ } else {
+ if (!folio_trylock(folio))
+ continue;
}
- }
- if (folio_mapping(folio) != mapping ||
- !folio_test_dirty(folio)) {
- start += folio_size(folio);
- folio_unlock(folio);
- folio_put(folio);
- continue;
- }
+ if (folio->mapping != mapping ||
+ !folio_test_dirty(folio)) {
+ start += folio_size(folio);
+ folio_unlock(folio);
+ continue;
+ }
- if (folio_test_writeback(folio) ||
- folio_test_fscache(folio)) {
- folio_unlock(folio);
- if (wbc->sync_mode != WB_SYNC_NONE) {
- folio_wait_writeback(folio);
+ if (folio_test_writeback(folio) ||
+ folio_test_fscache(folio)) {
+ folio_unlock(folio);
+ if (wbc->sync_mode != WB_SYNC_NONE) {
+ folio_wait_writeback(folio);
#ifdef CONFIG_AFS_FSCACHE
- folio_wait_fscache(folio);
+ folio_wait_fscache(folio);
#endif
- } else {
- start += folio_size(folio);
+ } else {
+ start += folio_size(folio);
+ }
+ if (wbc->sync_mode == WB_SYNC_NONE) {
+ if (skips >= 5 || need_resched()) {
+ *_next = start;
+ _leave(" = 0 [%llx]", *_next);
+ return 0;
+ }
+ skips++;
+ }
+ continue;
}
- folio_put(folio);
- if (wbc->sync_mode == WB_SYNC_NONE) {
- if (skips >= 5 || need_resched())
- break;
- skips++;
+
+ if (!folio_clear_dirty_for_io(folio))
+ BUG();
+ ret = afs_write_back_from_locked_folio(mapping, wbc,
+ folio, start, end);
+ if (ret < 0) {
+ _leave(" = %zd", ret);
+ folio_batch_release(&fbatch);
+ return ret;
}
- continue;
- }
- if (!folio_clear_dirty_for_io(folio))
- BUG();
- ret = afs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
- folio_put(folio);
- if (ret < 0) {
- _leave(" = %zd", ret);
- return ret;
+ start += ret;
}
- start += ret;
-
- if (max_one_loop)
- break;
-
+ folio_batch_release(&fbatch);
cond_resched();
} while (wbc->nr_to_write > 0);
diff --git a/fs/aio.c b/fs/aio.c
index e85ba0b77f59..b0b17bd098bb 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -394,7 +394,7 @@ static const struct vm_operations_struct aio_ring_vm_ops = {
static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
{
- vma->vm_flags |= VM_DONTEXPAND;
+ vm_flags_set(vma, VM_DONTEXPAND);
vma->vm_ops = &aio_ring_vm_ops;
return 0;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c25fa74d7615..40300e8e5f99 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2408,14 +2408,14 @@ int btree_write_cache_pages(struct address_space *mapping,
int ret = 0;
int done = 0;
int nr_to_write_done = 0;
- struct pagevec pvec;
- int nr_pages;
+ struct folio_batch fbatch;
+ unsigned int nr_folios;
pgoff_t index;
pgoff_t end; /* Inclusive */
int scanned = 0;
xa_mark_t tag;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
end = -1;
@@ -2438,14 +2438,15 @@ retry:
if (wbc->sync_mode == WB_SYNC_ALL)
tag_pages_for_writeback(mapping, index, end);
while (!done && !nr_to_write_done && (index <= end) &&
- (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
- tag))) {
+ (nr_folios = filemap_get_folios_tag(mapping, &index, end,
+ tag, &fbatch))) {
unsigned i;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ for (i = 0; i < nr_folios; i++) {
+ struct folio *folio = fbatch.folios[i];
- ret = submit_eb_page(page, wbc, &bio_ctrl, &eb_context);
+ ret = submit_eb_page(&folio->page, wbc, &bio_ctrl,
+ &eb_context);
if (ret == 0)
continue;
if (ret < 0) {
@@ -2460,7 +2461,7 @@ retry:
*/
nr_to_write_done = wbc->nr_to_write <= 0;
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
if (!scanned && !done) {
@@ -2535,8 +2536,8 @@ static int extent_write_cache_pages(struct address_space *mapping,
int ret = 0;
int done = 0;
int nr_to_write_done = 0;
- struct pagevec pvec;
- int nr_pages;
+ struct folio_batch fbatch;
+ unsigned int nr_folios;
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
@@ -2556,7 +2557,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
if (!igrab(inode))
return 0;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
end = -1;
@@ -2594,14 +2595,14 @@ retry:
tag_pages_for_writeback(mapping, index, end);
done_index = index;
while (!done && !nr_to_write_done && (index <= end) &&
- (nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
- &index, end, tag))) {
+ (nr_folios = filemap_get_folios_tag(mapping, &index,
+ end, tag, &fbatch))) {
unsigned i;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ for (i = 0; i < nr_folios; i++) {
+ struct folio *folio = fbatch.folios[i];
- done_index = page->index + 1;
+ done_index = folio->index + folio_nr_pages(folio);
/*
* At this point we hold neither the i_pages lock nor
* the page lock: the page may be truncated or
@@ -2609,29 +2610,29 @@ retry:
* or even swizzled back from swapper_space to
* tmpfs file mapping
*/
- if (!trylock_page(page)) {
+ if (!folio_trylock(folio)) {
submit_write_bio(bio_ctrl, 0);
- lock_page(page);
+ folio_lock(folio);
}
- if (unlikely(page->mapping != mapping)) {
- unlock_page(page);
+ if (unlikely(folio->mapping != mapping)) {
+ folio_unlock(folio);
continue;
}
if (wbc->sync_mode != WB_SYNC_NONE) {
- if (PageWriteback(page))
+ if (folio_test_writeback(folio))
submit_write_bio(bio_ctrl, 0);
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
}
- if (PageWriteback(page) ||
- !clear_page_dirty_for_io(page)) {
- unlock_page(page);
+ if (folio_test_writeback(folio) ||
+ !folio_clear_dirty_for_io(folio)) {
+ folio_unlock(folio);
continue;
}
- ret = __extent_writepage(page, wbc, bio_ctrl);
+ ret = __extent_writepage(&folio->page, wbc, bio_ctrl);
if (ret < 0) {
done = 1;
break;
@@ -2644,7 +2645,7 @@ retry:
*/
nr_to_write_done = wbc->nr_to_write <= 0;
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
if (!scanned && !done) {
diff --git a/fs/buffer.c b/fs/buffer.c
index 623e77d6ef77..9e1e2add541e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -61,7 +61,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
inline void touch_buffer(struct buffer_head *bh)
{
trace_block_touch_buffer(bh);
- mark_page_accessed(bh->b_page);
+ folio_mark_accessed(bh->b_folio);
}
EXPORT_SYMBOL(touch_buffer);
@@ -247,18 +247,18 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
unsigned long flags;
struct buffer_head *first;
struct buffer_head *tmp;
- struct page *page;
- int page_uptodate = 1;
+ struct folio *folio;
+ int folio_uptodate = 1;
BUG_ON(!buffer_async_read(bh));
- page = bh->b_page;
+ folio = bh->b_folio;
if (uptodate) {
set_buffer_uptodate(bh);
} else {
clear_buffer_uptodate(bh);
buffer_io_error(bh, ", async page read");
- SetPageError(page);
+ folio_set_error(folio);
}
/*
@@ -266,14 +266,14 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
* two buffer heads end IO at almost the same time and both
* decide that the page is now completely done.
*/
- first = page_buffers(page);
+ first = folio_buffers(folio);
spin_lock_irqsave(&first->b_uptodate_lock, flags);
clear_buffer_async_read(bh);
unlock_buffer(bh);
tmp = bh;
do {
if (!buffer_uptodate(tmp))
- page_uptodate = 0;
+ folio_uptodate = 0;
if (buffer_async_read(tmp)) {
BUG_ON(!buffer_locked(tmp));
goto still_busy;
@@ -286,9 +286,9 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
* If all of the buffers are uptodate then we can set the page
* uptodate.
*/
- if (page_uptodate)
- SetPageUptodate(page);
- unlock_page(page);
+ if (folio_uptodate)
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
return;
still_busy:
@@ -353,7 +353,7 @@ static void decrypt_bh(struct work_struct *work)
*/
static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
{
- struct inode *inode = bh->b_page->mapping->host;
+ struct inode *inode = bh->b_folio->mapping->host;
bool decrypt = fscrypt_inode_uses_fs_layer_crypto(inode);
bool verify = need_fsverity(bh);
@@ -387,21 +387,21 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
unsigned long flags;
struct buffer_head *first;
struct buffer_head *tmp;
- struct page *page;
+ struct folio *folio;
BUG_ON(!buffer_async_write(bh));
- page = bh->b_page;
+ folio = bh->b_folio;
if (uptodate) {
set_buffer_uptodate(bh);
} else {
buffer_io_error(bh, ", lost async page write");
mark_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);
- SetPageError(page);
+ folio_set_error(folio);
}
- first = page_buffers(page);
+ first = folio_buffers(folio);
spin_lock_irqsave(&first->b_uptodate_lock, flags);
clear_buffer_async_write(bh);
@@ -415,7 +415,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
tmp = tmp->b_this_page;
}
spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
- end_page_writeback(page);
+ folio_end_writeback(folio);
return;
still_busy:
@@ -613,7 +613,7 @@ void write_boundary_block(struct block_device *bdev,
void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
{
struct address_space *mapping = inode->i_mapping;
- struct address_space *buffer_mapping = bh->b_page->mapping;
+ struct address_space *buffer_mapping = bh->b_folio->mapping;
mark_buffer_dirty(bh);
if (!mapping->private_data) {
@@ -1116,7 +1116,7 @@ __getblk_slow(struct block_device *bdev, sector_t block,
* and then attach the address_space's inode to its superblock's dirty
* inode list.
*
- * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
+ * mark_buffer_dirty() is atomic. It takes bh->b_folio->mapping->private_lock,
* i_pages lock and mapping->host->i_lock.
*/
void mark_buffer_dirty(struct buffer_head *bh)
@@ -1138,16 +1138,16 @@ void mark_buffer_dirty(struct buffer_head *bh)
}
if (!test_set_buffer_dirty(bh)) {
- struct page *page = bh->b_page;
+ struct folio *folio = bh->b_folio;
struct address_space *mapping = NULL;
- lock_page_memcg(page);
- if (!TestSetPageDirty(page)) {
- mapping = page_mapping(page);
+ folio_memcg_lock(folio);
+ if (!folio_test_set_dirty(folio)) {
+ mapping = folio->mapping;
if (mapping)
- __set_page_dirty(page, mapping, 0);
+ __folio_mark_dirty(folio, mapping, 0);
}
- unlock_page_memcg(page);
+ folio_memcg_unlock(folio);
if (mapping)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
@@ -1160,8 +1160,8 @@ void mark_buffer_write_io_error(struct buffer_head *bh)
set_buffer_write_io_error(bh);
/* FIXME: do we need to set this in both places? */
- if (bh->b_page && bh->b_page->mapping)
- mapping_set_error(bh->b_page->mapping, -EIO);
+ if (bh->b_folio && bh->b_folio->mapping)
+ mapping_set_error(bh->b_folio->mapping, -EIO);
if (bh->b_assoc_map)
mapping_set_error(bh->b_assoc_map, -EIO);
rcu_read_lock();
@@ -1197,7 +1197,7 @@ void __bforget(struct buffer_head *bh)
{
clear_buffer_dirty(bh);
if (bh->b_assoc_map) {
- struct address_space *buffer_mapping = bh->b_page->mapping;
+ struct address_space *buffer_mapping = bh->b_folio->mapping;
spin_lock(&buffer_mapping->private_lock);
list_del_init(&bh->b_assoc_buffers);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index cac4083e387a..d5335f445233 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -800,7 +800,7 @@ static int ceph_writepages_start(struct address_space *mapping,
struct ceph_vino vino = ceph_vino(inode);
pgoff_t index, start_index, end = -1;
struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
- struct pagevec pvec;
+ struct folio_batch fbatch;
int rc = 0;
unsigned int wsize = i_blocksize(inode);
struct ceph_osd_request *req = NULL;
@@ -829,7 +829,7 @@ static int ceph_writepages_start(struct address_space *mapping,
if (fsc->mount_options->wsize < wsize)
wsize = fsc->mount_options->wsize;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
start_index = wbc->range_cyclic ? mapping->writeback_index : 0;
index = start_index;
@@ -877,7 +877,7 @@ retry:
while (!done && index <= end) {
int num_ops = 0, op_idx;
- unsigned i, pvec_pages, max_pages, locked_pages = 0;
+ unsigned i, nr_folios, max_pages, locked_pages = 0;
struct page **pages = NULL, **data_pages;
struct page *page;
pgoff_t strip_unit_end = 0;
@@ -887,13 +887,13 @@ retry:
max_pages = wsize >> PAGE_SHIFT;
get_more_pages:
- pvec_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
- end, PAGECACHE_TAG_DIRTY);
- dout("pagevec_lookup_range_tag got %d\n", pvec_pages);
- if (!pvec_pages && !locked_pages)
+ nr_folios = filemap_get_folios_tag(mapping, &index,
+ end, PAGECACHE_TAG_DIRTY, &fbatch);
+ dout("pagevec_lookup_range_tag got %d\n", nr_folios);
+ if (!nr_folios && !locked_pages)
break;
- for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) {
- page = pvec.pages[i];
+ for (i = 0; i < nr_folios && locked_pages < max_pages; i++) {
+ page = &fbatch.folios[i]->page;
dout("? %p idx %lu\n", page, page->index);
if (locked_pages == 0)
lock_page(page); /* first page */
@@ -1003,7 +1003,7 @@ get_more_pages:
len = 0;
}
- /* note position of first page in pvec */
+ /* note position of first page in fbatch */
dout("%p will write page %p idx %lu\n",
inode, page, page->index);
@@ -1013,30 +1013,30 @@ get_more_pages:
fsc->write_congested = true;
pages[locked_pages++] = page;
- pvec.pages[i] = NULL;
+ fbatch.folios[i] = NULL;
len += thp_size(page);
}
/* did we get anything? */
if (!locked_pages)
- goto release_pvec_pages;
+ goto release_folios;
if (i) {
unsigned j, n = 0;
- /* shift unused page to beginning of pvec */
- for (j = 0; j < pvec_pages; j++) {
- if (!pvec.pages[j])
+ /* shift unused page to beginning of fbatch */
+ for (j = 0; j < nr_folios; j++) {
+ if (!fbatch.folios[j])
continue;
if (n < j)
- pvec.pages[n] = pvec.pages[j];
+ fbatch.folios[n] = fbatch.folios[j];
n++;
}
- pvec.nr = n;
+ fbatch.nr = n;
- if (pvec_pages && i == pvec_pages &&
+ if (nr_folios && i == nr_folios &&
locked_pages < max_pages) {
- dout("reached end pvec, trying for more\n");
- pagevec_release(&pvec);
+ dout("reached end fbatch, trying for more\n");
+ folio_batch_release(&fbatch);
goto get_more_pages;
}
}
@@ -1172,10 +1172,10 @@ new_request:
if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
done = true;
-release_pvec_pages:
- dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
- pvec.nr ? pvec.pages[0] : NULL);
- pagevec_release(&pvec);
+release_folios:
+ dout("folio_batch release on %d folios (%p)\n", (int)fbatch.nr,
+ fbatch.nr ? fbatch.folios[0] : NULL);
+ folio_batch_release(&fbatch);
}
if (should_loop && !done) {
@@ -1192,15 +1192,17 @@ release_pvec_pages:
unsigned i, nr;
index = 0;
while ((index <= end) &&
- (nr = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_WRITEBACK))) {
+ (nr = filemap_get_folios_tag(mapping, &index,
+ (pgoff_t)-1,
+ PAGECACHE_TAG_WRITEBACK,
+ &fbatch))) {
for (i = 0; i < nr; i++) {
- page = pvec.pages[i];
+ page = &fbatch.folios[i]->page;
if (page_snap_context(page) != snapc)
continue;
wait_on_page_writeback(page);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0e602173ac76..5365a3299088 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2857,78 +2857,92 @@ static int cifs_writepages_region(struct address_space *mapping,
struct writeback_control *wbc,
loff_t start, loff_t end, loff_t *_next)
{
- struct folio *folio;
- struct page *head_page;
- ssize_t ret;
- int n, skips = 0;
+ struct folio_batch fbatch;
+ int skips = 0;
+ folio_batch_init(&fbatch);
do {
+ int nr;
pgoff_t index = start / PAGE_SIZE;
- n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE,
- PAGECACHE_TAG_DIRTY, 1, &head_page);
- if (!n)
+ nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
+ PAGECACHE_TAG_DIRTY, &fbatch);
+ if (!nr)
break;
- folio = page_folio(head_page);
- start = folio_pos(folio); /* May regress with THPs */
+ for (int i = 0; i < nr; i++) {
+ ssize_t ret;
+ struct folio *folio = fbatch.folios[i];
- /* At this point we hold neither the i_pages lock nor the
- * page lock: the page may be truncated or invalidated
- * (changing page->mapping to NULL), or even swizzled
- * back from swapper_space to tmpfs file mapping
- */
- if (wbc->sync_mode != WB_SYNC_NONE) {
- ret = folio_lock_killable(folio);
- if (ret < 0) {
- folio_put(folio);
- return ret;
+redo_folio:
+ start = folio_pos(folio); /* May regress with THPs */
+
+ /* At this point we hold neither the i_pages lock nor the
+ * page lock: the page may be truncated or invalidated
+ * (changing page->mapping to NULL), or even swizzled
+ * back from swapper_space to tmpfs file mapping
+ */
+ if (wbc->sync_mode != WB_SYNC_NONE) {
+ ret = folio_lock_killable(folio);
+ if (ret < 0)
+ goto write_error;
+ } else {
+ if (!folio_trylock(folio))
+ goto skip_write;
}
- } else {
- if (!folio_trylock(folio)) {
- folio_put(folio);
- return 0;
+
+ if (folio_mapping(folio) != mapping ||
+ !folio_test_dirty(folio)) {
+ folio_unlock(folio);
+ goto skip_write;
}
- }
- if (folio_mapping(folio) != mapping ||
- !folio_test_dirty(folio)) {
- start += folio_size(folio);
- folio_unlock(folio);
- folio_put(folio);
- continue;
- }
+ if (folio_test_writeback(folio) ||
+ folio_test_fscache(folio)) {
+ folio_unlock(folio);
+ if (wbc->sync_mode == WB_SYNC_NONE)
+ goto skip_write;
- if (folio_test_writeback(folio) ||
- folio_test_fscache(folio)) {
- folio_unlock(folio);
- if (wbc->sync_mode != WB_SYNC_NONE) {
folio_wait_writeback(folio);
#ifdef CONFIG_CIFS_FSCACHE
folio_wait_fscache(folio);
#endif
- } else {
- start += folio_size(folio);
+ goto redo_folio;
}
- folio_put(folio);
- if (wbc->sync_mode == WB_SYNC_NONE) {
- if (skips >= 5 || need_resched())
- break;
- skips++;
- }
- continue;
- }
- if (!folio_clear_dirty_for_io(folio))
- /* We hold the page lock - it should've been dirty. */
- WARN_ON(1);
+ if (!folio_clear_dirty_for_io(folio))
+ /* We hold the page lock - it should've been dirty. */
+ WARN_ON(1);
- ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
- folio_put(folio);
- if (ret < 0)
+ ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
+ if (ret < 0)
+ goto write_error;
+
+ start += ret;
+ continue;
+
+write_error:
+ folio_batch_release(&fbatch);
+ *_next = start;
return ret;
- start += ret;
+skip_write:
+ /*
+ * Too many skipped writes, or need to reschedule?
+ * Treat it as a write error without an error code.
+ */
+ if (skips >= 5 || need_resched()) {
+ ret = 0;
+ goto write_error;
+ }
+
+ /* Otherwise, just skip that folio and go on to the next */
+ skips++;
+ start += folio_size(folio);
+ continue;
+ }
+
+ folio_batch_release(&fbatch);
cond_resched();
} while (wbc->nr_to_write > 0);
diff --git a/fs/coredump.c b/fs/coredump.c
index a141dca68165..5df1e6e1eb2b 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -1108,14 +1108,14 @@ whole:
* Helper function for iterating across a vma list. It ensures that the caller
* will visit `gate_vma' prior to terminating the search.
*/
-static struct vm_area_struct *coredump_next_vma(struct ma_state *mas,
+static struct vm_area_struct *coredump_next_vma(struct vma_iterator *vmi,
struct vm_area_struct *vma,
struct vm_area_struct *gate_vma)
{
if (gate_vma && (vma == gate_vma))
return NULL;
- vma = mas_next(mas, ULONG_MAX);
+ vma = vma_next(vmi);
if (vma)
return vma;
return gate_vma;
@@ -1143,7 +1143,7 @@ static bool dump_vma_snapshot(struct coredump_params *cprm)
{
struct vm_area_struct *gate_vma, *vma = NULL;
struct mm_struct *mm = current->mm;
- MA_STATE(mas, &mm->mm_mt, 0, 0);
+ VMA_ITERATOR(vmi, mm, 0);
int i = 0;
/*
@@ -1164,7 +1164,7 @@ static bool dump_vma_snapshot(struct coredump_params *cprm)
return false;
}
- while ((vma = coredump_next_vma(&mas, vma, gate_vma)) != NULL) {
+ while ((vma = coredump_next_vma(&vmi, vma, gate_vma)) != NULL) {
struct core_vma_metadata *m = cprm->vma_meta + i;
m->start = vma->vm_start;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 61ccf7722fc3..45a65c400bd0 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -408,7 +408,7 @@ static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
* unpopulated ptes via cramfs_read_folio().
*/
int i;
- vma->vm_flags |= VM_MIXEDMAP;
+ vm_flags_set(vma, VM_MIXEDMAP);
for (i = 0; i < pages && !ret; i++) {
vm_fault_t vmf;
unsigned long off = i * PAGE_SIZE;
@@ -437,7 +437,7 @@ bailout:
static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
{
- return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -ENOSYS;
+ return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -ENOSYS;
}
static unsigned long cramfs_physmem_get_unmapped_area(struct file *file,
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 032e12dccb84..e16545849ea7 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -424,7 +424,7 @@ static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma)
return -EINVAL;
vma->vm_ops = &erofs_dax_vm_ops;
- vma->vm_flags |= VM_HUGEPAGE;
+ vm_flags_set(vma, VM_HUGEPAGE);
return 0;
}
#else
diff --git a/fs/exec.c b/fs/exec.c
index 5c00670d25f3..7c44d0c65b1b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -270,7 +270,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
vma->vm_end = STACK_TOP_MAX;
vma->vm_start = vma->vm_end - PAGE_SIZE;
- vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
+ vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP);
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
err = insert_vm_struct(mm, vma);
@@ -699,7 +699,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
/*
* cover the whole range: [new_start, old_end)
*/
- if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL))
+ if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
return -ENOMEM;
/*
@@ -731,12 +731,9 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
}
tlb_finish_mmu(&tlb);
- /*
- * Shrink the vma to just the new range. Always succeeds.
- */
- vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);
-
- return 0;
+ vma_prev(&vmi);
+ /* Shrink the vma to just the new range */
+ return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
}
/*
@@ -758,6 +755,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
unsigned long stack_expand;
unsigned long rlim_stack;
struct mmu_gather tlb;
+ struct vma_iterator vmi;
#ifdef CONFIG_STACK_GROWSUP
/* Limit stack size */
@@ -812,8 +810,10 @@ int setup_arg_pages(struct linux_binprm *bprm,
vm_flags |= mm->def_flags;
vm_flags |= VM_STACK_INCOMPLETE_SETUP;
+ vma_iter_init(&vmi, mm, vma->vm_start);
+
tlb_gather_mmu(&tlb, mm);
- ret = mprotect_fixup(&tlb, vma, &prev, vma->vm_start, vma->vm_end,
+ ret = mprotect_fixup(&vmi, &tlb, vma, &prev, vma->vm_start, vma->vm_end,
vm_flags);
tlb_finish_mmu(&tlb);
@@ -834,7 +834,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
}
/* mprotect_fixup is overkill to remove the temporary stack flags */
- vma->vm_flags &= ~VM_STACK_INCOMPLETE_SETUP;
+ vm_flags_clear(vma, VM_STACK_INCOMPLETE_SETUP);
stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */
stack_size = vma->vm_end - vma->vm_start;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 7ac0a81bd371..6bdf61a62c79 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -801,7 +801,7 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
file_accessed(file);
if (IS_DAX(file_inode(file))) {
vma->vm_ops = &ext4_dax_vm_ops;
- vma->vm_flags |= VM_HUGEPAGE;
+ vm_flags_set(vma, VM_HUGEPAGE);
} else {
vma->vm_ops = &ext4_file_vm_ops;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b936ee3af51e..f67d0e1bf4e0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2596,8 +2596,8 @@ static bool ext4_page_nomap_can_writeout(struct page *page)
static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
{
struct address_space *mapping = mpd->inode->i_mapping;
- struct pagevec pvec;
- unsigned int nr_pages;
+ struct folio_batch fbatch;
+ unsigned int nr_folios;
long left = mpd->wbc->nr_to_write;
pgoff_t index = mpd->first_page;
pgoff_t end = mpd->last_page;
@@ -2611,18 +2611,17 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;
-
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
mpd->map.m_len = 0;
mpd->next_page = index;
while (index <= end) {
- nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
- tag);
- if (nr_pages == 0)
+ nr_folios = filemap_get_folios_tag(mapping, &index, end,
+ tag, &fbatch);
+ if (nr_folios == 0)
break;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ for (i = 0; i < nr_folios; i++) {
+ struct folio *folio = fbatch.folios[i];
/*
* Accumulated enough dirty pages? This doesn't apply
@@ -2636,10 +2635,10 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
goto out;
/* If we can't merge this page, we are done. */
- if (mpd->map.m_len > 0 && mpd->next_page != page->index)
+ if (mpd->map.m_len > 0 && mpd->next_page != folio->index)
goto out;
- lock_page(page);
+ folio_lock(folio);
/*
* If the page is no longer dirty, or its mapping no
* longer corresponds to inode we are writing (which
@@ -2647,16 +2646,16 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
* page is already under writeback and we are not doing
* a data integrity writeback, skip the page
*/
- if (!PageDirty(page) ||
- (PageWriteback(page) &&
+ if (!folio_test_dirty(folio) ||
+ (folio_test_writeback(folio) &&
(mpd->wbc->sync_mode == WB_SYNC_NONE)) ||
- unlikely(page->mapping != mapping)) {
- unlock_page(page);
+ unlikely(folio->mapping != mapping)) {
+ folio_unlock(folio);
continue;
}
- wait_on_page_writeback(page);
- BUG_ON(PageWriteback(page));
+ folio_wait_writeback(folio);
+ BUG_ON(folio_test_writeback(folio));
/*
* Should never happen but for buggy code in
@@ -2667,56 +2666,56 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
*
* [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz
*/
- if (!page_has_buffers(page)) {
- ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index);
- ClearPageDirty(page);
- unlock_page(page);
+ if (!folio_buffers(folio)) {
+ ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", folio->index);
+ folio_clear_dirty(folio);
+ folio_unlock(folio);
continue;
}
if (mpd->map.m_len == 0)
- mpd->first_page = page->index;
- mpd->next_page = page->index + 1;
+ mpd->first_page = folio->index;
+ mpd->next_page = folio->index + folio_nr_pages(folio);
/*
* Writeout for transaction commit where we cannot
* modify metadata is simple. Just submit the page.
*/
if (!mpd->can_map) {
- if (ext4_page_nomap_can_writeout(page)) {
- err = mpage_submit_page(mpd, page);
+ if (ext4_page_nomap_can_writeout(&folio->page)) {
+ err = mpage_submit_page(mpd, &folio->page);
if (err < 0)
goto out;
} else {
- unlock_page(page);
- mpd->first_page++;
+ folio_unlock(folio);
+ mpd->first_page += folio_nr_pages(folio);
}
} else {
/* Add all dirty buffers to mpd */
- lblk = ((ext4_lblk_t)page->index) <<
+ lblk = ((ext4_lblk_t)folio->index) <<
(PAGE_SHIFT - blkbits);
- head = page_buffers(page);
+ head = folio_buffers(folio);
err = mpage_process_page_bufs(mpd, head, head,
- lblk);
+ lblk);
if (err <= 0)
goto out;
err = 0;
}
- left--;
+ left -= folio_nr_pages(folio);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
mpd->scanned_until_end = 1;
return 0;
out:
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
return err;
}
-static int ext4_writepage_cb(struct page *page, struct writeback_control *wbc,
+static int ext4_writepage_cb(struct folio *folio, struct writeback_control *wbc,
void *data)
{
- return ext4_writepage(page, wbc);
+ return ext4_writepage(&folio->page, wbc);
}
static int ext4_do_writepages(struct mpage_da_data *mpd)
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 8dbb87edf24c..2de9829aed63 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -110,22 +110,23 @@ out:
}
/**
- * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
+ * mext_folio_double_lock - Grab and lock folio on both @inode1 and @inode2
*
* @inode1: the inode structure
* @inode2: the inode structure
- * @index1: page index
- * @index2: page index
- * @page: result page vector
+ * @index1: folio index
+ * @index2: folio index
+ * @folio: result folio vector
*
- * Grab two locked pages for inode's by inode order
+ * Grab two locked folio for inode's by inode order
*/
static int
-mext_page_double_lock(struct inode *inode1, struct inode *inode2,
- pgoff_t index1, pgoff_t index2, struct page *page[2])
+mext_folio_double_lock(struct inode *inode1, struct inode *inode2,
+ pgoff_t index1, pgoff_t index2, struct folio *folio[2])
{
struct address_space *mapping[2];
unsigned int flags;
+ unsigned fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
BUG_ON(!inode1 || !inode2);
if (inode1 < inode2) {
@@ -138,28 +139,30 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
}
flags = memalloc_nofs_save();
- page[0] = grab_cache_page_write_begin(mapping[0], index1);
- if (!page[0]) {
+ folio[0] = __filemap_get_folio(mapping[0], index1, fgp_flags,
+ mapping_gfp_mask(mapping[0]));
+ if (!folio[0]) {
memalloc_nofs_restore(flags);
return -ENOMEM;
}
- page[1] = grab_cache_page_write_begin(mapping[1], index2);
+ folio[1] = __filemap_get_folio(mapping[1], index2, fgp_flags,
+ mapping_gfp_mask(mapping[1]));
memalloc_nofs_restore(flags);
- if (!page[1]) {
- unlock_page(page[0]);
- put_page(page[0]);
+ if (!folio[1]) {
+ folio_unlock(folio[0]);
+ folio_put(folio[0]);
return -ENOMEM;
}
/*
- * grab_cache_page_write_begin() may not wait on page's writeback if
+ * __filemap_get_folio() may not wait on folio's writeback if
* BDI not demand that. But it is reasonable to be very conservative
- * here and explicitly wait on page's writeback
+ * here and explicitly wait on folio's writeback
*/
- wait_on_page_writeback(page[0]);
- wait_on_page_writeback(page[1]);
+ folio_wait_writeback(folio[0]);
+ folio_wait_writeback(folio[1]);
if (inode1 > inode2)
- swap(page[0], page[1]);
+ swap(folio[0], folio[1]);
return 0;
}
@@ -252,7 +255,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
int block_len_in_page, int unwritten, int *err)
{
struct inode *orig_inode = file_inode(o_filp);
- struct page *pagep[2] = {NULL, NULL};
struct folio *folio[2] = {NULL, NULL};
handle_t *handle;
ext4_lblk_t orig_blk_offset, donor_blk_offset;
@@ -303,8 +305,8 @@ again:
replaced_size = data_size;
- *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
- donor_page_offset, pagep);
+ *err = mext_folio_double_lock(orig_inode, donor_inode, orig_page_offset,
+ donor_page_offset, folio);
if (unlikely(*err < 0))
goto stop_journal;
/*
@@ -314,8 +316,6 @@ again:
* hold page's lock, if it is still the case data copy is not
* necessary, just swap data blocks between orig and donor.
*/
- folio[0] = page_folio(pagep[0]);
- folio[1] = page_folio(pagep[1]);
VM_BUG_ON_FOLIO(folio_test_large(folio[0]), folio[0]);
VM_BUG_ON_FOLIO(folio_test_large(folio[1]), folio[1]);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2ae46d11aa30..faae05493471 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -482,7 +482,7 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
*
* However, we may have to redirty a page (see below.)
*/
-static int ext4_journalled_writepage_callback(struct page *page,
+static int ext4_journalled_writepage_callback(struct folio *folio,
struct writeback_control *wbc,
void *data)
{
@@ -490,7 +490,7 @@ static int ext4_journalled_writepage_callback(struct page *page,
struct buffer_head *bh, *head;
struct journal_head *jh;
- bh = head = page_buffers(page);
+ bh = head = folio_buffers(folio);
do {
/*
* We have to redirty a page in these cases:
@@ -509,7 +509,7 @@ static int ext4_journalled_writepage_callback(struct page *page,
if (buffer_dirty(bh) ||
(jh && (jh->b_transaction != transaction ||
jh->b_next_transaction))) {
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
goto out;
}
} while ((bh = bh->b_this_page) != head);
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 56f7d0d6a8b2..5a5515d83a1b 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -395,59 +395,62 @@ long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
{
struct address_space *mapping = META_MAPPING(sbi);
pgoff_t index = 0, prev = ULONG_MAX;
- struct pagevec pvec;
+ struct folio_batch fbatch;
long nwritten = 0;
- int nr_pages;
+ int nr_folios;
struct writeback_control wbc = {
.for_reclaim = 0,
};
struct blk_plug plug;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
blk_start_plug(&plug);
- while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY))) {
+ while ((nr_folios = filemap_get_folios_tag(mapping, &index,
+ (pgoff_t)-1,
+ PAGECACHE_TAG_DIRTY, &fbatch))) {
int i;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ for (i = 0; i < nr_folios; i++) {
+ struct folio *folio = fbatch.folios[i];
- if (prev == ULONG_MAX)
- prev = page->index - 1;
- if (nr_to_write != LONG_MAX && page->index != prev + 1) {
- pagevec_release(&pvec);
+ if (nr_to_write != LONG_MAX && i != 0 &&
+ folio->index != prev +
+ folio_nr_pages(fbatch.folios[i-1])) {
+ folio_batch_release(&fbatch);
goto stop;
}
- lock_page(page);
+ folio_lock(folio);
- if (unlikely(page->mapping != mapping)) {
+ if (unlikely(folio->mapping != mapping)) {
continue_unlock:
- unlock_page(page);
+ folio_unlock(folio);
continue;
}
- if (!PageDirty(page)) {
+ if (!folio_test_dirty(folio)) {
/* someone wrote it for us */
goto continue_unlock;
}
- f2fs_wait_on_page_writeback(page, META, true, true);
+ f2fs_wait_on_page_writeback(&folio->page, META,
+ true, true);
- if (!clear_page_dirty_for_io(page))
+ if (!folio_clear_dirty_for_io(folio))
goto continue_unlock;
- if (__f2fs_write_meta_page(page, &wbc, io_type)) {
- unlock_page(page);
+ if (__f2fs_write_meta_page(&folio->page, &wbc,
+ io_type)) {
+ folio_unlock(folio);
break;
}
- nwritten++;
- prev = page->index;
+ nwritten += folio_nr_pages(folio);
+ prev = folio->index;
if (unlikely(nwritten >= nr_to_write))
break;
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
stop:
@@ -1403,7 +1406,7 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi,
};
/*
- * pagevec_lookup_tag and lock_page again will take
+ * filemap_get_folios_tag and lock_page again will take
* some extra time. Therefore, f2fs_update_meta_pages and
* f2fs_sync_meta_pages are combined in this function.
*/
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8630df80fedb..41addc605350 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2956,6 +2956,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
int ret = 0;
int done = 0, retry = 0;
struct page *pages[F2FS_ONSTACK_PAGES];
+ struct folio_batch fbatch;
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
struct bio *bio = NULL;
sector_t last_block;
@@ -2976,6 +2977,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
.private = NULL,
};
#endif
+ int nr_folios, p, idx;
int nr_pages;
pgoff_t index;
pgoff_t end; /* Inclusive */
@@ -2986,6 +2988,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
int submitted = 0;
int i;
+ folio_batch_init(&fbatch);
+
if (get_dirty_pages(mapping->host) <=
SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
set_inode_flag(mapping->host, FI_HOT_DATA);
@@ -3011,13 +3015,38 @@ retry:
tag_pages_for_writeback(mapping, index, end);
done_index = index;
while (!done && !retry && (index <= end)) {
- nr_pages = find_get_pages_range_tag(mapping, &index, end,
- tag, F2FS_ONSTACK_PAGES, pages);
- if (nr_pages == 0)
+ nr_pages = 0;
+again:
+ nr_folios = filemap_get_folios_tag(mapping, &index, end,
+ tag, &fbatch);
+ if (nr_folios == 0) {
+ if (nr_pages)
+ goto write;
break;
+ }
+ for (i = 0; i < nr_folios; i++) {
+ struct folio *folio = fbatch.folios[i];
+
+ idx = 0;
+ p = folio_nr_pages(folio);
+add_more:
+ pages[nr_pages] = folio_page(folio, idx);
+ folio_get(folio);
+ if (++nr_pages == F2FS_ONSTACK_PAGES) {
+ index = folio->index + idx + 1;
+ folio_batch_release(&fbatch);
+ goto write;
+ }
+ if (++idx < p)
+ goto add_more;
+ }
+ folio_batch_release(&fbatch);
+ goto again;
+write:
for (i = 0; i < nr_pages; i++) {
struct page *page = pages[i];
+ struct folio *folio = page_folio(page);
bool need_readd;
readd:
need_readd = false;
@@ -3034,7 +3063,7 @@ readd:
}
if (!f2fs_cluster_can_merge_page(&cc,
- page->index)) {
+ folio->index)) {
ret = f2fs_write_multi_pages(&cc,
&submitted, wbc, io_type);
if (!ret)
@@ -3043,27 +3072,28 @@ readd:
}
if (unlikely(f2fs_cp_error(sbi)))
- goto lock_page;
+ goto lock_folio;
if (!f2fs_cluster_is_empty(&cc))
- goto lock_page;
+ goto lock_folio;
if (f2fs_all_cluster_page_ready(&cc,
pages, i, nr_pages, true))
- goto lock_page;
+ goto lock_folio;
ret2 = f2fs_prepare_compress_overwrite(
inode, &pagep,
- page->index, &fsdata);
+ folio->index, &fsdata);
if (ret2 < 0) {
ret = ret2;
done = 1;
break;
} else if (ret2 &&
(!f2fs_compress_write_end(inode,
- fsdata, page->index, 1) ||
+ fsdata, folio->index, 1) ||
!f2fs_all_cluster_page_ready(&cc,
- pages, i, nr_pages, false))) {
+ pages, i, nr_pages,
+ false))) {
retry = 1;
break;
}
@@ -3076,46 +3106,47 @@ readd:
break;
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
-lock_page:
+lock_folio:
#endif
- done_index = page->index;
+ done_index = folio->index;
retry_write:
- lock_page(page);
+ folio_lock(folio);
- if (unlikely(page->mapping != mapping)) {
+ if (unlikely(folio->mapping != mapping)) {
continue_unlock:
- unlock_page(page);
+ folio_unlock(folio);
continue;
}
- if (!PageDirty(page)) {
+ if (!folio_test_dirty(folio)) {
/* someone wrote it for us */
goto continue_unlock;
}
- if (PageWriteback(page)) {
+ if (folio_test_writeback(folio)) {
if (wbc->sync_mode != WB_SYNC_NONE)
- f2fs_wait_on_page_writeback(page,
+ f2fs_wait_on_page_writeback(
+ &folio->page,
DATA, true, true);
else
goto continue_unlock;
}
- if (!clear_page_dirty_for_io(page))
+ if (!folio_clear_dirty_for_io(folio))
goto continue_unlock;
#ifdef CONFIG_F2FS_FS_COMPRESSION
if (f2fs_compressed_file(inode)) {
- get_page(page);
- f2fs_compress_ctx_add_page(&cc, page);
+ folio_get(folio);
+ f2fs_compress_ctx_add_page(&cc, &folio->page);
continue;
}
#endif
- ret = f2fs_write_single_data_page(page, &submitted,
- &bio, &last_block, wbc, io_type,
- 0, true);
+ ret = f2fs_write_single_data_page(&folio->page,
+ &submitted, &bio, &last_block,
+ wbc, io_type, 0, true);
if (ret == AOP_WRITEPAGE_ACTIVATE)
- unlock_page(page);
+ folio_unlock(folio);
#ifdef CONFIG_F2FS_FS_COMPRESSION
result:
#endif
@@ -3139,7 +3170,8 @@ result:
}
goto next;
}
- done_index = page->index + 1;
+ done_index = folio->index +
+ folio_nr_pages(folio);
done = 1;
break;
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index dde4c0458704..cf997356d9f9 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1518,23 +1518,24 @@ iput_out:
static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
{
pgoff_t index;
- struct pagevec pvec;
+ struct folio_batch fbatch;
struct page *last_page = NULL;
- int nr_pages;
+ int nr_folios;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
index = 0;
- while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
- PAGECACHE_TAG_DIRTY))) {
+ while ((nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi), &index,
+ (pgoff_t)-1, PAGECACHE_TAG_DIRTY,
+ &fbatch))) {
int i;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ for (i = 0; i < nr_folios; i++) {
+ struct page *page = &fbatch.folios[i]->page;
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_put_page(last_page, 0);
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
return ERR_PTR(-EIO);
}
@@ -1565,7 +1566,7 @@ continue_unlock:
last_page = page;
unlock_page(page);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
return last_page;
@@ -1731,12 +1732,12 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
unsigned int *seq_id)
{
pgoff_t index;
- struct pagevec pvec;
+ struct folio_batch fbatch;
int ret = 0;
struct page *last_page = NULL;
bool marked = false;
nid_t ino = inode->i_ino;
- int nr_pages;
+ int nr_folios;
int nwritten = 0;
if (atomic) {
@@ -1745,20 +1746,21 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
return PTR_ERR_OR_ZERO(last_page);
}
retry:
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
index = 0;
- while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
- PAGECACHE_TAG_DIRTY))) {
+ while ((nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi), &index,
+ (pgoff_t)-1, PAGECACHE_TAG_DIRTY,
+ &fbatch))) {
int i;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ for (i = 0; i < nr_folios; i++) {
+ struct page *page = &fbatch.folios[i]->page;
bool submitted = false;
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_put_page(last_page, 0);
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
ret = -EIO;
goto out;
}
@@ -1824,7 +1826,7 @@ continue_unlock:
break;
}
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
if (ret || marked)
@@ -1889,17 +1891,18 @@ static bool flush_dirty_inode(struct page *page)
void f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
{
pgoff_t index = 0;
- struct pagevec pvec;
- int nr_pages;
+ struct folio_batch fbatch;
+ int nr_folios;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
- while ((nr_pages = pagevec_lookup_tag(&pvec,
- NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY))) {
+ while ((nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi), &index,
+ (pgoff_t)-1, PAGECACHE_TAG_DIRTY,
+ &fbatch))) {
int i;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ for (i = 0; i < nr_folios; i++) {
+ struct page *page = &fbatch.folios[i]->page;
if (!IS_DNODE(page))
continue;
@@ -1926,7 +1929,7 @@ continue_unlock:
}
unlock_page(page);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
}
@@ -1936,23 +1939,24 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
bool do_balance, enum iostat_type io_type)
{
pgoff_t index;
- struct pagevec pvec;
+ struct folio_batch fbatch;
int step = 0;
int nwritten = 0;
int ret = 0;
- int nr_pages, done = 0;
+ int nr_folios, done = 0;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
next_step:
index = 0;
- while (!done && (nr_pages = pagevec_lookup_tag(&pvec,
- NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY))) {
+ while (!done && (nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi),
+ &index, (pgoff_t)-1, PAGECACHE_TAG_DIRTY,
+ &fbatch))) {
int i;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ for (i = 0; i < nr_folios; i++) {
+ struct page *page = &fbatch.folios[i]->page;
bool submitted = false;
/* give a priority to WB_SYNC threads */
@@ -2027,7 +2031,7 @@ write_node:
if (--wbc->nr_to_write == 0)
break;
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
if (wbc->nr_to_write == 0) {
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6fba5a52127b..195dc23e0d83 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -237,7 +237,7 @@ void wb_wait_for_completion(struct wb_completion *done)
static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
static struct workqueue_struct *isw_wq;
-void __inode_attach_wb(struct inode *inode, struct page *page)
+void __inode_attach_wb(struct inode *inode, struct folio *folio)
{
struct backing_dev_info *bdi = inode_to_bdi(inode);
struct bdi_writeback *wb = NULL;
@@ -245,8 +245,8 @@ void __inode_attach_wb(struct inode *inode, struct page *page)
if (inode_cgwb_enabled(inode)) {
struct cgroup_subsys_state *memcg_css;
- if (page) {
- memcg_css = mem_cgroup_css_from_page(page);
+ if (folio) {
+ memcg_css = mem_cgroup_css_from_folio(folio);
wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
} else {
/* must pin memcg_css, see wb_get_create() */
@@ -859,6 +859,7 @@ EXPORT_SYMBOL_GPL(wbc_detach_inode);
void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
size_t bytes)
{
+ struct folio *folio;
struct cgroup_subsys_state *css;
int id;
@@ -871,7 +872,8 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
if (!wbc->wb || wbc->no_cgroup_owner)
return;
- css = mem_cgroup_css_from_page(page);
+ folio = page_folio(page);
+ css = mem_cgroup_css_from_folio(folio);
/* dead cgroups shouldn't contribute to inode ownership arbitration */
if (!(css->flags & CSS_ONLINE))
return;
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index e23e802a8013..8e74f278a3f6 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -860,7 +860,7 @@ int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma)
{
file_accessed(file);
vma->vm_ops = &fuse_dax_vm_ops;
- vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
+ vm_flags_set(vma, VM_MIXEDMAP | VM_HUGEPAGE);
return 0;
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 82710d103556..ff0b3ef774d4 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2186,7 +2186,7 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page,
return false;
}
-static int fuse_writepages_fill(struct page *page,
+static int fuse_writepages_fill(struct folio *folio,
struct writeback_control *wbc, void *_data)
{
struct fuse_fill_wb_data *data = _data;
@@ -2205,7 +2205,7 @@ static int fuse_writepages_fill(struct page *page,
goto out_unlock;
}
- if (wpa && fuse_writepage_need_send(fc, page, ap, data)) {
+ if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data)) {
fuse_writepages_send(data);
data->wpa = NULL;
}
@@ -2240,7 +2240,7 @@ static int fuse_writepages_fill(struct page *page,
data->max_pages = 1;
ap = &wpa->ia.ap;
- fuse_write_args_fill(&wpa->ia, data->ff, page_offset(page), 0);
+ fuse_write_args_fill(&wpa->ia, data->ff, folio_pos(folio), 0);
wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
wpa->next = NULL;
ap->args.in_pages = true;
@@ -2248,13 +2248,13 @@ static int fuse_writepages_fill(struct page *page,
ap->num_pages = 0;
wpa->inode = inode;
}
- set_page_writeback(page);
+ folio_start_writeback(folio);
- copy_highpage(tmp_page, page);
+ copy_highpage(tmp_page, &folio->page);
ap->pages[ap->num_pages] = tmp_page;
ap->descs[ap->num_pages].offset = 0;
ap->descs[ap->num_pages].length = PAGE_SIZE;
- data->orig_pages[ap->num_pages] = page;
+ data->orig_pages[ap->num_pages] = &folio->page;
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
@@ -2268,13 +2268,13 @@ static int fuse_writepages_fill(struct page *page,
spin_lock(&fi->lock);
ap->num_pages++;
spin_unlock(&fi->lock);
- } else if (fuse_writepage_add(wpa, page)) {
+ } else if (fuse_writepage_add(wpa, &folio->page)) {
data->wpa = wpa;
} else {
- end_page_writeback(page);
+ folio_end_writeback(folio);
}
out_unlock:
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 2748a82de42a..a5f4be6b9213 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -194,67 +194,71 @@ static int gfs2_writepages(struct address_space *mapping,
}
/**
- * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages
+ * gfs2_write_jdata_batch - Write back a folio batch's worth of folios
* @mapping: The mapping
* @wbc: The writeback control
- * @pvec: The vector of pages
- * @nr_pages: The number of pages to write
+ * @fbatch: The batch of folios
* @done_index: Page index
*
* Returns: non-zero if loop should terminate, zero otherwise
*/
-static int gfs2_write_jdata_pagevec(struct address_space *mapping,
+static int gfs2_write_jdata_batch(struct address_space *mapping,
struct writeback_control *wbc,
- struct pagevec *pvec,
- int nr_pages,
+ struct folio_batch *fbatch,
pgoff_t *done_index)
{
struct inode *inode = mapping->host;
struct gfs2_sbd *sdp = GFS2_SB(inode);
- unsigned nrblocks = nr_pages * (PAGE_SIZE >> inode->i_blkbits);
+ unsigned nrblocks;
int i;
int ret;
+ int nr_pages = 0;
+ int nr_folios = folio_batch_count(fbatch);
+
+ for (i = 0; i < nr_folios; i++)
+ nr_pages += folio_nr_pages(fbatch->folios[i]);
+ nrblocks = nr_pages * (PAGE_SIZE >> inode->i_blkbits);
ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
if (ret < 0)
return ret;
- for(i = 0; i < nr_pages; i++) {
- struct page *page = pvec->pages[i];
+ for (i = 0; i < nr_folios; i++) {
+ struct folio *folio = fbatch->folios[i];
- *done_index = page->index;
+ *done_index = folio->index;
- lock_page(page);
+ folio_lock(folio);
- if (unlikely(page->mapping != mapping)) {
+ if (unlikely(folio->mapping != mapping)) {
continue_unlock:
- unlock_page(page);
+ folio_unlock(folio);
continue;
}
- if (!PageDirty(page)) {
+ if (!folio_test_dirty(folio)) {
/* someone wrote it for us */
goto continue_unlock;
}
- if (PageWriteback(page)) {
+ if (folio_test_writeback(folio)) {
if (wbc->sync_mode != WB_SYNC_NONE)
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
else
goto continue_unlock;
}
- BUG_ON(PageWriteback(page));
- if (!clear_page_dirty_for_io(page))
+ BUG_ON(folio_test_writeback(folio));
+ if (!folio_clear_dirty_for_io(folio))
goto continue_unlock;
trace_wbc_writepage(wbc, inode_to_bdi(inode));
- ret = __gfs2_jdata_writepage(page, wbc);
+ ret = __gfs2_jdata_writepage(&folio->page, wbc);
if (unlikely(ret)) {
if (ret == AOP_WRITEPAGE_ACTIVATE) {
- unlock_page(page);
+ folio_unlock(folio);
ret = 0;
} else {
@@ -267,7 +271,8 @@ continue_unlock:
* not be suitable for data integrity
* writeout).
*/
- *done_index = page->index + 1;
+ *done_index = folio->index +
+ folio_nr_pages(folio);
ret = 1;
break;
}
@@ -304,8 +309,8 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
{
int ret = 0;
int done = 0;
- struct pagevec pvec;
- int nr_pages;
+ struct folio_batch fbatch;
+ int nr_folios;
pgoff_t writeback_index;
pgoff_t index;
pgoff_t end;
@@ -314,7 +319,7 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
int range_whole = 0;
xa_mark_t tag;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
if (wbc->range_cyclic) {
writeback_index = mapping->writeback_index; /* prev offset */
index = writeback_index;
@@ -340,17 +345,18 @@ retry:
tag_pages_for_writeback(mapping, index, end);
done_index = index;
while (!done && (index <= end)) {
- nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
- tag);
- if (nr_pages == 0)
+ nr_folios = filemap_get_folios_tag(mapping, &index, end,
+ tag, &fbatch);
+ if (nr_folios == 0)
break;
- ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, &done_index);
+ ret = gfs2_write_jdata_batch(mapping, wbc, &fbatch,
+ &done_index);
if (ret)
done = 1;
if (ret > 0)
ret = 0;
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index ad14818a790a..4d99cc77a29b 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -39,7 +39,7 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
"AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page "
"state 0x%lx\n",
bh, (unsigned long long)bh->b_blocknr, bh->b_state,
- bh->b_page->mapping, bh->b_page->flags);
+ bh->b_folio->mapping, bh->b_folio->flags);
fs_err(sdp, "AIL glock %u:%llu mapping %p\n",
gl->gl_name.ln_type, gl->gl_name.ln_number,
gfs2_glock2aspace(gl));
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 61323deb80bc..d750d1128bed 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -80,11 +80,11 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
brelse(bd->bd_bh);
}
-static int __gfs2_writepage(struct page *page, struct writeback_control *wbc,
+static int __gfs2_writepage(struct folio *folio, struct writeback_control *wbc,
void *data)
{
struct address_space *mapping = data;
- int ret = mapping->a_ops->writepage(page, wbc);
+ int ret = mapping->a_ops->writepage(&folio->page, wbc);
mapping_set_error(mapping, ret);
return ret;
}
@@ -136,7 +136,7 @@ __acquires(&sdp->sd_ail_lock)
continue;
gl = bd->bd_gl;
list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list);
- mapping = bh->b_page->mapping;
+ mapping = bh->b_folio->mapping;
if (!mapping)
continue;
spin_unlock(&sdp->sd_ail_lock);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 3c41b864ee5b..924361fa510b 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -334,7 +334,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
void gfs2_remove_from_journal(struct buffer_head *bh, int meta)
{
- struct address_space *mapping = bh->b_page->mapping;
+ struct address_space *mapping = bh->b_folio->mapping;
struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
struct gfs2_bufdata *bd = bh->b_private;
struct gfs2_trans *tr = current->journal_info;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0ce1cc4c2add..9062da6da567 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -132,7 +132,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
* way when do_mmap unwinds (may be important on powerpc
* and ia64).
*/
- vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
+ vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND);
vma->vm_ops = &hugetlb_vm_ops;
ret = seal_check_future_write(info->seals, vma);
@@ -388,9 +388,7 @@ static bool hugetlb_vma_maps_page(struct vm_area_struct *vma,
{
pte_t *ptep, pte;
- ptep = huge_pte_offset(vma->vm_mm, addr,
- huge_page_size(hstate_vma(vma)));
-
+ ptep = hugetlb_walk(vma, addr, huge_page_size(hstate_vma(vma)));
if (!ptep)
return false;
@@ -412,10 +410,12 @@ static bool hugetlb_vma_maps_page(struct vm_area_struct *vma,
*/
static unsigned long vma_offset_start(struct vm_area_struct *vma, pgoff_t start)
{
+ unsigned long offset = 0;
+
if (vma->vm_pgoff < start)
- return (start - vma->vm_pgoff) << PAGE_SHIFT;
- else
- return 0;
+ offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
+
+ return vma->vm_start + offset;
}
static unsigned long vma_offset_end(struct vm_area_struct *vma, pgoff_t end)
@@ -457,7 +457,7 @@ retry:
v_start = vma_offset_start(vma, start);
v_end = vma_offset_end(vma, end);
- if (!hugetlb_vma_maps_page(vma, vma->vm_start + v_start, page))
+ if (!hugetlb_vma_maps_page(vma, v_start, page))
continue;
if (!hugetlb_vma_trylock_write(vma)) {
@@ -473,8 +473,8 @@ retry:
break;
}
- unmap_hugepage_range(vma, vma->vm_start + v_start, v_end,
- NULL, ZAP_FLAG_DROP_MARKER);
+ unmap_hugepage_range(vma, v_start, v_end, NULL,
+ ZAP_FLAG_DROP_MARKER);
hugetlb_vma_unlock_write(vma);
}
@@ -507,10 +507,9 @@ retry:
*/
v_start = vma_offset_start(vma, start);
v_end = vma_offset_end(vma, end);
- if (hugetlb_vma_maps_page(vma, vma->vm_start + v_start, page))
- unmap_hugepage_range(vma, vma->vm_start + v_start,
- v_end, NULL,
- ZAP_FLAG_DROP_MARKER);
+ if (hugetlb_vma_maps_page(vma, v_start, page))
+ unmap_hugepage_range(vma, v_start, v_end, NULL,
+ ZAP_FLAG_DROP_MARKER);
kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
hugetlb_vma_unlock_write(vma);
@@ -540,8 +539,7 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
v_start = vma_offset_start(vma, start);
v_end = vma_offset_end(vma, end);
- unmap_hugepage_range(vma, vma->vm_start + v_start, v_end,
- NULL, zap_flags);
+ unmap_hugepage_range(vma, v_start, v_end, NULL, zap_flags);
/*
* Note that vma lock only exists for shared/non-private
@@ -813,7 +811,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
* as input to create an allocation policy.
*/
vma_init(&pseudo_vma, mm);
- pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
+ vm_flags_init(&pseudo_vma, VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pseudo_vma.vm_file = file;
for (index = start; index < end; index++) {
@@ -821,8 +819,9 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
* This is supposed to be the vaddr where the page is being
* faulted in, but we have no vaddr here.
*/
- struct page *page;
+ struct folio *folio;
unsigned long addr;
+ bool present;
cond_resched();
@@ -846,48 +845,49 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */
- page = find_get_page(mapping, index);
- if (page) {
- put_page(page);
+ rcu_read_lock();
+ present = page_cache_next_miss(mapping, index, 1) != index;
+ rcu_read_unlock();
+ if (present) {
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
hugetlb_drop_vma_policy(&pseudo_vma);
continue;
}
/*
- * Allocate page without setting the avoid_reserve argument.
+ * Allocate folio without setting the avoid_reserve argument.
* There certainly are no reserves associated with the
* pseudo_vma. However, there could be shared mappings with
* reserves for the file at the inode level. If we fallocate
- * pages in these areas, we need to consume the reserves
+ * folios in these areas, we need to consume the reserves
* to keep reservation accounting consistent.
*/
- page = alloc_huge_page(&pseudo_vma, addr, 0);
+ folio = alloc_hugetlb_folio(&pseudo_vma, addr, 0);
hugetlb_drop_vma_policy(&pseudo_vma);
- if (IS_ERR(page)) {
+ if (IS_ERR(folio)) {
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- error = PTR_ERR(page);
+ error = PTR_ERR(folio);
goto out;
}
- clear_huge_page(page, addr, pages_per_huge_page(h));
- __SetPageUptodate(page);
- error = hugetlb_add_to_page_cache(page, mapping, index);
+ clear_huge_page(&folio->page, addr, pages_per_huge_page(h));
+ __folio_mark_uptodate(folio);
+ error = hugetlb_add_to_page_cache(folio, mapping, index);
if (unlikely(error)) {
- restore_reserve_on_error(h, &pseudo_vma, addr, page);
- put_page(page);
+ restore_reserve_on_error(h, &pseudo_vma, addr, folio);
+ folio_put(folio);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
goto out;
}
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- SetHPageMigratable(page);
+ folio_set_hugetlb_migratable(folio);
/*
- * unlock_page because locked by hugetlb_add_to_page_cache()
- * put_page() due to reference from alloc_huge_page()
+ * folio_unlock because locked by hugetlb_add_to_page_cache()
+ * folio_put() due to reference from alloc_hugetlb_folio()
*/
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
}
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index d3c300563eb8..6f4c97a6d7e9 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1714,10 +1714,9 @@ done:
* For unwritten space on the page, we need to start the conversion to
* regular allocated space.
*/
-static int
-iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
+static int iomap_do_writepage(struct folio *folio,
+ struct writeback_control *wbc, void *data)
{
- struct folio *folio = page_folio(page);
struct iomap_writepage_ctx *wpc = data;
struct inode *inode = folio->mapping->host;
u64 end_pos, isize;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 4810438b7856..b33155dd7001 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -63,16 +63,12 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
static void release_buffer_page(struct buffer_head *bh)
{
struct folio *folio;
- struct page *page;
if (buffer_dirty(bh))
goto nope;
if (atomic_read(&bh->b_count) != 1)
goto nope;
- page = bh->b_page;
- if (!page)
- goto nope;
- folio = page_folio(page);
+ folio = bh->b_folio;
if (folio->mapping)
goto nope;
@@ -181,31 +177,6 @@ static int journal_wait_on_commit_record(journal_t *journal,
return ret;
}
-/*
- * write the filemap data using writepage() address_space_operations.
- * We don't do block allocation here even for delalloc. We don't
- * use writepages() because with delayed allocation we may be doing
- * block allocation in writepages().
- */
-int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
-{
- struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = mapping->nrpages * 2,
- .range_start = jinode->i_dirty_start,
- .range_end = jinode->i_dirty_end,
- };
-
- /*
- * submit the inode data buffers. We use writepage
- * instead of writepages. Because writepages can do
- * block allocation with delalloc. We need to write
- * only allocated blocks here.
- */
- return generic_writepages(mapping, &wbc);
-}
-
/* Send all the data buffers related to an inode */
int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{
@@ -1040,7 +1011,7 @@ restart_loop:
* already detached from the mapping and buffers cannot
* get reused.
*/
- mapping = READ_ONCE(bh->b_page->mapping);
+ mapping = READ_ONCE(bh->b_folio->mapping);
if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
clear_buffer_mapped(bh);
clear_buffer_new(bh);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 2696f43e7239..e80c781731f8 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -89,7 +89,6 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
EXPORT_SYMBOL(jbd2_journal_force_commit);
EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
-EXPORT_SYMBOL(jbd2_journal_submit_inode_data_buffers);
EXPORT_SYMBOL(jbd2_journal_finish_inode_data_buffers);
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
@@ -2938,7 +2937,7 @@ repeat:
} else {
J_ASSERT_BH(bh,
(atomic_read(&bh->b_count) > 0) ||
- (bh->b_page && bh->b_page->mapping));
+ (bh->b_folio && bh->b_folio->mapping));
if (!new_jh) {
jbd_unlock_bh_journal_head(bh);
diff --git a/fs/mpage.c b/fs/mpage.c
index ce53179428db..22b9de5ddd68 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -198,7 +198,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
/*
* Then do more get_blocks calls until we are done with this folio.
*/
- map_bh->b_page = &folio->page;
+ map_bh->b_folio = folio;
while (page_block < blocks_per_page) {
map_bh->b_state = 0;
map_bh->b_size = 0;
@@ -269,11 +269,6 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
alloc_new:
if (args->bio == NULL) {
- if (first_hole == blocks_per_page) {
- if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
- &folio->page))
- goto out;
- }
args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), opf,
gfp);
if (args->bio == NULL)
@@ -445,15 +440,14 @@ void clean_page_buffers(struct page *page)
clean_buffers(page, ~0U);
}
-static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
+static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
void *data)
{
struct mpage_data *mpd = data;
struct bio *bio = mpd->bio;
- struct address_space *mapping = page->mapping;
- struct inode *inode = page->mapping->host;
+ struct address_space *mapping = folio->mapping;
+ struct inode *inode = mapping->host;
const unsigned blkbits = inode->i_blkbits;
- unsigned long end_index;
const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
sector_t last_block;
sector_t block_in_file;
@@ -464,13 +458,13 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
int boundary = 0;
sector_t boundary_block = 0;
struct block_device *boundary_bdev = NULL;
- int length;
+ size_t length;
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
+ struct buffer_head *head = folio_buffers(folio);
- if (page_has_buffers(page)) {
- struct buffer_head *head = page_buffers(page);
+ if (head) {
struct buffer_head *bh = head;
/* If they're all mapped and dirty, do it */
@@ -522,10 +516,16 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
/*
* The page has no buffers: map it to disk
*/
- BUG_ON(!PageUptodate(page));
- block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
+ BUG_ON(!folio_test_uptodate(folio));
+ block_in_file = (sector_t)folio->index << (PAGE_SHIFT - blkbits);
+ /*
+ * Whole page beyond EOF? Skip allocating blocks to avoid leaking
+ * space.
+ */
+ if (block_in_file >= (i_size + (1 << blkbits) - 1) >> blkbits)
+ goto page_is_mapped;
last_block = (i_size - 1) >> blkbits;
- map_bh.b_page = page;
+ map_bh.b_folio = folio;
for (page_block = 0; page_block < blocks_per_page; ) {
map_bh.b_state = 0;
@@ -556,8 +556,11 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
first_unmapped = page_block;
page_is_mapped:
- end_index = i_size >> PAGE_SHIFT;
- if (page->index >= end_index) {
+ /* Don't bother writing beyond EOF, truncate will discard the folio */
+ if (folio_pos(folio) >= i_size)
+ goto confused;
+ length = folio_size(folio);
+ if (folio_pos(folio) + length > i_size) {
/*
* The page straddles i_size. It must be zeroed out on each
* and every writepage invocation because it may be mmapped.
@@ -566,11 +569,8 @@ page_is_mapped:
* is zeroed when mapped, and writes to that region are not
* written out to the file."
*/
- unsigned offset = i_size & (PAGE_SIZE - 1);
-
- if (page->index > end_index || !offset)
- goto confused;
- zero_user_segment(page, offset, PAGE_SIZE);
+ length = i_size - folio_pos(folio);
+ folio_zero_segment(folio, length, folio_size(folio));
}
/*
@@ -581,11 +581,6 @@ page_is_mapped:
alloc_new:
if (bio == NULL) {
- if (first_unmapped == blocks_per_page) {
- if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
- page, wbc))
- goto out;
- }
bio = bio_alloc(bdev, BIO_MAX_VECS,
REQ_OP_WRITE | wbc_to_write_flags(wbc),
GFP_NOFS);
@@ -598,18 +593,18 @@ alloc_new:
* the confused fail path above (OOM) will be very confused when
* it finds all bh marked clean (i.e. it will not write anything)
*/
- wbc_account_cgroup_owner(wbc, page, PAGE_SIZE);
+ wbc_account_cgroup_owner(wbc, &folio->page, folio_size(folio));
length = first_unmapped << blkbits;
- if (bio_add_page(bio, page, length, 0) < length) {
+ if (!bio_add_folio(bio, folio, length, 0)) {
bio = mpage_bio_submit(bio);
goto alloc_new;
}
- clean_buffers(page, first_unmapped);
+ clean_buffers(&folio->page, first_unmapped);
- BUG_ON(PageWriteback(page));
- set_page_writeback(page);
- unlock_page(page);
+ BUG_ON(folio_test_writeback(folio));
+ folio_start_writeback(folio);
+ folio_unlock(folio);
if (boundary || (first_unmapped != blocks_per_page)) {
bio = mpage_bio_submit(bio);
if (boundary_block) {
@@ -628,7 +623,7 @@ confused:
/*
* The caller has a ref on the inode, so *mapping is stable
*/
- ret = block_write_full_page(page, mpd->get_block, wbc);
+ ret = block_write_full_page(&folio->page, mpd->get_block, wbc);
mapping_set_error(mapping, ret);
out:
mpd->bio = bio;
@@ -643,14 +638,6 @@ out:
*
* This is a library function, which implements the writepages()
* address_space_operation.
- *
- * If a page is already under I/O, generic_writepages() skips it, even
- * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
- * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
- * and msync() need to guarantee that all the data which was dirty at the time
- * the call was made get new I/O started against them. If wbc->sync_mode is
- * WB_SYNC_ALL then we were called for data integrity and we must wait for
- * existing IO to complete.
*/
int
mpage_writepages(struct address_space *mapping,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b508c985eb14..f4cca8f00c0c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -688,15 +688,14 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
return ret;
}
-static int nfs_writepages_callback(struct page *page,
+static int nfs_writepages_callback(struct folio *folio,
struct writeback_control *wbc, void *data)
{
- struct folio *folio = page_folio(page);
int ret;
ret = nfs_do_writepage(folio, wbc, data);
if (ret != AOP_WRITEPAGE_ACTIVATE)
- unlock_page(page);
+ folio_unlock(folio);
return ret;
}
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index e74fda212620..e956f886a1a1 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -188,7 +188,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
struct page *opage = obh->b_page;
lock_page(opage);
retry:
- /* BUG_ON(oldkey != obh->b_page->index); */
+ /* BUG_ON(oldkey != obh->b_folio->index); */
if (unlikely(oldkey != opage->index))
NILFS_PAGE_BUG(opage,
"invalid oldkey %lld (newkey=%lld)",
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 40ce92a332fe..2681a449edc1 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -398,7 +398,7 @@ int nilfs_btree_broken_node_block(struct buffer_head *bh)
if (buffer_nilfs_checked(bh))
return 0;
- inode = bh->b_page->mapping->host;
+ inode = bh->b_folio->mapping->host;
ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
bh->b_size, inode, bh->b_blocknr);
if (likely(!ret))
@@ -2150,7 +2150,7 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
struct address_space *btcache = btnc_inode->i_mapping;
struct list_head lists[NILFS_BTREE_LEVEL_MAX];
- struct pagevec pvec;
+ struct folio_batch fbatch;
struct buffer_head *bh, *head;
pgoff_t index = 0;
int level, i;
@@ -2160,19 +2160,19 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
level++)
INIT_LIST_HEAD(&lists[level]);
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
- while (pagevec_lookup_tag(&pvec, btcache, &index,
- PAGECACHE_TAG_DIRTY)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- bh = head = page_buffers(pvec.pages[i]);
+ while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1,
+ PAGECACHE_TAG_DIRTY, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ bh = head = folio_buffers(fbatch.folios[i]);
do {
if (buffer_dirty(bh))
nilfs_btree_add_dirty_buffer(btree,
lists, bh);
} while ((bh = bh->b_this_page) != head);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index b0d22ff24b67..48fe71d309cb 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -140,7 +140,7 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
{
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
- struct inode *inode = bh->b_page->mapping->host;
+ struct inode *inode = bh->b_folio->mapping->host;
nilfs_err(inode->i_sb,
"I/O error reading %s block for GC (ino=%lu, vblocknr=%llu)",
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index cbf4fa60eea2..19c8158605ed 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -563,7 +563,7 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
struct page *page;
int blkbits = inode->i_blkbits;
- page = grab_cache_page(shadow->inode->i_mapping, bh->b_page->index);
+ page = grab_cache_page(shadow->inode->i_mapping, bh->b_folio->index);
if (!page)
return -ENOMEM;
@@ -595,7 +595,7 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh)
struct page *page;
int n;
- page = find_lock_page(shadow->inode->i_mapping, bh->b_page->index);
+ page = find_lock_page(shadow->inode->i_mapping, bh->b_folio->index);
if (page) {
if (page_has_buffers(page)) {
n = bh_offset(bh) >> inode->i_blkbits;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 39b7eea2642a..41ccd43cd979 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -240,42 +240,43 @@ static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
int nilfs_copy_dirty_pages(struct address_space *dmap,
struct address_space *smap)
{
- struct pagevec pvec;
+ struct folio_batch fbatch;
unsigned int i;
pgoff_t index = 0;
int err = 0;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
repeat:
- if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY))
+ if (!filemap_get_folios_tag(smap, &index, (pgoff_t)-1,
+ PAGECACHE_TAG_DIRTY, &fbatch))
return 0;
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i], *dpage;
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i], *dfolio;
- lock_page(page);
- if (unlikely(!PageDirty(page)))
- NILFS_PAGE_BUG(page, "inconsistent dirty state");
+ folio_lock(folio);
+ if (unlikely(!folio_test_dirty(folio)))
+ NILFS_PAGE_BUG(&folio->page, "inconsistent dirty state");
- dpage = grab_cache_page(dmap, page->index);
- if (unlikely(!dpage)) {
+ dfolio = filemap_grab_folio(dmap, folio->index);
+ if (unlikely(!dfolio)) {
/* No empty page is added to the page cache */
err = -ENOMEM;
- unlock_page(page);
+ folio_unlock(folio);
break;
}
- if (unlikely(!page_has_buffers(page)))
- NILFS_PAGE_BUG(page,
+ if (unlikely(!folio_buffers(folio)))
+ NILFS_PAGE_BUG(&folio->page,
"found empty page in dat page cache");
- nilfs_copy_page(dpage, page, 1);
- __set_page_dirty_nobuffers(dpage);
+ nilfs_copy_page(&dfolio->page, &folio->page, 1);
+ filemap_dirty_folio(folio_mapping(dfolio), dfolio);
- unlock_page(dpage);
- put_page(dpage);
- unlock_page(page);
+ folio_unlock(dfolio);
+ folio_put(dfolio);
+ folio_unlock(folio);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
if (likely(!err))
@@ -357,22 +358,22 @@ repeat:
*/
void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
{
- struct pagevec pvec;
+ struct folio_batch fbatch;
unsigned int i;
pgoff_t index = 0;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
- while (pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
+ while (filemap_get_folios_tag(mapping, &index, (pgoff_t)-1,
+ PAGECACHE_TAG_DIRTY, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i];
- lock_page(page);
- nilfs_clear_dirty_page(page, silent);
- unlock_page(page);
+ folio_lock(folio);
+ nilfs_clear_dirty_page(&folio->page, silent);
+ folio_unlock(folio);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 76c3bd88b858..19446a8243d7 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -680,7 +680,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
loff_t start, loff_t end)
{
struct address_space *mapping = inode->i_mapping;
- struct pagevec pvec;
+ struct folio_batch fbatch;
pgoff_t index = 0, last = ULONG_MAX;
size_t ndirties = 0;
int i;
@@ -694,23 +694,26 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
index = start >> PAGE_SHIFT;
last = end >> PAGE_SHIFT;
}
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
repeat:
if (unlikely(index > last) ||
- !pagevec_lookup_range_tag(&pvec, mapping, &index, last,
- PAGECACHE_TAG_DIRTY))
+ !filemap_get_folios_tag(mapping, &index, last,
+ PAGECACHE_TAG_DIRTY, &fbatch))
return ndirties;
- for (i = 0; i < pagevec_count(&pvec); i++) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
struct buffer_head *bh, *head;
- struct page *page = pvec.pages[i];
+ struct folio *folio = fbatch.folios[i];
- lock_page(page);
- if (!page_has_buffers(page))
- create_empty_buffers(page, i_blocksize(inode), 0);
- unlock_page(page);
+ folio_lock(folio);
+ head = folio_buffers(folio);
+ if (!head) {
+ create_empty_buffers(&folio->page, i_blocksize(inode), 0);
+ head = folio_buffers(folio);
+ }
+ folio_unlock(folio);
- bh = head = page_buffers(page);
+ bh = head;
do {
if (!buffer_dirty(bh) || buffer_async_write(bh))
continue;
@@ -718,13 +721,13 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
list_add_tail(&bh->b_assoc_buffers, listp);
ndirties++;
if (unlikely(ndirties >= nlimit)) {
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
return ndirties;
}
} while (bh = bh->b_this_page, bh != head);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
goto repeat;
}
@@ -734,20 +737,19 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
{
struct nilfs_inode_info *ii = NILFS_I(inode);
struct inode *btnc_inode = ii->i_assoc_inode;
- struct pagevec pvec;
+ struct folio_batch fbatch;
struct buffer_head *bh, *head;
unsigned int i;
pgoff_t index = 0;
if (!btnc_inode)
return;
+ folio_batch_init(&fbatch);
- pagevec_init(&pvec);
-
- while (pagevec_lookup_tag(&pvec, btnc_inode->i_mapping, &index,
- PAGECACHE_TAG_DIRTY)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- bh = head = page_buffers(pvec.pages[i]);
+ while (filemap_get_folios_tag(btnc_inode->i_mapping, &index,
+ (pgoff_t)-1, PAGECACHE_TAG_DIRTY, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ bh = head = folio_buffers(fbatch.folios[i]);
do {
if (buffer_dirty(bh) &&
!buffer_async_write(bh)) {
@@ -758,7 +760,7 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
bh = bh->b_this_page;
} while (bh != head);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
}
@@ -1581,7 +1583,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
nblocks = le32_to_cpu(finfo->fi_nblocks);
ndatablk = le32_to_cpu(finfo->fi_ndatablk);
- inode = bh->b_page->mapping->host;
+ inode = bh->b_folio->mapping->host;
if (mode == SC_LSEG_DSYNC)
sc_op = &nilfs_sc_dsync_ops;
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 8ce2616b087f..309d9b46b5d5 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -832,32 +832,29 @@ out:
return err;
}
-static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
+static int ntfs_resident_writepage(struct folio *folio,
+ struct writeback_control *wbc, void *data)
{
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
- struct ntfs_inode *ni = ntfs_i(inode);
- int err;
+ struct address_space *mapping = data;
+ struct ntfs_inode *ni = ntfs_i(mapping->host);
+ int ret;
- if (is_resident(ni)) {
- ni_lock(ni);
- err = attr_data_write_resident(ni, page);
- ni_unlock(ni);
- if (err != E_NTFS_NONRESIDENT) {
- unlock_page(page);
- return err;
- }
- }
+ ni_lock(ni);
+ ret = attr_data_write_resident(ni, &folio->page);
+ ni_unlock(ni);
- return block_write_full_page(page, ntfs_get_block, wbc);
+ if (ret != E_NTFS_NONRESIDENT)
+ folio_unlock(folio);
+ mapping_set_error(mapping, ret);
+ return ret;
}
static int ntfs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- /* Redirect call to 'ntfs_writepage' for resident files. */
if (is_resident(ntfs_i(mapping->host)))
- return generic_writepages(mapping, wbc);
+ return write_cache_pages(mapping, wbc, ntfs_resident_writepage,
+ mapping);
return mpage_writepages(mapping, wbc, ntfs_get_block);
}
@@ -2066,13 +2063,13 @@ const struct inode_operations ntfs_link_inode_operations = {
const struct address_space_operations ntfs_aops = {
.read_folio = ntfs_read_folio,
.readahead = ntfs_readahead,
- .writepage = ntfs_writepage,
.writepages = ntfs_writepages,
.write_begin = ntfs_write_begin,
.write_end = ntfs_write_end,
.direct_IO = ntfs_direct_IO,
.bmap = ntfs_bmap,
.dirty_folio = block_dirty_folio,
+ .migrate_folio = buffer_migrate_folio,
.invalidate_folio = block_invalidate_folio,
};
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 3fb98b4569a2..25d8072ccfce 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -15,6 +15,7 @@
#include <linux/time.h>
#include <linux/random.h>
#include <linux/delay.h>
+#include <linux/writeback.h>
#include <cluster/masklog.h>
@@ -841,6 +842,19 @@ bail:
return status;
}
+static int ocfs2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
+{
+ struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = mapping->nrpages * 2,
+ .range_start = jinode->i_dirty_start,
+ .range_end = jinode->i_dirty_end,
+ };
+
+ return filemap_fdatawrite_wbc(mapping, &wbc);
+}
+
int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
{
int status = -1;
@@ -910,7 +924,7 @@ int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
journal->j_journal = j_journal;
journal->j_journal->j_submit_inode_data_buffers =
- jbd2_journal_submit_inode_data_buffers;
+ ocfs2_journal_submit_inode_data_buffers;
journal->j_journal->j_finish_inode_data_buffers =
jbd2_journal_finish_inode_data_buffers;
journal->j_inode = inode;
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 4ecb91a9bbeb..1a4301a38aa7 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -390,8 +390,7 @@ static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma)
"orangefs_file_mmap: called on %pD\n", file);
/* set the sequential readahead hint */
- vma->vm_flags |= VM_SEQ_READ;
- vma->vm_flags &= ~VM_RAND_READ;
+ vm_flags_mod(vma, VM_SEQ_READ, VM_RAND_READ);
file_accessed(file);
vma->vm_ops = &orangefs_file_vm_ops;
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index c036851a6efe..aefdf1d3be7c 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -148,21 +148,20 @@ static int orangefs_writepages_work(struct orangefs_writepages *ow,
return ret;
}
-static int orangefs_writepages_callback(struct page *page,
- struct writeback_control *wbc, void *data)
+static int orangefs_writepages_callback(struct folio *folio,
+ struct writeback_control *wbc, void *data)
{
struct orangefs_writepages *ow = data;
- struct orangefs_write_range *wr;
+ struct orangefs_write_range *wr = folio->private;
int ret;
- if (!PagePrivate(page)) {
- unlock_page(page);
+ if (!wr) {
+ folio_unlock(folio);
/* It's not private so there's nothing to write, right? */
printk("writepages_callback not private!\n");
BUG();
return 0;
}
- wr = (struct orangefs_write_range *)page_private(page);
ret = -1;
if (ow->npages == 0) {
@@ -170,7 +169,7 @@ static int orangefs_writepages_callback(struct page *page,
ow->len = wr->len;
ow->uid = wr->uid;
ow->gid = wr->gid;
- ow->pages[ow->npages++] = page;
+ ow->pages[ow->npages++] = &folio->page;
ret = 0;
goto done;
}
@@ -182,7 +181,7 @@ static int orangefs_writepages_callback(struct page *page,
}
if (ow->off + ow->len == wr->pos) {
ow->len += wr->len;
- ow->pages[ow->npages++] = page;
+ ow->pages[ow->npages++] = &folio->page;
ret = 0;
goto done;
}
@@ -192,10 +191,10 @@ done:
orangefs_writepages_work(ow, wbc);
ow->npages = 0;
}
- ret = orangefs_writepage_locked(page, wbc);
- mapping_set_error(page->mapping, ret);
- unlock_page(page);
- end_page_writeback(page);
+ ret = orangefs_writepage_locked(&folio->page, wbc);
+ mapping_set_error(folio->mapping, ret);
+ folio_unlock(folio);
+ folio_end_writeback(folio);
} else {
if (ow->npages == ow->maxpages) {
orangefs_writepages_work(ow, wbc);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index af1c49ae11b1..6a96e1713fd5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -890,7 +890,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
struct vm_area_struct *vma;
unsigned long vma_start = 0, last_vma_end = 0;
int ret = 0;
- MA_STATE(mas, &mm->mm_mt, 0, 0);
+ VMA_ITERATOR(vmi, mm, 0);
priv->task = get_proc_task(priv->inode);
if (!priv->task)
@@ -908,7 +908,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
goto out_put_mm;
hold_task_mempolicy(priv);
- vma = mas_find(&mas, ULONG_MAX);
+ vma = vma_next(&vmi);
if (unlikely(!vma))
goto empty_set;
@@ -923,7 +923,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
* access it for write request.
*/
if (mmap_lock_is_contended(mm)) {
- mas_pause(&mas);
+ vma_iter_invalidate(&vmi);
mmap_read_unlock(mm);
ret = mmap_read_lock_killable(mm);
if (ret) {
@@ -948,31 +948,31 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
*
* 1) VMA2 is freed, but VMA3 exists:
*
- * find_vma(mm, 16k - 1) will return VMA3.
+ * vma_next(vmi) will return VMA3.
* In this case, just continue from VMA3.
*
* 2) VMA2 still exists:
*
- * find_vma(mm, 16k - 1) will return VMA2.
- * Iterate the loop like the original one.
+ * vma_next(vmi) will return VMA3.
+ * In this case, just continue from VMA3.
*
* 3) No more VMAs can be found:
*
- * find_vma(mm, 16k - 1) will return NULL.
+ * vma_next(vmi) will return NULL.
* No more things to do, just break.
*
* 4) (last_vma_end - 1) is the middle of a vma (VMA'):
*
- * find_vma(mm, 16k - 1) will return VMA' whose range
+ * vma_next(vmi) will return VMA' whose range
* contains last_vma_end.
* Iterate VMA' from last_vma_end.
*/
- vma = mas_find(&mas, ULONG_MAX);
+ vma = vma_next(&vmi);
/* Case 3 above */
if (!vma)
break;
- /* Case 1 above */
+ /* Case 1 and 2 above */
if (vma->vm_start >= last_vma_end)
continue;
@@ -980,8 +980,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
if (vma->vm_end > last_vma_end)
smap_gather_stats(vma, &mss, last_vma_end);
}
- /* Case 2 above */
- } while ((vma = mas_find(&mas, ULONG_MAX)) != NULL);
+ } for_each_vma(vmi, vma);
empty_set:
show_vma_header_prefix(m, vma_start, last_vma_end, 0, 0, 0, 0);
@@ -1277,7 +1276,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
return -ESRCH;
mm = get_task_mm(task);
if (mm) {
- MA_STATE(mas, &mm->mm_mt, 0, 0);
+ VMA_ITERATOR(vmi, mm, 0);
struct mmu_notifier_range range;
struct clear_refs_private cp = {
.type = type,
@@ -1297,16 +1296,16 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
}
if (type == CLEAR_REFS_SOFT_DIRTY) {
- mas_for_each(&mas, vma, ULONG_MAX) {
+ for_each_vma(vmi, vma) {
if (!(vma->vm_flags & VM_SOFTDIRTY))
continue;
- vma->vm_flags &= ~VM_SOFTDIRTY;
+ vm_flags_clear(vma, VM_SOFTDIRTY);
vma_set_page_prot(vma);
}
inc_tlb_flush_pending(mm);
mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
- 0, NULL, mm, 0, -1UL);
+ 0, mm, 0, -1UL);
mmu_notifier_invalidate_range_start(&range);
}
walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 2fd06f52b6a4..0ec35072a8e5 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -38,7 +38,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
}
if (atomic_read(&mm->mm_count) > 1 ||
- vma->vm_flags & VM_MAYSHARE) {
+ is_nommu_shared_mapping(vma->vm_flags)) {
sbytes += size;
} else {
bytes += size;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 09a81e4b1273..12af614f33ce 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -582,8 +582,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
if (vma->vm_flags & (VM_WRITE | VM_EXEC))
return -EPERM;
- vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
- vma->vm_flags |= VM_MIXEDMAP;
+ vm_flags_mod(vma, VM_MIXEDMAP, VM_MAYWRITE | VM_MAYEXEC);
vma->vm_ops = &vmcore_mmap_ops;
len = 0;
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5bf74c2f6042..2f67516bb9bf 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -264,7 +264,7 @@ out:
*/
static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma)
{
- if (!(vma->vm_flags & (VM_SHARED | VM_MAYSHARE)))
+ if (!is_nommu_shared_mapping(vma->vm_flags))
return -ENOSYS;
file_accessed(file);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 9f62da7471c9..9ce4ec296b74 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -601,7 +601,7 @@ static int journal_list_still_alive(struct super_block *s,
*/
static void release_buffer_page(struct buffer_head *bh)
{
- struct folio *folio = page_folio(bh->b_page);
+ struct folio *folio = bh->b_folio;
if (!folio->mapping && folio_trylock(folio)) {
folio_get(folio);
put_bh(bh);
@@ -866,7 +866,7 @@ loop_next:
* will ever write the buffer. We're safe if we write the
* page one last time after freeing the journal header.
*/
- if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
+ if (buffer_dirty(bh) && unlikely(bh->b_folio->mapping == NULL)) {
spin_unlock(lock);
write_dirty_buffer(bh, 0);
spin_lock(lock);
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index b0ae088dffc7..2cec61af2a9e 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -177,7 +177,7 @@ void reiserfs_unmap_buffer(struct buffer_head *bh)
* BUG() on attempt to write not mapped buffer
*/
if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) {
- struct inode *inode = bh->b_page->mapping->host;
+ struct inode *inode = bh->b_folio->mapping->host;
struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
spin_lock(&j->j_dirty_buffers_lock);
list_del_init(&bh->b_assoc_buffers);
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c
index 2c4a23113fb5..4578dc45e50a 100644
--- a/fs/romfs/mmap-nommu.c
+++ b/fs/romfs/mmap-nommu.c
@@ -63,7 +63,7 @@ static unsigned long romfs_get_unmapped_area(struct file *file,
*/
static int romfs_mmap(struct file *file, struct vm_area_struct *vma)
{
- return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -ENOSYS;
+ return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -ENOSYS;
}
static unsigned romfs_mmap_capabilities(struct file *file)
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 3b2adf4cbc57..f7a9607c2b95 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -185,9 +185,10 @@ static void udf_write_failed(struct address_space *mapping, loff_t to)
}
}
-static int udf_adinicb_writepage(struct page *page,
+static int udf_adinicb_writepage(struct folio *folio,
struct writeback_control *wbc, void *data)
{
+ struct page *page = &folio->page;
struct inode *inode = page->mapping->host;
struct udf_inode_info *iinfo = UDF_I(inode);
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index cc694846617a..44d1ee429eb0 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -113,7 +113,7 @@ static void userfaultfd_set_vm_flags(struct vm_area_struct *vma,
{
const bool uffd_wp_changed = (vma->vm_flags ^ flags) & VM_UFFD_WP;
- vma->vm_flags = flags;
+ vm_flags_reset(vma, flags);
/*
* For shared mappings, we want to enable writenotify while
* userfaultfd-wp is enabled (see vma_wants_writenotify()). We'll simply
@@ -252,14 +252,12 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
unsigned long flags,
unsigned long reason)
{
- struct mm_struct *mm = ctx->mm;
pte_t *ptep, pte;
bool ret = true;
- mmap_assert_locked(mm);
-
- ptep = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
+ mmap_assert_locked(ctx->mm);
+ ptep = hugetlb_walk(vma, address, vma_mmu_pagesize(vma));
if (!ptep)
goto out;
@@ -391,7 +389,8 @@ static inline unsigned int userfaultfd_get_blocking_state(unsigned int flags)
*/
vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
{
- struct mm_struct *mm = vmf->vma->vm_mm;
+ struct vm_area_struct *vma = vmf->vma;
+ struct mm_struct *mm = vma->vm_mm;
struct userfaultfd_ctx *ctx;
struct userfaultfd_wait_queue uwq;
vm_fault_t ret = VM_FAULT_SIGBUS;
@@ -418,7 +417,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
*/
mmap_assert_locked(mm);
- ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
+ ctx = vma->vm_userfaultfd_ctx.ctx;
if (!ctx)
goto out;
@@ -508,6 +507,15 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
blocking_state = userfaultfd_get_blocking_state(vmf->flags);
+ /*
+ * Take the vma lock now, in order to safely call
+ * userfaultfd_huge_must_wait() later. Since acquiring the
+ * (sleepable) vma lock can modify the current task state, that
+ * must be before explicitly calling set_current_state().
+ */
+ if (is_vm_hugetlb_page(vma))
+ hugetlb_vma_lock_read(vma);
+
spin_lock_irq(&ctx->fault_pending_wqh.lock);
/*
* After the __add_wait_queue the uwq is visible to userland
@@ -522,13 +530,15 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
set_current_state(blocking_state);
spin_unlock_irq(&ctx->fault_pending_wqh.lock);
- if (!is_vm_hugetlb_page(vmf->vma))
+ if (!is_vm_hugetlb_page(vma))
must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
reason);
else
- must_wait = userfaultfd_huge_must_wait(ctx, vmf->vma,
+ must_wait = userfaultfd_huge_must_wait(ctx, vma,
vmf->address,
vmf->flags, reason);
+ if (is_vm_hugetlb_page(vma))
+ hugetlb_vma_unlock_read(vma);
mmap_read_unlock(mm);
if (likely(must_wait && !READ_ONCE(ctx->released))) {
@@ -873,7 +883,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
/* len == 0 means wake all */
struct userfaultfd_wake_range range = { .len = 0, };
unsigned long new_flags;
- MA_STATE(mas, &mm->mm_mt, 0, 0);
+ VMA_ITERATOR(vmi, mm, 0);
WRITE_ONCE(ctx->released, true);
@@ -890,7 +900,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
*/
mmap_write_lock(mm);
prev = NULL;
- mas_for_each(&mas, vma, ULONG_MAX) {
+ for_each_vma(vmi, vma) {
cond_resched();
BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^
!!(vma->vm_flags & __VM_UFFD_FLAGS));
@@ -899,13 +909,12 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
continue;
}
new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
- prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
+ prev = vma_merge(&vmi, mm, prev, vma->vm_start, vma->vm_end,
new_flags, vma->anon_vma,
vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
NULL_VM_UFFD_CTX, anon_vma_name(vma));
if (prev) {
- mas_pause(&mas);
vma = prev;
} else {
prev = vma;
@@ -1292,7 +1301,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
bool found;
bool basic_ioctls;
unsigned long start, end, vma_end;
- MA_STATE(mas, &mm->mm_mt, 0, 0);
+ struct vma_iterator vmi;
user_uffdio_register = (struct uffdio_register __user *) arg;
@@ -1334,17 +1343,13 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
if (!mmget_not_zero(mm))
goto out;
+ ret = -EINVAL;
mmap_write_lock(mm);
- mas_set(&mas, start);
- vma = mas_find(&mas, ULONG_MAX);
+ vma_iter_init(&vmi, mm, start);
+ vma = vma_find(&vmi, end);
if (!vma)
goto out_unlock;
- /* check that there's at least one vma in the range */
- ret = -EINVAL;
- if (vma->vm_start >= end)
- goto out_unlock;
-
/*
* If the first vma contains huge pages, make sure start address
* is aligned to huge page size.
@@ -1361,7 +1366,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
*/
found = false;
basic_ioctls = false;
- for (cur = vma; cur; cur = mas_next(&mas, end - 1)) {
+ cur = vma;
+ do {
cond_resched();
BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^
@@ -1418,16 +1424,14 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
basic_ioctls = true;
found = true;
- }
+ } for_each_vma_range(vmi, cur, end);
BUG_ON(!found);
- mas_set(&mas, start);
- prev = mas_prev(&mas, 0);
- if (prev != vma)
- mas_next(&mas, ULONG_MAX);
+ vma_iter_set(&vmi, start);
+ prev = vma_prev(&vmi);
ret = 0;
- do {
+ for_each_vma_range(vmi, vma, end) {
cond_resched();
BUG_ON(!vma_can_userfault(vma, vm_flags));
@@ -1448,30 +1452,25 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
vma_end = min(end, vma->vm_end);
new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags;
- prev = vma_merge(mm, prev, start, vma_end, new_flags,
+ prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
((struct vm_userfaultfd_ctx){ ctx }),
anon_vma_name(vma));
if (prev) {
/* vma_merge() invalidated the mas */
- mas_pause(&mas);
vma = prev;
goto next;
}
if (vma->vm_start < start) {
- ret = split_vma(mm, vma, start, 1);
+ ret = split_vma(&vmi, vma, start, 1);
if (ret)
break;
- /* split_vma() invalidated the mas */
- mas_pause(&mas);
}
if (vma->vm_end > end) {
- ret = split_vma(mm, vma, end, 0);
+ ret = split_vma(&vmi, vma, end, 0);
if (ret)
break;
- /* split_vma() invalidated the mas */
- mas_pause(&mas);
}
next:
/*
@@ -1488,8 +1487,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
skip:
prev = vma;
start = vma->vm_end;
- vma = mas_next(&mas, end - 1);
- } while (vma);
+ }
+
out_unlock:
mmap_write_unlock(mm);
mmput(mm);
@@ -1533,7 +1532,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
bool found;
unsigned long start, end, vma_end;
const void __user *buf = (void __user *)arg;
- MA_STATE(mas, &mm->mm_mt, 0, 0);
+ struct vma_iterator vmi;
ret = -EFAULT;
if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
@@ -1552,14 +1551,10 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
goto out;
mmap_write_lock(mm);
- mas_set(&mas, start);
- vma = mas_find(&mas, ULONG_MAX);
- if (!vma)
- goto out_unlock;
-
- /* check that there's at least one vma in the range */
ret = -EINVAL;
- if (vma->vm_start >= end)
+ vma_iter_init(&vmi, mm, start);
+ vma = vma_find(&vmi, end);
+ if (!vma)
goto out_unlock;
/*
@@ -1577,8 +1572,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
* Search for not compatible vmas.
*/
found = false;
- ret = -EINVAL;
- for (cur = vma; cur; cur = mas_next(&mas, end - 1)) {
+ cur = vma;
+ do {
cond_resched();
BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^
@@ -1595,16 +1590,13 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
goto out_unlock;
found = true;
- }
+ } for_each_vma_range(vmi, cur, end);
BUG_ON(!found);
- mas_set(&mas, start);
- prev = mas_prev(&mas, 0);
- if (prev != vma)
- mas_next(&mas, ULONG_MAX);
-
+ vma_iter_set(&vmi, start);
+ prev = vma_prev(&vmi);
ret = 0;
- do {
+ for_each_vma_range(vmi, vma, end) {
cond_resched();
BUG_ON(!vma_can_userfault(vma, vma->vm_flags));
@@ -1640,26 +1632,23 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
uffd_wp_range(mm, vma, start, vma_end - start, false);
new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
- prev = vma_merge(mm, prev, start, vma_end, new_flags,
+ prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
NULL_VM_UFFD_CTX, anon_vma_name(vma));
if (prev) {
vma = prev;
- mas_pause(&mas);
goto next;
}
if (vma->vm_start < start) {
- ret = split_vma(mm, vma, start, 1);
+ ret = split_vma(&vmi, vma, start, 1);
if (ret)
break;
- mas_pause(&mas);
}
if (vma->vm_end > end) {
- ret = split_vma(mm, vma, end, 0);
+ ret = split_vma(&vmi, vma, end, 0);
if (ret)
break;
- mas_pause(&mas);
}
next:
/*
@@ -1673,8 +1662,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
skip:
prev = vma;
start = vma->vm_end;
- vma = mas_next(&mas, end - 1);
- } while (vma);
+ }
+
out_unlock:
mmap_write_unlock(mm);
mmput(mm);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index d06c0cc62f61..705250f9f90a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1429,7 +1429,7 @@ xfs_file_mmap(
file_accessed(file);
vma->vm_ops = &xfs_file_vm_ops;
if (IS_DAX(inode))
- vma->vm_flags |= VM_HUGEPAGE;
+ vm_flags_set(vma, VM_HUGEPAGE);
return 0;
}