summaryrefslogtreecommitdiff
path: root/mm/filemap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--mm/filemap.c567
1 files changed, 272 insertions, 295 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index dae481293b5d..3e9feb5cc570 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -835,6 +835,8 @@ EXPORT_SYMBOL(file_write_and_wait_range);
*/
void replace_page_cache_page(struct page *old, struct page *new)
{
+ struct folio *fold = page_folio(old);
+ struct folio *fnew = page_folio(new);
struct address_space *mapping = old->mapping;
void (*freepage)(struct page *) = mapping->a_ops->freepage;
pgoff_t offset = old->index;
@@ -848,7 +850,7 @@ void replace_page_cache_page(struct page *old, struct page *new)
new->mapping = mapping;
new->index = offset;
- mem_cgroup_migrate(old, new);
+ mem_cgroup_migrate(fold, fnew);
xas_lock_irq(&xas);
xas_store(&xas, new);
@@ -870,26 +872,25 @@ void replace_page_cache_page(struct page *old, struct page *new)
}
EXPORT_SYMBOL_GPL(replace_page_cache_page);
-noinline int __add_to_page_cache_locked(struct page *page,
- struct address_space *mapping,
- pgoff_t offset, gfp_t gfp,
- void **shadowp)
+noinline int __filemap_add_folio(struct address_space *mapping,
+ struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
{
- XA_STATE(xas, &mapping->i_pages, offset);
- int huge = PageHuge(page);
+ XA_STATE(xas, &mapping->i_pages, index);
+ int huge = folio_test_hugetlb(folio);
int error;
bool charged = false;
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(PageSwapBacked(page), page);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
mapping_set_update(&xas, mapping);
- get_page(page);
- page->mapping = mapping;
- page->index = offset;
+ folio_get(folio);
+ folio->mapping = mapping;
+ folio->index = index;
if (!huge) {
- error = mem_cgroup_charge(page, NULL, gfp);
+ error = mem_cgroup_charge(folio, NULL, gfp);
+ VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
if (error)
goto error;
charged = true;
@@ -901,7 +902,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
unsigned int order = xa_get_order(xas.xa, xas.xa_index);
void *entry, *old = NULL;
- if (order > thp_order(page))
+ if (order > folio_order(folio))
xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
order, gfp);
xas_lock_irq(&xas);
@@ -918,13 +919,13 @@ noinline int __add_to_page_cache_locked(struct page *page,
*shadowp = old;
/* entry may have been split before we acquired lock */
order = xa_get_order(xas.xa, xas.xa_index);
- if (order > thp_order(page)) {
+ if (order > folio_order(folio)) {
xas_split(&xas, old, order);
xas_reset(&xas);
}
}
- xas_store(&xas, page);
+ xas_store(&xas, folio);
if (xas_error(&xas))
goto unlock;
@@ -932,7 +933,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
/* hugetlb pages do not participate in page cache accounting */
if (!huge)
- __inc_lruvec_page_state(page, NR_FILE_PAGES);
+ __lruvec_stat_add_folio(folio, NR_FILE_PAGES);
unlock:
xas_unlock_irq(&xas);
} while (xas_nomem(&xas, gfp));
@@ -940,19 +941,19 @@ unlock:
if (xas_error(&xas)) {
error = xas_error(&xas);
if (charged)
- mem_cgroup_uncharge(page);
+ mem_cgroup_uncharge(folio);
goto error;
}
- trace_mm_filemap_add_to_page_cache(page);
+ trace_mm_filemap_add_to_page_cache(&folio->page);
return 0;
error:
- page->mapping = NULL;
+ folio->mapping = NULL;
/* Leave page->index set: truncation relies upon it */
- put_page(page);
+ folio_put(folio);
return error;
}
-ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
+ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
/**
* add_to_page_cache_locked - add a locked page to the pagecache
@@ -969,59 +970,58 @@ ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask)
{
- return __add_to_page_cache_locked(page, mapping, offset,
+ return __filemap_add_folio(mapping, page_folio(page), offset,
gfp_mask, NULL);
}
EXPORT_SYMBOL(add_to_page_cache_locked);
-int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
- pgoff_t offset, gfp_t gfp_mask)
+int filemap_add_folio(struct address_space *mapping, struct folio *folio,
+ pgoff_t index, gfp_t gfp)
{
void *shadow = NULL;
int ret;
- __SetPageLocked(page);
- ret = __add_to_page_cache_locked(page, mapping, offset,
- gfp_mask, &shadow);
+ __folio_set_locked(folio);
+ ret = __filemap_add_folio(mapping, folio, index, gfp, &shadow);
if (unlikely(ret))
- __ClearPageLocked(page);
+ __folio_clear_locked(folio);
else {
/*
- * The page might have been evicted from cache only
+ * The folio might have been evicted from cache only
* recently, in which case it should be activated like
- * any other repeatedly accessed page.
- * The exception is pages getting rewritten; evicting other
+ * any other repeatedly accessed folio.
+ * The exception is folios getting rewritten; evicting other
* data from the working set, only to cache data that will
* get overwritten with something else, is a waste of memory.
*/
- WARN_ON_ONCE(PageActive(page));
- if (!(gfp_mask & __GFP_WRITE) && shadow)
- workingset_refault(page, shadow);
- lru_cache_add(page);
+ WARN_ON_ONCE(folio_test_active(folio));
+ if (!(gfp & __GFP_WRITE) && shadow)
+ workingset_refault(folio, shadow);
+ folio_add_lru(folio);
}
return ret;
}
-EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
+EXPORT_SYMBOL_GPL(filemap_add_folio);
#ifdef CONFIG_NUMA
-struct page *__page_cache_alloc(gfp_t gfp)
+struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
{
int n;
- struct page *page;
+ struct folio *folio;
if (cpuset_do_page_mem_spread()) {
unsigned int cpuset_mems_cookie;
do {
cpuset_mems_cookie = read_mems_allowed_begin();
n = cpuset_mem_spread_node();
- page = __alloc_pages_node(n, gfp, 0);
- } while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
+ folio = __folio_alloc_node(gfp, order, n);
+ } while (!folio && read_mems_allowed_retry(cpuset_mems_cookie));
- return page;
+ return folio;
}
- return alloc_pages(gfp, 0);
+ return folio_alloc(gfp, order);
}
-EXPORT_SYMBOL(__page_cache_alloc);
+EXPORT_SYMBOL(filemap_alloc_folio);
#endif
/*
@@ -1074,11 +1074,11 @@ EXPORT_SYMBOL(filemap_invalidate_unlock_two);
*/
#define PAGE_WAIT_TABLE_BITS 8
#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
-static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
+static wait_queue_head_t folio_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
-static wait_queue_head_t *page_waitqueue(struct page *page)
+static wait_queue_head_t *folio_waitqueue(struct folio *folio)
{
- return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
+ return &folio_wait_table[hash_ptr(folio, PAGE_WAIT_TABLE_BITS)];
}
void __init pagecache_init(void)
@@ -1086,7 +1086,7 @@ void __init pagecache_init(void)
int i;
for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
- init_waitqueue_head(&page_wait_table[i]);
+ init_waitqueue_head(&folio_wait_table[i]);
page_writeback_init();
}
@@ -1141,10 +1141,10 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
*/
flags = wait->flags;
if (flags & WQ_FLAG_EXCLUSIVE) {
- if (test_bit(key->bit_nr, &key->page->flags))
+ if (test_bit(key->bit_nr, &key->folio->flags))
return -1;
if (flags & WQ_FLAG_CUSTOM) {
- if (test_and_set_bit(key->bit_nr, &key->page->flags))
+ if (test_and_set_bit(key->bit_nr, &key->folio->flags))
return -1;
flags |= WQ_FLAG_DONE;
}
@@ -1157,7 +1157,7 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
*
* So update the flags atomically, and wake up the waiter
* afterwards to avoid any races. This store-release pairs
- * with the load-acquire in wait_on_page_bit_common().
+ * with the load-acquire in folio_wait_bit_common().
*/
smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN);
wake_up_state(wait->private, mode);
@@ -1176,14 +1176,14 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
return (flags & WQ_FLAG_EXCLUSIVE) != 0;
}
-static void wake_up_page_bit(struct page *page, int bit_nr)
+static void folio_wake_bit(struct folio *folio, int bit_nr)
{
- wait_queue_head_t *q = page_waitqueue(page);
+ wait_queue_head_t *q = folio_waitqueue(folio);
struct wait_page_key key;
unsigned long flags;
wait_queue_entry_t bookmark;
- key.page = page;
+ key.folio = folio;
key.bit_nr = bit_nr;
key.page_match = 0;
@@ -1218,7 +1218,7 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
* page waiters.
*/
if (!waitqueue_active(q) || !key.page_match) {
- ClearPageWaiters(page);
+ folio_clear_waiters(folio);
/*
* It's possible to miss clearing Waiters here, when we woke
* our page waiters, but the hashed waitqueue has waiters for
@@ -1230,19 +1230,19 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
spin_unlock_irqrestore(&q->lock, flags);
}
-static void wake_up_page(struct page *page, int bit)
+static void folio_wake(struct folio *folio, int bit)
{
- if (!PageWaiters(page))
+ if (!folio_test_waiters(folio))
return;
- wake_up_page_bit(page, bit);
+ folio_wake_bit(folio, bit);
}
/*
- * A choice of three behaviors for wait_on_page_bit_common():
+ * A choice of three behaviors for folio_wait_bit_common():
*/
enum behavior {
EXCLUSIVE, /* Hold ref to page and take the bit when woken, like
- * __lock_page() waiting on then setting PG_locked.
+ * __folio_lock() waiting on then setting PG_locked.
*/
SHARED, /* Hold ref to page and check the bit when woken, like
* wait_on_page_writeback() waiting on PG_writeback.
@@ -1253,16 +1253,16 @@ enum behavior {
};
/*
- * Attempt to check (or get) the page bit, and mark us done
+ * Attempt to check (or get) the folio flag, and mark us done
* if successful.
*/
-static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
+static inline bool folio_trylock_flag(struct folio *folio, int bit_nr,
struct wait_queue_entry *wait)
{
if (wait->flags & WQ_FLAG_EXCLUSIVE) {
- if (test_and_set_bit(bit_nr, &page->flags))
+ if (test_and_set_bit(bit_nr, &folio->flags))
return false;
- } else if (test_bit(bit_nr, &page->flags))
+ } else if (test_bit(bit_nr, &folio->flags))
return false;
wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE;
@@ -1272,9 +1272,10 @@ static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
/* How many times do we accept lock stealing from under a waiter? */
int sysctl_page_lock_unfairness = 5;
-static inline int wait_on_page_bit_common(wait_queue_head_t *q,
- struct page *page, int bit_nr, int state, enum behavior behavior)
+static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
+ int state, enum behavior behavior)
{
+ wait_queue_head_t *q = folio_waitqueue(folio);
int unfairness = sysctl_page_lock_unfairness;
struct wait_page_queue wait_page;
wait_queue_entry_t *wait = &wait_page.wait;
@@ -1283,8 +1284,8 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
unsigned long pflags;
if (bit_nr == PG_locked &&
- !PageUptodate(page) && PageWorkingset(page)) {
- if (!PageSwapBacked(page)) {
+ !folio_test_uptodate(folio) && folio_test_workingset(folio)) {
+ if (!folio_test_swapbacked(folio)) {
delayacct_thrashing_start();
delayacct = true;
}
@@ -1294,7 +1295,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
init_wait(wait);
wait->func = wake_page_function;
- wait_page.page = page;
+ wait_page.folio = folio;
wait_page.bit_nr = bit_nr;
repeat:
@@ -1309,7 +1310,7 @@ repeat:
* Do one last check whether we can get the
* page bit synchronously.
*
- * Do the SetPageWaiters() marking before that
+ * Do the folio_set_waiters() marking before that
* to let any waker we _just_ missed know they
* need to wake us up (otherwise they'll never
* even go to the slow case that looks at the
@@ -1320,8 +1321,8 @@ repeat:
* lock to avoid races.
*/
spin_lock_irq(&q->lock);
- SetPageWaiters(page);
- if (!trylock_page_bit_common(page, bit_nr, wait))
+ folio_set_waiters(folio);
+ if (!folio_trylock_flag(folio, bit_nr, wait))
__add_wait_queue_entry_tail(q, wait);
spin_unlock_irq(&q->lock);
@@ -1331,10 +1332,10 @@ repeat:
* see whether the page bit testing has already
* been done by the wake function.
*
- * We can drop our reference to the page.
+ * We can drop our reference to the folio.
*/
if (behavior == DROP)
- put_page(page);
+ folio_put(folio);
/*
* Note that until the "finish_wait()", or until
@@ -1371,7 +1372,7 @@ repeat:
*
* And if that fails, we'll have to retry this all.
*/
- if (unlikely(test_and_set_bit(bit_nr, &page->flags)))
+ if (unlikely(test_and_set_bit(bit_nr, folio_flags(folio, 0))))
goto repeat;
wait->flags |= WQ_FLAG_DONE;
@@ -1380,7 +1381,7 @@ repeat:
/*
* If a signal happened, this 'finish_wait()' may remove the last
- * waiter from the wait-queues, but the PageWaiters bit will remain
+ * waiter from the wait-queues, but the folio waiters bit will remain
* set. That's ok. The next wakeup will take care of it, and trying
* to do it here would be difficult and prone to races.
*/
@@ -1411,19 +1412,17 @@ repeat:
return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
}
-void wait_on_page_bit(struct page *page, int bit_nr)
+void folio_wait_bit(struct folio *folio, int bit_nr)
{
- wait_queue_head_t *q = page_waitqueue(page);
- wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
+ folio_wait_bit_common(folio, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
}
-EXPORT_SYMBOL(wait_on_page_bit);
+EXPORT_SYMBOL(folio_wait_bit);
-int wait_on_page_bit_killable(struct page *page, int bit_nr)
+int folio_wait_bit_killable(struct folio *folio, int bit_nr)
{
- wait_queue_head_t *q = page_waitqueue(page);
- return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED);
+ return folio_wait_bit_common(folio, bit_nr, TASK_KILLABLE, SHARED);
}
-EXPORT_SYMBOL(wait_on_page_bit_killable);
+EXPORT_SYMBOL(folio_wait_bit_killable);
/**
* put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
@@ -1440,31 +1439,28 @@ EXPORT_SYMBOL(wait_on_page_bit_killable);
*/
int put_and_wait_on_page_locked(struct page *page, int state)
{
- wait_queue_head_t *q;
-
- page = compound_head(page);
- q = page_waitqueue(page);
- return wait_on_page_bit_common(q, page, PG_locked, state, DROP);
+ return folio_wait_bit_common(page_folio(page), PG_locked, state,
+ DROP);
}
/**
- * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
- * @page: Page defining the wait queue of interest
+ * folio_add_wait_queue - Add an arbitrary waiter to a folio's wait queue
+ * @folio: Folio defining the wait queue of interest
* @waiter: Waiter to add to the queue
*
- * Add an arbitrary @waiter to the wait queue for the nominated @page.
+ * Add an arbitrary @waiter to the wait queue for the nominated @folio.
*/
-void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
+void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter)
{
- wait_queue_head_t *q = page_waitqueue(page);
+ wait_queue_head_t *q = folio_waitqueue(folio);
unsigned long flags;
spin_lock_irqsave(&q->lock, flags);
__add_wait_queue_entry_tail(q, waiter);
- SetPageWaiters(page);
+ folio_set_waiters(folio);
spin_unlock_irqrestore(&q->lock, flags);
}
-EXPORT_SYMBOL_GPL(add_page_wait_queue);
+EXPORT_SYMBOL_GPL(folio_add_wait_queue);
#ifndef clear_bit_unlock_is_negative_byte
@@ -1490,124 +1486,116 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem
#endif
/**
- * unlock_page - unlock a locked page
- * @page: the page
+ * folio_unlock - Unlock a locked folio.
+ * @folio: The folio.
*
- * Unlocks the page and wakes up sleepers in wait_on_page_locked().
- * Also wakes sleepers in wait_on_page_writeback() because the wakeup
- * mechanism between PageLocked pages and PageWriteback pages is shared.
- * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
+ * Unlocks the folio and wakes up any thread sleeping on the page lock.
*
- * Note that this depends on PG_waiters being the sign bit in the byte
- * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to
- * clear the PG_locked bit and test PG_waiters at the same time fairly
- * portably (architectures that do LL/SC can test any bit, while x86 can
- * test the sign bit).
+ * Context: May be called from interrupt or process context. May not be
+ * called from NMI context.
*/
-void unlock_page(struct page *page)
+void folio_unlock(struct folio *folio)
{
+ /* Bit 7 allows x86 to check the byte's sign bit */
BUILD_BUG_ON(PG_waiters != 7);
- page = compound_head(page);
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
- wake_up_page_bit(page, PG_locked);
+ BUILD_BUG_ON(PG_locked > 7);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ if (clear_bit_unlock_is_negative_byte(PG_locked, folio_flags(folio, 0)))
+ folio_wake_bit(folio, PG_locked);
}
-EXPORT_SYMBOL(unlock_page);
+EXPORT_SYMBOL(folio_unlock);
/**
- * end_page_private_2 - Clear PG_private_2 and release any waiters
- * @page: The page
+ * folio_end_private_2 - Clear PG_private_2 and wake any waiters.
+ * @folio: The folio.
*
- * Clear the PG_private_2 bit on a page and wake up any sleepers waiting for
- * this. The page ref held for PG_private_2 being set is released.
+ * Clear the PG_private_2 bit on a folio and wake up any sleepers waiting for
+ * it. The folio reference held for PG_private_2 being set is released.
*
- * This is, for example, used when a netfs page is being written to a local
- * disk cache, thereby allowing writes to the cache for the same page to be
+ * This is, for example, used when a netfs folio is being written to a local
+ * disk cache, thereby allowing writes to the cache for the same folio to be
* serialised.
*/
-void end_page_private_2(struct page *page)
+void folio_end_private_2(struct folio *folio)
{
- page = compound_head(page);
- VM_BUG_ON_PAGE(!PagePrivate2(page), page);
- clear_bit_unlock(PG_private_2, &page->flags);
- wake_up_page_bit(page, PG_private_2);
- put_page(page);
+ VM_BUG_ON_FOLIO(!folio_test_private_2(folio), folio);
+ clear_bit_unlock(PG_private_2, folio_flags(folio, 0));
+ folio_wake_bit(folio, PG_private_2);
+ folio_put(folio);
}
-EXPORT_SYMBOL(end_page_private_2);
+EXPORT_SYMBOL(folio_end_private_2);
/**
- * wait_on_page_private_2 - Wait for PG_private_2 to be cleared on a page
- * @page: The page to wait on
+ * folio_wait_private_2 - Wait for PG_private_2 to be cleared on a folio.
+ * @folio: The folio to wait on.
*
- * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page.
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio.
*/
-void wait_on_page_private_2(struct page *page)
+void folio_wait_private_2(struct folio *folio)
{
- page = compound_head(page);
- while (PagePrivate2(page))
- wait_on_page_bit(page, PG_private_2);
+ while (folio_test_private_2(folio))
+ folio_wait_bit(folio, PG_private_2);
}
-EXPORT_SYMBOL(wait_on_page_private_2);
+EXPORT_SYMBOL(folio_wait_private_2);
/**
- * wait_on_page_private_2_killable - Wait for PG_private_2 to be cleared on a page
- * @page: The page to wait on
+ * folio_wait_private_2_killable - Wait for PG_private_2 to be cleared on a folio.
+ * @folio: The folio to wait on.
*
- * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page or until a
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio or until a
* fatal signal is received by the calling task.
*
* Return:
* - 0 if successful.
* - -EINTR if a fatal signal was encountered.
*/
-int wait_on_page_private_2_killable(struct page *page)
+int folio_wait_private_2_killable(struct folio *folio)
{
int ret = 0;
- page = compound_head(page);
- while (PagePrivate2(page)) {
- ret = wait_on_page_bit_killable(page, PG_private_2);
+ while (folio_test_private_2(folio)) {
+ ret = folio_wait_bit_killable(folio, PG_private_2);
if (ret < 0)
break;
}
return ret;
}
-EXPORT_SYMBOL(wait_on_page_private_2_killable);
+EXPORT_SYMBOL(folio_wait_private_2_killable);
/**
- * end_page_writeback - end writeback against a page
- * @page: the page
+ * folio_end_writeback - End writeback against a folio.
+ * @folio: The folio.
*/
-void end_page_writeback(struct page *page)
+void folio_end_writeback(struct folio *folio)
{
/*
- * TestClearPageReclaim could be used here but it is an atomic
- * operation and overkill in this particular case. Failing to
- * shuffle a page marked for immediate reclaim is too mild to
- * justify taking an atomic operation penalty at the end of
- * ever page writeback.
+ * folio_test_clear_reclaim() could be used here but it is an
+ * atomic operation and overkill in this particular case. Failing
+ * to shuffle a folio marked for immediate reclaim is too mild
+ * a gain to justify taking an atomic operation penalty at the
+ * end of every folio writeback.
*/
- if (PageReclaim(page)) {
- ClearPageReclaim(page);
- rotate_reclaimable_page(page);
+ if (folio_test_reclaim(folio)) {
+ folio_clear_reclaim(folio);
+ folio_rotate_reclaimable(folio);
}
/*
- * Writeback does not hold a page reference of its own, relying
+ * Writeback does not hold a folio reference of its own, relying
* on truncation to wait for the clearing of PG_writeback.
- * But here we must make sure that the page is not freed and
- * reused before the wake_up_page().
+ * But here we must make sure that the folio is not freed and
+ * reused before the folio_wake().
*/
- get_page(page);
- if (!test_clear_page_writeback(page))
+ folio_get(folio);
+ if (!__folio_end_writeback(folio))
BUG();
smp_mb__after_atomic();
- wake_up_page(page, PG_writeback);
- put_page(page);
+ folio_wake(folio, PG_writeback);
+ folio_put(folio);
}
-EXPORT_SYMBOL(end_page_writeback);
+EXPORT_SYMBOL(folio_end_writeback);
/*
* After completing I/O on a page, call this routine to update the page
@@ -1638,39 +1626,35 @@ void page_endio(struct page *page, bool is_write, int err)
EXPORT_SYMBOL_GPL(page_endio);
/**
- * __lock_page - get a lock on the page, assuming we need to sleep to get it
- * @__page: the page to lock
+ * __folio_lock - Get a lock on the folio, assuming we need to sleep to get it.
+ * @folio: The folio to lock
*/
-void __lock_page(struct page *__page)
+void __folio_lock(struct folio *folio)
{
- struct page *page = compound_head(__page);
- wait_queue_head_t *q = page_waitqueue(page);
- wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE,
+ folio_wait_bit_common(folio, PG_locked, TASK_UNINTERRUPTIBLE,
EXCLUSIVE);
}
-EXPORT_SYMBOL(__lock_page);
+EXPORT_SYMBOL(__folio_lock);
-int __lock_page_killable(struct page *__page)
+int __folio_lock_killable(struct folio *folio)
{
- struct page *page = compound_head(__page);
- wait_queue_head_t *q = page_waitqueue(page);
- return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE,
+ return folio_wait_bit_common(folio, PG_locked, TASK_KILLABLE,
EXCLUSIVE);
}
-EXPORT_SYMBOL_GPL(__lock_page_killable);
+EXPORT_SYMBOL_GPL(__folio_lock_killable);
-int __lock_page_async(struct page *page, struct wait_page_queue *wait)
+static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait)
{
- struct wait_queue_head *q = page_waitqueue(page);
+ struct wait_queue_head *q = folio_waitqueue(folio);
int ret = 0;
- wait->page = page;
+ wait->folio = folio;
wait->bit_nr = PG_locked;
spin_lock_irq(&q->lock);
__add_wait_queue_entry_tail(q, &wait->wait);
- SetPageWaiters(page);
- ret = !trylock_page(page);
+ folio_set_waiters(folio);
+ ret = !folio_trylock(folio);
/*
* If we were successful now, we know we're still on the
* waitqueue as we're still under the lock. This means it's
@@ -1687,16 +1671,16 @@ int __lock_page_async(struct page *page, struct wait_page_queue *wait)
/*
* Return values:
- * 1 - page is locked; mmap_lock is still held.
- * 0 - page is not locked.
+ * true - folio is locked; mmap_lock is still held.
+ * false - folio is not locked.
* mmap_lock has been released (mmap_read_unlock(), unless flags had both
* FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
* which case mmap_lock is still held.
*
- * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
- * with the page locked and the mmap_lock unperturbed.
+ * If neither ALLOW_RETRY nor KILLABLE are set, will always return true
+ * with the folio locked and the mmap_lock unperturbed.
*/
-int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
+bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
unsigned int flags)
{
if (fault_flag_allow_retry_first(flags)) {
@@ -1705,28 +1689,28 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
* even though return 0.
*/
if (flags & FAULT_FLAG_RETRY_NOWAIT)
- return 0;
+ return false;
mmap_read_unlock(mm);
if (flags & FAULT_FLAG_KILLABLE)
- wait_on_page_locked_killable(page);
+ folio_wait_locked_killable(folio);
else
- wait_on_page_locked(page);
- return 0;
+ folio_wait_locked(folio);
+ return false;
}
if (flags & FAULT_FLAG_KILLABLE) {
- int ret;
+ bool ret;
- ret = __lock_page_killable(page);
+ ret = __folio_lock_killable(folio);
if (ret) {
mmap_read_unlock(mm);
- return 0;
+ return false;
}
} else {
- __lock_page(page);
+ __folio_lock(folio);
}
- return 1;
+ return true;
}
/**
@@ -1802,143 +1786,155 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
EXPORT_SYMBOL(page_cache_prev_miss);
/*
+ * Lockless page cache protocol:
+ * On the lookup side:
+ * 1. Load the folio from i_pages
+ * 2. Increment the refcount if it's not zero
+ * 3. If the folio is not found by xas_reload(), put the refcount and retry
+ *
+ * On the removal side:
+ * A. Freeze the page (by zeroing the refcount if nobody else has a reference)
+ * B. Remove the page from i_pages
+ * C. Return the page to the page allocator
+ *
+ * This means that any page may have its reference count temporarily
+ * increased by a speculative page cache (or fast GUP) lookup as it can
+ * be allocated by another user before the RCU grace period expires.
+ * Because the refcount temporarily acquired here may end up being the
+ * last refcount on the page, any page allocation must be freeable by
+ * folio_put().
+ */
+
+/*
* mapping_get_entry - Get a page cache entry.
* @mapping: the address_space to search
* @index: The page cache index.
*
- * Looks up the page cache slot at @mapping & @index. If there is a
- * page cache page, the head page is returned with an increased refcount.
+ * Looks up the page cache entry at @mapping & @index. If it is a folio,
+ * it is returned with an increased refcount. If it is a shadow entry
+ * of a previously evicted folio, or a swap entry from shmem/tmpfs,
+ * it is returned without further action.
*
- * If the slot holds a shadow entry of a previously evicted page, or a
- * swap entry from shmem/tmpfs, it is returned.
- *
- * Return: The head page or shadow entry, %NULL if nothing is found.
+ * Return: The folio, swap or shadow entry, %NULL if nothing is found.
*/
-static struct page *mapping_get_entry(struct address_space *mapping,
- pgoff_t index)
+static void *mapping_get_entry(struct address_space *mapping, pgoff_t index)
{
XA_STATE(xas, &mapping->i_pages, index);
- struct page *page;
+ struct folio *folio;
rcu_read_lock();
repeat:
xas_reset(&xas);
- page = xas_load(&xas);
- if (xas_retry(&xas, page))
+ folio = xas_load(&xas);
+ if (xas_retry(&xas, folio))
goto repeat;
/*
* A shadow entry of a recently evicted page, or a swap entry from
* shmem/tmpfs. Return it without attempting to raise page count.
*/
- if (!page || xa_is_value(page))
+ if (!folio || xa_is_value(folio))
goto out;
- if (!page_cache_get_speculative(page))
+ if (!folio_try_get_rcu(folio))
goto repeat;
- /*
- * Has the page moved or been split?
- * This is part of the lockless pagecache protocol. See
- * include/linux/pagemap.h for details.
- */
- if (unlikely(page != xas_reload(&xas))) {
- put_page(page);
+ if (unlikely(folio != xas_reload(&xas))) {
+ folio_put(folio);
goto repeat;
}
out:
rcu_read_unlock();
- return page;
+ return folio;
}
/**
- * pagecache_get_page - Find and get a reference to a page.
+ * __filemap_get_folio - Find and get a reference to a folio.
* @mapping: The address_space to search.
* @index: The page index.
- * @fgp_flags: %FGP flags modify how the page is returned.
- * @gfp_mask: Memory allocation flags to use if %FGP_CREAT is specified.
+ * @fgp_flags: %FGP flags modify how the folio is returned.
+ * @gfp: Memory allocation flags to use if %FGP_CREAT is specified.
*
* Looks up the page cache entry at @mapping & @index.
*
* @fgp_flags can be zero or more of these flags:
*
- * * %FGP_ACCESSED - The page will be marked accessed.
- * * %FGP_LOCK - The page is returned locked.
- * * %FGP_HEAD - If the page is present and a THP, return the head page
- * rather than the exact page specified by the index.
+ * * %FGP_ACCESSED - The folio will be marked accessed.
+ * * %FGP_LOCK - The folio is returned locked.
* * %FGP_ENTRY - If there is a shadow / swap / DAX entry, return it
- * instead of allocating a new page to replace it.
+ * instead of allocating a new folio to replace it.
* * %FGP_CREAT - If no page is present then a new page is allocated using
- * @gfp_mask and added to the page cache and the VM's LRU list.
+ * @gfp and added to the page cache and the VM's LRU list.
* The page is returned locked and with an increased refcount.
* * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
* page is already in cache. If the page was allocated, unlock it before
* returning so the caller can do the same dance.
- * * %FGP_WRITE - The page will be written
- * * %FGP_NOFS - __GFP_FS will get cleared in gfp mask
- * * %FGP_NOWAIT - Don't get blocked by page lock
+ * * %FGP_WRITE - The page will be written to by the caller.
+ * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
+ * * %FGP_NOWAIT - Don't get blocked by page lock.
+ * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
*
* If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even
* if the %GFP flags specified for %FGP_CREAT are atomic.
*
* If there is a page cache page, it is returned with an increased refcount.
*
- * Return: The found page or %NULL otherwise.
+ * Return: The found folio or %NULL otherwise.
*/
-struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
- int fgp_flags, gfp_t gfp_mask)
+struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
+ int fgp_flags, gfp_t gfp)
{
- struct page *page;
+ struct folio *folio;
repeat:
- page = mapping_get_entry(mapping, index);
- if (xa_is_value(page)) {
+ folio = mapping_get_entry(mapping, index);
+ if (xa_is_value(folio)) {
if (fgp_flags & FGP_ENTRY)
- return page;
- page = NULL;
+ return folio;
+ folio = NULL;
}
- if (!page)
+ if (!folio)
goto no_page;
if (fgp_flags & FGP_LOCK) {
if (fgp_flags & FGP_NOWAIT) {
- if (!trylock_page(page)) {
- put_page(page);
+ if (!folio_trylock(folio)) {
+ folio_put(folio);
return NULL;
}
} else {
- lock_page(page);
+ folio_lock(folio);
}
/* Has the page been truncated? */
- if (unlikely(page->mapping != mapping)) {
- unlock_page(page);
- put_page(page);
+ if (unlikely(folio->mapping != mapping)) {
+ folio_unlock(folio);
+ folio_put(folio);
goto repeat;
}
- VM_BUG_ON_PAGE(!thp_contains(page, index), page);
+ VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
}
if (fgp_flags & FGP_ACCESSED)
- mark_page_accessed(page);
+ folio_mark_accessed(folio);
else if (fgp_flags & FGP_WRITE) {
/* Clear idle flag for buffer write */
- if (page_is_idle(page))
- clear_page_idle(page);
+ if (folio_test_idle(folio))
+ folio_clear_idle(folio);
}
- if (!(fgp_flags & FGP_HEAD))
- page = find_subpage(page, index);
+ if (fgp_flags & FGP_STABLE)
+ folio_wait_stable(folio);
no_page:
- if (!page && (fgp_flags & FGP_CREAT)) {
+ if (!folio && (fgp_flags & FGP_CREAT)) {
int err;
if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
- gfp_mask |= __GFP_WRITE;
+ gfp |= __GFP_WRITE;
if (fgp_flags & FGP_NOFS)
- gfp_mask &= ~__GFP_FS;
+ gfp &= ~__GFP_FS;
- page = __page_cache_alloc(gfp_mask);
- if (!page)
+ folio = filemap_alloc_folio(gfp, 0);
+ if (!folio)
return NULL;
if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
@@ -1946,27 +1942,27 @@ no_page:
/* Init accessed so avoid atomic mark_page_accessed later */
if (fgp_flags & FGP_ACCESSED)
- __SetPageReferenced(page);
+ __folio_set_referenced(folio);
- err = add_to_page_cache_lru(page, mapping, index, gfp_mask);
+ err = filemap_add_folio(mapping, folio, index, gfp);
if (unlikely(err)) {
- put_page(page);
- page = NULL;
+ folio_put(folio);
+ folio = NULL;
if (err == -EEXIST)
goto repeat;
}
/*
- * add_to_page_cache_lru locks the page, and for mmap we expect
- * an unlocked page.
+ * filemap_add_folio locks the page, and for mmap
+ * we expect an unlocked page.
*/
- if (page && (fgp_flags & FGP_FOR_MMAP))
- unlock_page(page);
+ if (folio && (fgp_flags & FGP_FOR_MMAP))
+ folio_unlock(folio);
}
- return page;
+ return folio;
}
-EXPORT_SYMBOL(pagecache_get_page);
+EXPORT_SYMBOL(__filemap_get_folio);
static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max,
xa_mark_t mark)
@@ -2421,6 +2417,7 @@ static int filemap_update_page(struct kiocb *iocb,
struct address_space *mapping, struct iov_iter *iter,
struct page *page)
{
+ struct folio *folio = page_folio(page);
int error;
if (iocb->ki_flags & IOCB_NOWAIT) {
@@ -2430,40 +2427,40 @@ static int filemap_update_page(struct kiocb *iocb,
filemap_invalidate_lock_shared(mapping);
}
- if (!trylock_page(page)) {
+ if (!folio_trylock(folio)) {
error = -EAGAIN;
if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
goto unlock_mapping;
if (!(iocb->ki_flags & IOCB_WAITQ)) {
filemap_invalidate_unlock_shared(mapping);
- put_and_wait_on_page_locked(page, TASK_KILLABLE);
+ put_and_wait_on_page_locked(&folio->page, TASK_KILLABLE);
return AOP_TRUNCATED_PAGE;
}
- error = __lock_page_async(page, iocb->ki_waitq);
+ error = __folio_lock_async(folio, iocb->ki_waitq);
if (error)
goto unlock_mapping;
}
error = AOP_TRUNCATED_PAGE;
- if (!page->mapping)
+ if (!folio->mapping)
goto unlock;
error = 0;
- if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, page))
+ if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, &folio->page))
goto unlock;
error = -EAGAIN;
if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))
goto unlock;
- error = filemap_read_page(iocb->ki_filp, mapping, page);
+ error = filemap_read_page(iocb->ki_filp, mapping, &folio->page);
goto unlock_mapping;
unlock:
- unlock_page(page);
+ folio_unlock(folio);
unlock_mapping:
filemap_invalidate_unlock_shared(mapping);
if (error == AOP_TRUNCATED_PAGE)
- put_page(page);
+ folio_put(folio);
return error;
}
@@ -2900,7 +2897,9 @@ unlock:
static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
struct file **fpin)
{
- if (trylock_page(page))
+ struct folio *folio = page_folio(page);
+
+ if (folio_trylock(folio))
return 1;
/*
@@ -2913,7 +2912,7 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
*fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
if (vmf->flags & FAULT_FLAG_KILLABLE) {
- if (__lock_page_killable(page)) {
+ if (__folio_lock_killable(folio)) {
/*
* We didn't have the right flags to drop the mmap_lock,
* but all fault_handlers only check for fatal signals
@@ -2925,11 +2924,11 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
return 0;
}
} else
- __lock_page(page);
+ __folio_lock(folio);
+
return 1;
}
-
/*
* Synchronous readahead happens when we don't even find a page in the page
* cache at all. We don't want to perform IO under the mmap sem, so if we have
@@ -3708,28 +3707,6 @@ out:
}
EXPORT_SYMBOL(generic_file_direct_write);
-/*
- * Find or create a page at the given pagecache position. Return the locked
- * page. This function is specifically for buffered writes.
- */
-struct page *grab_cache_page_write_begin(struct address_space *mapping,
- pgoff_t index, unsigned flags)
-{
- struct page *page;
- int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;
-
- if (flags & AOP_FLAG_NOFS)
- fgp_flags |= FGP_NOFS;
-
- page = pagecache_get_page(mapping, index, fgp_flags,
- mapping_gfp_mask(mapping));
- if (page)
- wait_for_stable_page(page);
-
- return page;
-}
-EXPORT_SYMBOL(grab_cache_page_write_begin);
-
ssize_t generic_perform_write(struct file *file,
struct iov_iter *i, loff_t pos)
{