summaryrefslogtreecommitdiff
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c167
1 files changed, 78 insertions, 89 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6e60657875d3..0745aedebb37 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -170,21 +170,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
_ret; \
})
-#define pcpu_spin_lock_irqsave(type, member, ptr, flags) \
+#define pcpu_spin_trylock(type, member, ptr) \
({ \
type *_ret; \
pcpu_task_pin(); \
_ret = this_cpu_ptr(ptr); \
- spin_lock_irqsave(&_ret->member, flags); \
- _ret; \
-})
-
-#define pcpu_spin_trylock_irqsave(type, member, ptr, flags) \
-({ \
- type *_ret; \
- pcpu_task_pin(); \
- _ret = this_cpu_ptr(ptr); \
- if (!spin_trylock_irqsave(&_ret->member, flags)) { \
+ if (!spin_trylock(&_ret->member)) { \
pcpu_task_unpin(); \
_ret = NULL; \
} \
@@ -197,27 +188,16 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
pcpu_task_unpin(); \
})
-#define pcpu_spin_unlock_irqrestore(member, ptr, flags) \
-({ \
- spin_unlock_irqrestore(&ptr->member, flags); \
- pcpu_task_unpin(); \
-})
-
/* struct per_cpu_pages specific helpers. */
#define pcp_spin_lock(ptr) \
pcpu_spin_lock(struct per_cpu_pages, lock, ptr)
-#define pcp_spin_lock_irqsave(ptr, flags) \
- pcpu_spin_lock_irqsave(struct per_cpu_pages, lock, ptr, flags)
-
-#define pcp_spin_trylock_irqsave(ptr, flags) \
- pcpu_spin_trylock_irqsave(struct per_cpu_pages, lock, ptr, flags)
+#define pcp_spin_trylock(ptr) \
+ pcpu_spin_trylock(struct per_cpu_pages, lock, ptr)
#define pcp_spin_unlock(ptr) \
pcpu_spin_unlock(lock, ptr)
-#define pcp_spin_unlock_irqrestore(ptr, flags) \
- pcpu_spin_unlock_irqrestore(lock, ptr, flags)
#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
DEFINE_PER_CPU(int, numa_node);
EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -798,6 +778,7 @@ static void prep_compound_head(struct page *page, unsigned int order)
set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
set_compound_order(page, order);
atomic_set(compound_mapcount_ptr(page), -1);
+ atomic_set(subpages_mapcount_ptr(page), 0);
atomic_set(compound_pincount_ptr(page), 0);
}
@@ -1324,11 +1305,19 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
}
switch (page - head_page) {
case 1:
- /* the first tail page: ->mapping may be compound_mapcount() */
- if (unlikely(compound_mapcount(page))) {
+ /* the first tail page: these may be in place of ->mapping */
+ if (unlikely(head_compound_mapcount(head_page))) {
bad_page(page, "nonzero compound_mapcount");
goto out;
}
+ if (unlikely(atomic_read(subpages_mapcount_ptr(head_page)))) {
+ bad_page(page, "nonzero subpages_mapcount");
+ goto out;
+ }
+ if (unlikely(head_compound_pincount(head_page))) {
+ bad_page(page, "nonzero compound_pincount");
+ goto out;
+ }
break;
case 2:
/*
@@ -1431,10 +1420,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
- if (compound) {
- ClearPageDoubleMap(page);
+ if (compound)
ClearPageHasHWPoisoned(page);
- }
for (i = 1; i < (1 << order); i++) {
if (compound)
bad += free_tail_pages_check(page, page + i);
@@ -1547,6 +1534,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
struct per_cpu_pages *pcp,
int pindex)
{
+ unsigned long flags;
int min_pindex = 0;
int max_pindex = NR_PCP_LISTS - 1;
unsigned int order;
@@ -1562,8 +1550,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
/* Ensure requested pindex is drained first. */
pindex = pindex - 1;
- /* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
- spin_lock(&zone->lock);
+ spin_lock_irqsave(&zone->lock, flags);
isolated_pageblocks = has_isolate_pageblock(zone);
while (count > 0) {
@@ -1611,7 +1598,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
} while (count > 0 && !list_empty(list));
}
- spin_unlock(&zone->lock);
+ spin_unlock_irqrestore(&zone->lock, flags);
}
static void free_one_page(struct zone *zone,
@@ -1715,6 +1702,11 @@ static void __free_pages_ok(struct page *page, unsigned int order,
if (!free_pages_prepare(page, order, true, fpi_flags))
return;
+ /*
+ * Calling get_pfnblock_migratetype() without spin_lock_irqsave() here
+ * is used to avoid calling get_pfnblock_migratetype() under the lock.
+ * This will reduce the lock holding time.
+ */
migratetype = get_pfnblock_migratetype(page, pfn);
spin_lock_irqsave(&zone->lock, flags);
@@ -3125,10 +3117,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype, unsigned int alloc_flags)
{
+ unsigned long flags;
int i, allocated = 0;
- /* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
- spin_lock(&zone->lock);
+ spin_lock_irqsave(&zone->lock, flags);
for (i = 0; i < count; ++i) {
struct page *page = __rmqueue(zone, order, migratetype,
alloc_flags);
@@ -3162,7 +3154,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* pages added to the pcp list.
*/
__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
- spin_unlock(&zone->lock);
+ spin_unlock_irqrestore(&zone->lock, flags);
return allocated;
}
@@ -3179,16 +3171,9 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
batch = READ_ONCE(pcp->batch);
to_drain = min(pcp->count, batch);
if (to_drain > 0) {
- unsigned long flags;
-
- /*
- * free_pcppages_bulk expects IRQs disabled for zone->lock
- * so even though pcp->lock is not intended to be IRQ-safe,
- * it's needed in this context.
- */
- spin_lock_irqsave(&pcp->lock, flags);
+ spin_lock(&pcp->lock);
free_pcppages_bulk(zone, to_drain, pcp, 0);
- spin_unlock_irqrestore(&pcp->lock, flags);
+ spin_unlock(&pcp->lock);
}
}
#endif
@@ -3202,12 +3187,9 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
if (pcp->count) {
- unsigned long flags;
-
- /* See drain_zone_pages on why this is disabling IRQs */
- spin_lock_irqsave(&pcp->lock, flags);
+ spin_lock(&pcp->lock);
free_pcppages_bulk(zone, pcp->count, pcp, 0);
- spin_unlock_irqrestore(&pcp->lock, flags);
+ spin_unlock(&pcp->lock);
}
}
@@ -3473,7 +3455,6 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
*/
void free_unref_page(struct page *page, unsigned int order)
{
- unsigned long flags;
unsigned long __maybe_unused UP_flags;
struct per_cpu_pages *pcp;
struct zone *zone;
@@ -3501,10 +3482,10 @@ void free_unref_page(struct page *page, unsigned int order)
zone = page_zone(page);
pcp_trylock_prepare(UP_flags);
- pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags);
+ pcp = pcp_spin_trylock(zone->per_cpu_pageset);
if (pcp) {
free_unref_page_commit(zone, pcp, page, migratetype, order);
- pcp_spin_unlock_irqrestore(pcp, flags);
+ pcp_spin_unlock(pcp);
} else {
free_one_page(zone, page, pfn, order, migratetype, FPI_NONE);
}
@@ -3516,10 +3497,10 @@ void free_unref_page(struct page *page, unsigned int order)
*/
void free_unref_page_list(struct list_head *list)
{
+ unsigned long __maybe_unused UP_flags;
struct page *page, *next;
struct per_cpu_pages *pcp = NULL;
struct zone *locked_zone = NULL;
- unsigned long flags;
int batch_count = 0;
int migratetype;
@@ -3546,39 +3527,54 @@ void free_unref_page_list(struct list_head *list)
list_for_each_entry_safe(page, next, list, lru) {
struct zone *zone = page_zone(page);
- /* Different zone, different pcp lock. */
- if (zone != locked_zone) {
- if (pcp)
- pcp_spin_unlock_irqrestore(pcp, flags);
+ list_del(&page->lru);
+ migratetype = get_pcppage_migratetype(page);
+
+ /*
+ * Either different zone requiring a different pcp lock or
+ * excessive lock hold times when freeing a large list of
+ * pages.
+ */
+ if (zone != locked_zone || batch_count == SWAP_CLUSTER_MAX) {
+ if (pcp) {
+ pcp_spin_unlock(pcp);
+ pcp_trylock_finish(UP_flags);
+ }
+
+ batch_count = 0;
+ /*
+ * trylock is necessary as pages may be getting freed
+ * from IRQ or SoftIRQ context after an IO completion.
+ */
+ pcp_trylock_prepare(UP_flags);
+ pcp = pcp_spin_trylock(zone->per_cpu_pageset);
+ if (unlikely(!pcp)) {
+ pcp_trylock_finish(UP_flags);
+ free_one_page(zone, page, page_to_pfn(page),
+ 0, migratetype, FPI_NONE);
+ locked_zone = NULL;
+ continue;
+ }
locked_zone = zone;
- pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags);
}
/*
* Non-isolated types over MIGRATE_PCPTYPES get added
* to the MIGRATE_MOVABLE pcp list.
*/
- migratetype = get_pcppage_migratetype(page);
if (unlikely(migratetype >= MIGRATE_PCPTYPES))
migratetype = MIGRATE_MOVABLE;
trace_mm_page_free_batched(page);
free_unref_page_commit(zone, pcp, page, migratetype, 0);
-
- /*
- * Guard against excessive IRQ disabled times when we get
- * a large list of pages to free.
- */
- if (++batch_count == SWAP_CLUSTER_MAX) {
- pcp_spin_unlock_irqrestore(pcp, flags);
- batch_count = 0;
- pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags);
- }
+ batch_count++;
}
- if (pcp)
- pcp_spin_unlock_irqrestore(pcp, flags);
+ if (pcp) {
+ pcp_spin_unlock(pcp);
+ pcp_trylock_finish(UP_flags);
+ }
}
/*
@@ -3779,15 +3775,11 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
struct per_cpu_pages *pcp;
struct list_head *list;
struct page *page;
- unsigned long flags;
unsigned long __maybe_unused UP_flags;
- /*
- * spin_trylock may fail due to a parallel drain. In the future, the
- * trylock will also protect against IRQ reentrancy.
- */
+ /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
pcp_trylock_prepare(UP_flags);
- pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags);
+ pcp = pcp_spin_trylock(zone->per_cpu_pageset);
if (!pcp) {
pcp_trylock_finish(UP_flags);
return NULL;
@@ -3801,7 +3793,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
pcp->free_factor >>= 1;
list = &pcp->lists[order_to_pindex(migratetype, order)];
page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
- pcp_spin_unlock_irqrestore(pcp, flags);
+ pcp_spin_unlock(pcp);
pcp_trylock_finish(UP_flags);
if (page) {
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
@@ -5372,7 +5364,6 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
struct page **page_array)
{
struct page *page;
- unsigned long flags;
unsigned long __maybe_unused UP_flags;
struct zone *zone;
struct zoneref *z;
@@ -5454,9 +5445,9 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
if (unlikely(!zone))
goto failed;
- /* Is a parallel drain in progress? */
+ /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
pcp_trylock_prepare(UP_flags);
- pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags);
+ pcp = pcp_spin_trylock(zone->per_cpu_pageset);
if (!pcp)
goto failed_irq;
@@ -5475,7 +5466,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
if (unlikely(!page)) {
/* Try and allocate at least one page */
if (!nr_account) {
- pcp_spin_unlock_irqrestore(pcp, flags);
+ pcp_spin_unlock(pcp);
goto failed_irq;
}
break;
@@ -5490,7 +5481,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
nr_populated++;
}
- pcp_spin_unlock_irqrestore(pcp, flags);
+ pcp_spin_unlock(pcp);
pcp_trylock_finish(UP_flags);
__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
@@ -6874,13 +6865,11 @@ static void __ref memmap_init_compound(struct page *head,
set_page_count(page, 0);
/*
- * The first tail page stores compound_mapcount_ptr() and
- * compound_order() and the second tail page stores
- * compound_pincount_ptr(). Call prep_compound_head() after
- * the first and second tail pages have been initialized to
- * not have the data overwritten.
+ * The first tail page stores important compound page info.
+ * Call prep_compound_head() after the first tail page has
+ * been initialized, to not have the data overwritten.
*/
- if (pfn == head_pfn + 2)
+ if (pfn == head_pfn + 1)
prep_compound_head(head, order);
}
}