summary refs log tree commit diff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/compaction.c 3
-rw-r--r-- mm/filemap.c 4
-rw-r--r-- mm/huge_memory.c 37
-rw-r--r-- mm/hugetlb.c 53
-rw-r--r-- mm/kasan/hw_tags.c 8
-rw-r--r-- mm/kasan/kasan.h 2
-rw-r--r-- mm/memblock.c 49
-rw-r--r-- mm/memcontrol.c 5
-rw-r--r-- mm/migrate.c 6
-rw-r--r-- mm/mremap.c 5
-rw-r--r-- mm/page_alloc.c 92
-rw-r--r-- mm/slub.c 22
12 files changed, 152 insertions, 134 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index e5acb9714436..190ccdaa6c19 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1342,7 +1342,7 @@ fast_isolate_freepages(struct compact_control *cc)
{
unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
unsigned int nr_scanned = 0;
- unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0;
+ unsigned long low_pfn, min_pfn, highest = 0;
unsigned long nr_isolated = 0;
unsigned long distance;
struct page *page = NULL;
@@ -1387,6 +1387,7 @@ fast_isolate_freepages(struct compact_control *cc)
struct page *freepage;
unsigned long flags;
unsigned int order_scanned = 0;
+ unsigned long high_pfn = 0;
if (!area->nr_free)
continue;
diff --git a/mm/filemap.c b/mm/filemap.c
index 5c9d564317a5..aa0e0fb04670 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -835,6 +835,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
XA_STATE(xas, &mapping->i_pages, offset);
int huge = PageHuge(page);
int error;
+ bool charged = false;
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageSwapBacked(page), page);
@@ -848,6 +849,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
error = mem_cgroup_charge(page, current->mm, gfp);
if (error)
goto error;
+ charged = true;
}
gfp &= GFP_RECLAIM_MASK;
@@ -896,6 +898,8 @@ unlock:
if (xas_error(&xas)) {
error = xas_error(&xas);
+ if (charged)
+ mem_cgroup_uncharge(page);
goto error;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9237976abe72..91ca9b103ee5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2202,7 +2202,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
{
spinlock_t *ptl;
struct mmu_notifier_range range;
- bool was_locked = false;
+ bool do_unlock_page = false;
pmd_t _pmd;
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
@@ -2218,7 +2218,6 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
VM_BUG_ON(freeze && !page);
if (page) {
VM_WARN_ON_ONCE(!PageLocked(page));
- was_locked = true;
if (page != pmd_page(*pmd))
goto out;
}
@@ -2227,19 +2226,29 @@ repeat:
if (pmd_trans_huge(*pmd)) {
if (!page) {
page = pmd_page(*pmd);
- if (unlikely(!trylock_page(page))) {
- get_page(page);
- _pmd = *pmd;
- spin_unlock(ptl);
- lock_page(page);
- spin_lock(ptl);
- if (unlikely(!pmd_same(*pmd, _pmd))) {
- unlock_page(page);
+ /*
+ * An anonymous page must be locked, to ensure that a
+ * concurrent reuse_swap_page() sees stable mapcount;
+ * but reuse_swap_page() is not used on shmem or file,
+ * and page lock must not be taken when zap_pmd_range()
+ * calls __split_huge_pmd() while i_mmap_lock is held.
+ */
+ if (PageAnon(page)) {
+ if (unlikely(!trylock_page(page))) {
+ get_page(page);
+ _pmd = *pmd;
+ spin_unlock(ptl);
+ lock_page(page);
+ spin_lock(ptl);
+ if (unlikely(!pmd_same(*pmd, _pmd))) {
+ unlock_page(page);
+ put_page(page);
+ page = NULL;
+ goto repeat;
+ }
put_page(page);
- page = NULL;
- goto repeat;
}
- put_page(page);
+ do_unlock_page = true;
}
}
if (PageMlocked(page))
@@ -2249,7 +2258,7 @@ repeat:
__split_huge_pmd_locked(vma, pmd, range.start, freeze);
out:
spin_unlock(ptl);
- if (!was_locked && page)
+ if (do_unlock_page)
unlock_page(page);
/*
* No need to double call mmu_notifier->invalidate_range() callback.
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 18f6ee317900..4bdb58ab14cb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -79,6 +79,21 @@ DEFINE_SPINLOCK(hugetlb_lock);
static int num_fault_mutexes;
struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
+static inline bool PageHugeFreed(struct page *head)
+{
+ return page_private(head + 4) == -1UL;
+}
+
+static inline void SetPageHugeFreed(struct page *head)
+{
+ set_page_private(head + 4, -1UL);
+}
+
+static inline void ClearPageHugeFreed(struct page *head)
+{
+ set_page_private(head + 4, 0);
+}
+
/* Forward declaration */
static int hugetlb_acct_memory(struct hstate *h, long delta);
@@ -1028,6 +1043,7 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
list_move(&page->lru, &h->hugepage_freelists[nid]);
h->free_huge_pages++;
h->free_huge_pages_node[nid]++;
+ SetPageHugeFreed(page);
}
static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
@@ -1044,6 +1060,7 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
list_move(&page->lru, &h->hugepage_activelist);
set_page_refcounted(page);
+ ClearPageHugeFreed(page);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
return page;
@@ -1344,12 +1361,11 @@ struct hstate *size_to_hstate(unsigned long size)
*/
bool page_huge_active(struct page *page)
{
- VM_BUG_ON_PAGE(!PageHuge(page), page);
- return PageHead(page) && PagePrivate(&page[1]);
+ return PageHeadHuge(page) && PagePrivate(&page[1]);
}
/* never called for tail page */
-static void set_page_huge_active(struct page *page)
+void set_page_huge_active(struct page *page)
{
VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
SetPagePrivate(&page[1]);
@@ -1505,6 +1521,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
spin_lock(&hugetlb_lock);
h->nr_huge_pages++;
h->nr_huge_pages_node[nid]++;
+ ClearPageHugeFreed(page);
spin_unlock(&hugetlb_lock);
}
@@ -1755,6 +1772,7 @@ int dissolve_free_huge_page(struct page *page)
{
int rc = -EBUSY;
+retry:
/* Not to disrupt normal path by vainly holding hugetlb_lock */
if (!PageHuge(page))
return 0;
@@ -1771,6 +1789,26 @@ int dissolve_free_huge_page(struct page *page)
int nid = page_to_nid(head);
if (h->free_huge_pages - h->resv_huge_pages == 0)
goto out;
+
+ /*
+ * We should make sure that the page is already on the free list
+ * when it is dissolved.
+ */
+ if (unlikely(!PageHugeFreed(head))) {
+ spin_unlock(&hugetlb_lock);
+ cond_resched();
+
+ /*
+ * Theoretically, we should return -EBUSY when we
+ * encounter this race. In fact, we have a chance
+ * to successfully dissolve the page if we do a
+ * retry. Because the race window is quite small.
+ * If we seize this opportunity, it is an optimization
+ * for increasing the success rate of dissolving page.
+ */
+ goto retry;
+ }
+
/*
* Move PageHWPoison flag from head page to the raw error page,
* which makes any subpages rather than the error page reusable.
@@ -2009,13 +2047,16 @@ retry:
/* Free the needed pages to the hugetlb pool */
list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
+ int zeroed;
+
if ((--needed) < 0)
break;
/*
* This page is now managed by the hugetlb allocator and has
* no users -- drop the buddy allocator's reference.
*/
- VM_BUG_ON_PAGE(!put_page_testzero(page), page);
+ zeroed = put_page_testzero(page);
+ VM_BUG_ON_PAGE(!zeroed, page);
enqueue_huge_page(h, page);
}
free:
@@ -5555,9 +5596,9 @@ bool isolate_huge_page(struct page *page, struct list_head *list)
{
bool ret = true;
- VM_BUG_ON_PAGE(!PageHead(page), page);
spin_lock(&hugetlb_lock);
- if (!page_huge_active(page) || !get_page_unless_zero(page)) {
+ if (!PageHeadHuge(page) || !page_huge_active(page) ||
+ !get_page_unless_zero(page)) {
ret = false;
goto unlock;
}
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index e529428e7a11..d558799b25b3 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -134,12 +134,8 @@ void __init kasan_init_hw_tags(void)
switch (kasan_arg_stacktrace) {
case KASAN_ARG_STACKTRACE_DEFAULT:
- /*
- * Default to enabling stack trace collection for
- * debug kernels.
- */
- if (IS_ENABLED(CONFIG_DEBUG_KERNEL))
- static_branch_enable(&kasan_flag_stacktrace);
+ /* Default to enabling stack trace collection. */
+ static_branch_enable(&kasan_flag_stacktrace);
break;
case KASAN_ARG_STACKTRACE_OFF:
/* Do nothing, kasan_flag_stacktrace keeps its default value. */
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index cc4d9e1d49b1..8c706e7652f2 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -209,7 +209,7 @@ bool check_memory_region(unsigned long addr, size_t size, bool write,
static inline bool addr_has_metadata(const void *addr)
{
- return true;
+ return (is_vmalloc_addr(addr) || virt_addr_valid(addr));
}
#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
diff --git a/mm/memblock.c b/mm/memblock.c
index 1eaaec1e7687..8d9b5f1e7040 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -275,14 +275,6 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
*
* Find @size free area aligned to @align in the specified range and node.
*
- * When allocation direction is bottom-up, the @start should be greater
- * than the end of the kernel image. Otherwise, it will be trimmed. The
- * reason is that we want the bottom-up allocation just near the kernel
- * image so it is highly likely that the allocated memory and the kernel
- * will reside in the same node.
- *
- * If bottom-up allocation failed, will try to allocate memory top-down.
- *
* Return:
* Found address on success, 0 on failure.
*/
@@ -291,8 +283,6 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
phys_addr_t end, int nid,
enum memblock_flags flags)
{
- phys_addr_t kernel_end, ret;
-
/* pump up @end */
if (end == MEMBLOCK_ALLOC_ACCESSIBLE ||
end == MEMBLOCK_ALLOC_KASAN)
@@ -301,40 +291,13 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
/* avoid allocating the first page */
start = max_t(phys_addr_t, start, PAGE_SIZE);
end = max(start, end);
- kernel_end = __pa_symbol(_end);
-
- /*
- * try bottom-up allocation only when bottom-up mode
- * is set and @end is above the kernel image.
- */
- if (memblock_bottom_up() && end > kernel_end) {
- phys_addr_t bottom_up_start;
-
- /* make sure we will allocate above the kernel */
- bottom_up_start = max(start, kernel_end);
- /* ok, try bottom-up allocation first */
- ret = __memblock_find_range_bottom_up(bottom_up_start, end,
- size, align, nid, flags);
- if (ret)
- return ret;
-
- /*
- * we always limit bottom-up allocation above the kernel,
- * but top-down allocation doesn't have the limit, so
- * retrying top-down allocation may succeed when bottom-up
- * allocation failed.
- *
- * bottom-up allocation is expected to be fail very rarely,
- * so we use WARN_ONCE() here to see the stack trace if
- * fail happens.
- */
- WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE),
- "memblock: bottom-up allocation failed, memory hotremove may be affected\n");
- }
-
- return __memblock_find_range_top_down(start, end, size, align, nid,
- flags);
+ if (memblock_bottom_up())
+ return __memblock_find_range_bottom_up(start, end, size, align,
+ nid, flags);
+ else
+ return __memblock_find_range_top_down(start, end, size, align,
+ nid, flags);
}
/**
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e2de77b5bcc2..913c2b9e5c72 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6271,6 +6271,8 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
if (err)
return err;
+ page_counter_set_high(&memcg->memory, high);
+
for (;;) {
unsigned long nr_pages = page_counter_read(&memcg->memory);
unsigned long reclaimed;
@@ -6294,10 +6296,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
break;
}
- page_counter_set_high(&memcg->memory, high);
-
memcg_wb_domain_size_changed(memcg);
-
return nbytes;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index c0efe921bca5..20ca887ea769 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1280,6 +1280,12 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
return -ENOSYS;
}
+ if (page_count(hpage) == 1) {
+ /* page was freed from under us. So we are done. */
+ putback_active_hugepage(hpage);
+ return MIGRATEPAGE_SUCCESS;
+ }
+
new_hpage = get_new_page(hpage, private);
if (!new_hpage)
return -ENOMEM;
diff --git a/mm/mremap.c b/mm/mremap.c
index f554320281cc..aa63bfd3cad2 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -336,8 +336,9 @@ enum pgt_entry {
* valid. Else returns a smaller extent bounded by the end of the source and
* destination pgt_entry.
*/
-static unsigned long get_extent(enum pgt_entry entry, unsigned long old_addr,
- unsigned long old_end, unsigned long new_addr)
+static __always_inline unsigned long get_extent(enum pgt_entry entry,
+ unsigned long old_addr, unsigned long old_end,
+ unsigned long new_addr)
{
unsigned long next, extent, mask, size;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 783913e41f65..ef5070fed76b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5137,8 +5137,9 @@ void __page_frag_cache_drain(struct page *page, unsigned int count)
}
EXPORT_SYMBOL(__page_frag_cache_drain);
-void *page_frag_alloc(struct page_frag_cache *nc,
- unsigned int fragsz, gfp_t gfp_mask)
+void *page_frag_alloc_align(struct page_frag_cache *nc,
+ unsigned int fragsz, gfp_t gfp_mask,
+ unsigned int align_mask)
{
unsigned int size = PAGE_SIZE;
struct page *page;
@@ -5190,11 +5191,12 @@ refill:
}
nc->pagecnt_bias--;
+ offset &= align_mask;
nc->offset = offset;
return nc->va + offset;
}
-EXPORT_SYMBOL(page_frag_alloc);
+EXPORT_SYMBOL(page_frag_alloc_align);
/*
* Frees a page fragment allocated out of either a compound or order 0 page.
@@ -7080,26 +7082,23 @@ void __init free_area_init_memoryless_node(int nid)
* Initialize all valid struct pages in the range [spfn, epfn) and mark them
* PageReserved(). Return the number of struct pages that were initialized.
*/
-static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn,
- int zone, int nid)
+static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn)
{
- unsigned long pfn, zone_spfn, zone_epfn;
+ unsigned long pfn;
u64 pgcnt = 0;
- zone_spfn = arch_zone_lowest_possible_pfn[zone];
- zone_epfn = arch_zone_highest_possible_pfn[zone];
-
- spfn = clamp(spfn, zone_spfn, zone_epfn);
- epfn = clamp(epfn, zone_spfn, zone_epfn);
-
for (pfn = spfn; pfn < epfn; pfn++) {
if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
+ pageblock_nr_pages - 1;
continue;
}
-
- __init_single_page(pfn_to_page(pfn), pfn, zone, nid);
+ /*
+ * Use a fake node/zone (0) for now. Some of these pages
+ * (in memblock.reserved but not in memblock.memory) will
+ * get re-initialized via reserve_bootmem_region() later.
+ */
+ __init_single_page(pfn_to_page(pfn), pfn, 0, 0);
__SetPageReserved(pfn_to_page(pfn));
pgcnt++;
}
@@ -7108,64 +7107,51 @@ static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn,
}
/*
- * Only struct pages that correspond to ranges defined by memblock.memory
- * are zeroed and initialized by going through __init_single_page() during
- * memmap_init().
- *
- * But, there could be struct pages that correspond to holes in
- * memblock.memory. This can happen because of the following reasons:
- * - phyiscal memory bank size is not necessarily the exact multiple of the
- * arbitrary section size
- * - early reserved memory may not be listed in memblock.memory
- * - memory layouts defined with memmap= kernel parameter may not align
- * nicely with memmap sections
+ * Only struct pages that are backed by physical memory are zeroed and
+ * initialized by going through __init_single_page(). But, there are some
+ * struct pages which are reserved in memblock allocator and their fields
+ * may be accessed (for example page_to_pfn() on some configuration accesses
+ * flags). We must explicitly initialize those struct pages.
*
- * Explicitly initialize those struct pages so that:
- * - PG_Reserved is set
- * - zone link is set accorging to the architecture constrains
- * - node is set to node id of the next populated region except for the
- * trailing hole where last node id is used
+ * This function also addresses a similar issue where struct pages are left
+ * uninitialized because the physical address range is not covered by
+ * memblock.memory or memblock.reserved. That could happen when memblock
+ * layout is manually configured via memmap=, or when the highest physical
+ * address (max_pfn) does not end on a section boundary.
*/
-static void __init init_zone_unavailable_mem(int zone)
+static void __init init_unavailable_mem(void)
{
- unsigned long start, end;
- int i, nid;
- u64 pgcnt;
- unsigned long next = 0;
+ phys_addr_t start, end;
+ u64 i, pgcnt;
+ phys_addr_t next = 0;
/*
- * Loop through holes in memblock.memory and initialize struct
- * pages corresponding to these holes
+ * Loop through unavailable ranges not covered by memblock.memory.
*/
pgcnt = 0;
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) {
+ for_each_mem_range(i, &start, &end) {
if (next < start)
- pgcnt += init_unavailable_range(next, start, zone, nid);
+ pgcnt += init_unavailable_range(PFN_DOWN(next),
+ PFN_UP(start));
next = end;
}
/*
- * Last section may surpass the actual end of memory (e.g. we can
- * have 1Gb section and 512Mb of RAM pouplated).
- * Make sure that memmap has a well defined state in this case.
+ * Early sections always have a fully populated memmap for the whole
+ * section - see pfn_valid(). If the last section has holes at the
+ * end and that section is marked "online", the memmap will be
+ * considered initialized. Make sure that memmap has a well defined
+ * state.
*/
- end = round_up(max_pfn, PAGES_PER_SECTION);
- pgcnt += init_unavailable_range(next, end, zone, nid);
+ pgcnt += init_unavailable_range(PFN_DOWN(next),
+ round_up(max_pfn, PAGES_PER_SECTION));
/*
* Struct pages that do not have backing memory. This could be because
* firmware is using some of this memory, or for some other reasons.
*/
if (pgcnt)
- pr_info("Zone %s: zeroed struct page in unavailable ranges: %lld pages", zone_names[zone], pgcnt);
-}
-
-static void __init init_unavailable_mem(void)
-{
- int zone;
-
- for (zone = 0; zone < ZONE_MOVABLE; zone++)
- init_zone_unavailable_mem(zone);
+ pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt);
}
#else
static inline void __init init_unavailable_mem(void)
diff --git a/mm/slub.c b/mm/slub.c
index 69742ab9a21d..b22a4b101c84 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3423,6 +3423,7 @@ static inline int calculate_order(unsigned int size)
unsigned int order;
unsigned int min_objects;
unsigned int max_objects;
+ unsigned int nr_cpus;
/*
* Attempt to find best configuration for a slab. This
@@ -3433,8 +3434,21 @@ static inline int calculate_order(unsigned int size)
* we reduce the minimum objects required in a slab.
*/
min_objects = slub_min_objects;
- if (!min_objects)
- min_objects = 4 * (fls(num_online_cpus()) + 1);
+ if (!min_objects) {
+ /*
+ * Some architectures will only update present cpus when
+ * onlining them, so don't trust the number if it's just 1. But
+ * we also don't want to use nr_cpu_ids always, as on some other
+ * architectures, there can be many possible cpus, but never
+ * onlined. Here we compromise between trying to avoid too high
+ * order on systems that appear larger than they are, and too
+ * low order on systems that appear smaller than they are.
+ */
+ nr_cpus = num_present_cpus();
+ if (nr_cpus <= 1)
+ nr_cpus = nr_cpu_ids;
+ min_objects = 4 * (fls(nr_cpus) + 1);
+ }
max_objects = order_objects(slub_max_order, size);
min_objects = min(min_objects, max_objects);
@@ -5625,10 +5639,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
s->kobj.kset = kset;
err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
- if (err) {
- kobject_put(&s->kobj);
+ if (err)
goto out;
- }
err = sysfs_create_group(&s->kobj, &slab_attr_group);
if (err)