Diffstat (limited to 'mm/mm_init.c')
-rw-r--r--  mm/mm_init.c  161
1 file changed, 109 insertions, 52 deletions
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 7f7f9c677854..a1963c3322af 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -259,6 +259,8 @@ static int __init cmdline_parse_core(char *p, unsigned long *core,
return 0;
}
+bool mirrored_kernelcore __initdata_memblock;
+
/*
* kernelcore=size sets the amount of memory for use for allocations that
* cannot be reclaimed or migrated.
@@ -644,10 +646,8 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
}
/* Returns true if the struct page for the pfn is initialised */
-static inline bool __meminit early_page_initialised(unsigned long pfn)
+static inline bool __meminit early_page_initialised(unsigned long pfn, int nid)
{
- int nid = early_pfn_to_nid(pfn);
-
if (node_online(nid) && pfn >= NODE_DATA(nid)->first_deferred_pfn)
return false;
@@ -693,15 +693,14 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
return false;
}
-static void __meminit init_reserved_page(unsigned long pfn)
+static void __meminit init_reserved_page(unsigned long pfn, int nid)
{
pg_data_t *pgdat;
- int nid, zid;
+ int zid;
- if (early_page_initialised(pfn))
+ if (early_page_initialised(pfn, nid))
return;
- nid = early_pfn_to_nid(pfn);
pgdat = NODE_DATA(nid);
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
@@ -715,7 +714,7 @@ static void __meminit init_reserved_page(unsigned long pfn)
#else
static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
-static inline bool early_page_initialised(unsigned long pfn)
+static inline bool early_page_initialised(unsigned long pfn, int nid)
{
return true;
}
@@ -725,7 +724,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
return false;
}
-static inline void init_reserved_page(unsigned long pfn)
+static inline void init_reserved_page(unsigned long pfn, int nid)
{
}
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
@@ -736,7 +735,8 @@ static inline void init_reserved_page(unsigned long pfn)
* marks the pages PageReserved. The remaining valid pages are later
* sent to the buddy page allocator.
*/
-void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
+void __meminit reserve_bootmem_region(phys_addr_t start,
+ phys_addr_t end, int nid)
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long end_pfn = PFN_UP(end);
@@ -745,7 +745,7 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
if (pfn_valid(start_pfn)) {
struct page *page = pfn_to_page(start_pfn);
- init_reserved_page(start_pfn);
+ init_reserved_page(start_pfn, nid);
/* Avoid false-positive PageTail() */
INIT_LIST_HEAD(&page->lru);
@@ -1166,24 +1166,15 @@ unsigned long __init absent_pages_in_range(unsigned long start_pfn,
/* Return the number of page frames in holes in a zone on a node */
static unsigned long __init zone_absent_pages_in_node(int nid,
unsigned long zone_type,
- unsigned long node_start_pfn,
- unsigned long node_end_pfn)
+ unsigned long zone_start_pfn,
+ unsigned long zone_end_pfn)
{
- unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
- unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
- unsigned long zone_start_pfn, zone_end_pfn;
unsigned long nr_absent;
- /* When hotadd a new node from cpu_up(), the node should be empty */
- if (!node_start_pfn && !node_end_pfn)
+ /* zone is empty, we don't have any absent pages */
+ if (zone_start_pfn == zone_end_pfn)
return 0;
- zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
- zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
-
- adjust_zone_range_for_zone_movable(nid, zone_type,
- node_start_pfn, node_end_pfn,
- &zone_start_pfn, &zone_end_pfn);
nr_absent = __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
/*
@@ -1227,9 +1218,6 @@ static unsigned long __init zone_spanned_pages_in_node(int nid,
{
unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
- /* When hotadd a new node from cpu_up(), the node should be empty */
- if (!node_start_pfn && !node_end_pfn)
- return 0;
/* Get the start and end of the zone */
*zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
@@ -1250,6 +1238,24 @@ static unsigned long __init zone_spanned_pages_in_node(int nid,
return *zone_end_pfn - *zone_start_pfn;
}
+static void __init reset_memoryless_node_totalpages(struct pglist_data *pgdat)
+{
+ struct zone *z;
+
+ for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) {
+ z->zone_start_pfn = 0;
+ z->spanned_pages = 0;
+ z->present_pages = 0;
+#if defined(CONFIG_MEMORY_HOTPLUG)
+ z->present_early_pages = 0;
+#endif
+ }
+
+ pgdat->node_spanned_pages = 0;
+ pgdat->node_present_pages = 0;
+ pr_debug("On node %d totalpages: 0\n", pgdat->node_id);
+}
+
static void __init calculate_node_totalpages(struct pglist_data *pgdat,
unsigned long node_start_pfn,
unsigned long node_end_pfn)
@@ -1261,7 +1267,7 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat,
struct zone *zone = pgdat->node_zones + i;
unsigned long zone_start_pfn, zone_end_pfn;
unsigned long spanned, absent;
- unsigned long size, real_size;
+ unsigned long real_size;
spanned = zone_spanned_pages_in_node(pgdat->node_id, i,
node_start_pfn,
@@ -1269,23 +1275,22 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat,
&zone_start_pfn,
&zone_end_pfn);
absent = zone_absent_pages_in_node(pgdat->node_id, i,
- node_start_pfn,
- node_end_pfn);
+ zone_start_pfn,
+ zone_end_pfn);
- size = spanned;
- real_size = size - absent;
+ real_size = spanned - absent;
- if (size)
+ if (spanned)
zone->zone_start_pfn = zone_start_pfn;
else
zone->zone_start_pfn = 0;
- zone->spanned_pages = size;
+ zone->spanned_pages = spanned;
zone->present_pages = real_size;
#if defined(CONFIG_MEMORY_HOTPLUG)
zone->present_early_pages = real_size;
#endif
- totalpages += size;
+ totalpages += spanned;
realtotalpages += real_size;
}
@@ -1375,6 +1380,10 @@ static void __meminit zone_init_free_lists(struct zone *zone)
INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
zone->free_area[order].nr_free = 0;
}
+
+#ifdef CONFIG_UNACCEPTED_MEMORY
+ INIT_LIST_HEAD(&zone->unaccepted_pages);
+#endif
}
void __meminit init_currently_empty_zone(struct zone *zone,
@@ -1502,6 +1511,8 @@ void __ref free_area_init_core_hotplug(struct pglist_data *pgdat)
pgdat->kswapd_order = 0;
pgdat->kswapd_highest_zoneidx = 0;
pgdat->node_start_pfn = 0;
+ pgdat->node_present_pages = 0;
+
for_each_online_cpu(cpu) {
struct per_cpu_nodestat *p;
@@ -1509,8 +1520,17 @@ void __ref free_area_init_core_hotplug(struct pglist_data *pgdat)
memset(p, 0, sizeof(*p));
}
- for (z = 0; z < MAX_NR_ZONES; z++)
- zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
+ /*
+ * When memory is hot-added, all the memory is in offline state. So
+ * clear all zones' present_pages and managed_pages because they will
+ * be updated in online_pages() and offline_pages().
+ */
+ for (z = 0; z < MAX_NR_ZONES; z++) {
+ struct zone *zone = pgdat->node_zones + z;
+
+ zone->present_pages = 0;
+ zone_init_internals(zone, z, nid, 0);
+ }
}
#endif
@@ -1578,7 +1598,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
if (!size)
continue;
- set_pageblock_order();
setup_usemap(zone);
init_currently_empty_zone(zone, zone->zone_start_pfn, size);
}
@@ -1702,11 +1721,13 @@ static void __init free_area_init_node(int nid)
pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
(u64)start_pfn << PAGE_SHIFT,
end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
+
+ calculate_node_totalpages(pgdat, start_pfn, end_pfn);
} else {
pr_info("Initmem setup node %d as memoryless\n", nid);
- }
- calculate_node_totalpages(pgdat, start_pfn, end_pfn);
+ reset_memoryless_node_totalpages(pgdat);
+ }
alloc_node_mem_map(pgdat);
pgdat_set_deferred_range(pgdat);
@@ -1716,7 +1737,7 @@ static void __init free_area_init_node(int nid)
}
/* Any regular or high memory on that node ? */
-static void check_for_memory(pg_data_t *pgdat, int nid)
+static void check_for_memory(pg_data_t *pgdat)
{
enum zone_type zone_type;
@@ -1724,9 +1745,9 @@ static void check_for_memory(pg_data_t *pgdat, int nid)
struct zone *zone = &pgdat->node_zones[zone_type];
if (populated_zone(zone)) {
if (IS_ENABLED(CONFIG_HIGHMEM))
- node_set_state(nid, N_HIGH_MEMORY);
+ node_set_state(pgdat->node_id, N_HIGH_MEMORY);
if (zone_type <= ZONE_NORMAL)
- node_set_state(nid, N_NORMAL_MEMORY);
+ node_set_state(pgdat->node_id, N_NORMAL_MEMORY);
break;
}
}
@@ -1745,11 +1766,6 @@ void __init setup_nr_node_ids(void)
}
#endif
-static void __init free_area_init_memoryless_node(int nid)
-{
- free_area_init_node(nid);
-}
-
/*
* Some architectures, e.g. ARC may have ZONE_HIGHMEM below ZONE_NORMAL. For
* such cases we allow max_zone_pfn sorted in the descending order
@@ -1848,6 +1864,8 @@ void __init free_area_init(unsigned long *max_zone_pfn)
/* Initialise every node */
mminit_verify_pageflags_layout();
setup_nr_node_ids();
+ set_pageblock_order();
+
for_each_node(nid) {
pg_data_t *pgdat;
@@ -1860,7 +1878,7 @@ void __init free_area_init(unsigned long *max_zone_pfn)
panic("Cannot allocate %zuB for node %d.\n",
sizeof(*pgdat), nid);
arch_refresh_nodedata(nid, pgdat);
- free_area_init_memoryless_node(nid);
+ free_area_init_node(nid);
/*
* We do not want to confuse userspace by sysfs
@@ -1881,7 +1899,7 @@ void __init free_area_init(unsigned long *max_zone_pfn)
/* Any memory on that node */
if (pgdat->node_present_pages)
node_set_state(nid, N_MEMORY);
- check_for_memory(pgdat, nid);
+ check_for_memory(pgdat);
}
memmap_init();
@@ -1960,6 +1978,9 @@ static void __init deferred_free_range(unsigned long pfn,
return;
}
+ /* Accept chunks smaller than MAX_ORDER upfront */
+ accept_memory(PFN_PHYS(pfn), PFN_PHYS(pfn + nr_pages));
+
for (i = 0; i < nr_pages; i++, page++, pfn++) {
if (pageblock_aligned(pfn))
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
@@ -2328,6 +2349,28 @@ void __init init_cma_reserved_pageblock(struct page *page)
}
#endif
+void set_zone_contiguous(struct zone *zone)
+{
+ unsigned long block_start_pfn = zone->zone_start_pfn;
+ unsigned long block_end_pfn;
+
+ block_end_pfn = pageblock_end_pfn(block_start_pfn);
+ for (; block_start_pfn < zone_end_pfn(zone);
+ block_start_pfn = block_end_pfn,
+ block_end_pfn += pageblock_nr_pages) {
+
+ block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
+
+ if (!__pageblock_pfn_to_page(block_start_pfn,
+ block_end_pfn, zone))
+ return;
+ cond_resched();
+ }
+
+ /* We confirm that there is no hole */
+ zone->contiguous = true;
+}
+
void __init page_alloc_init_late(void)
{
struct zone *zone;
@@ -2368,6 +2411,8 @@ void __init page_alloc_init_late(void)
/* Initialize page ext after all struct pages are initialized. */
if (deferred_struct_pages)
page_ext_init();
+
+ page_alloc_sysctl_init();
}
#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
@@ -2532,8 +2577,14 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
void __init memblock_free_pages(struct page *page, unsigned long pfn,
unsigned int order)
{
- if (!early_page_initialised(pfn))
- return;
+
+ if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
+ int nid = early_pfn_to_nid(pfn);
+
+ if (!early_page_initialised(pfn, nid))
+ return;
+ }
+
if (!kmsan_memblock_free_pages(page, order)) {
/* KMSAN will take care of these pages. */
return;
@@ -2541,6 +2592,12 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
__free_pages_core(page, order);
}
+DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc);
+EXPORT_SYMBOL(init_on_alloc);
+
+DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
+EXPORT_SYMBOL(init_on_free);
+
static bool _init_on_alloc_enabled_early __read_mostly
= IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON);
static int __init early_init_on_alloc(char *buf)