From 7fb7ab6d618a4dc7ea3f3eafc92388a35b4f8894 Mon Sep 17 00:00:00 2001 From: Zhenhua Huang Date: Mon, 14 Dec 2020 19:04:46 -0800 Subject: mm: fix page_owner initializing issue for arm32 Page owner of pages used by page owner itself used is missing on arm32 targets. The reason is dummy_handle and failure_handle is not initialized correctly. Buddy allocator is used to initialize these two handles. However, buddy allocator is not ready when page owner calls it. This change fixed that by initializing page owner after buddy initialization. The working flow before and after this change are: original logic: 1. allocated memory for page_ext(using memblock). 2. invoke the init callback of page_ext_ops like page_owner(using buddy allocator). 3. initialize buddy. after this change: 1. allocated memory for page_ext(using memblock). 2. initialize buddy. 3. invoke the init callback of page_ext_ops like page_owner(using buddy allocator). with the change, failure/dummy_handle can get its correct value and page owner output for example has the one for page owner itself: Page allocated via order 2, mask 0x6202c0(GFP_USER|__GFP_NOWARN), pid 1006, ts 67278156558 ns PFN 543776 type Unmovable Block 531 type Unmovable Flags 0x0() init_page_owner+0x28/0x2f8 invoke_init_callbacks_flatmem+0x24/0x34 start_kernel+0x33c/0x5d8 Link: https://lkml.kernel.org/r/1603104925-5888-1-git-send-email-zhenhuah@codeaurora.org Signed-off-by: Zhenhua Huang Acked-by: Vlastimil Babka Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'init') diff --git a/init/main.c b/init/main.c index 32b2a8affafd..82ae0d1345a0 100644 --- a/init/main.c +++ b/init/main.c @@ -828,6 +828,8 @@ static void __init mm_init(void) init_debug_pagealloc(); report_meminit(); mem_init(); + /* page_owner must be initialized after buddy is ready */ + page_ext_init_flatmem_late(); kmem_cache_init(); kmemleak_init(); pgtable_init(); -- cgit v1.2.3 From ba8f3587f55667c688acd7c5103c870983e294dd Mon Sep 17 00:00:00 2001 From: Lin Feng Date: Mon, 14 Dec 2020 19:11:19 -0800 Subject: init/main: fix broken buffer_init when DEFERRED_STRUCT_PAGE_INIT set In the booting phase if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set, we have following callchain: start_kernel ... mm_init mem_init memblock_free_all reset_all_zones_managed_pages free_low_memory_core_early ... buffer_init nr_free_buffer_pages zone->managed_pages ... rest_init kernel_init kernel_init_freeable page_alloc_init_late kthread_run(deferred_init_memmap, NODE_DATA(nid), "pgdatinit%d", nid); wait_for_completion(&pgdat_init_all_done_comp); ... files_maxfiles_init It's clear that buffer_init depends on zone->managed_pages, but it's reset in reset_all_zones_managed_pages after that pages are readded into zone->managed_pages, but when buffer_init runs this process is half done and most of them will finally be added till deferred_init_memmap done. In large memory couting of nr_free_buffer_pages drifts too much, also drifting from kernels to kernels on same hardware. Fix is simple, it delays buffer_init run till deferred_init_memmap all done. But as corrected by this patch, max_buffer_heads becomes very large, the value is roughly as many as 4 times of totalram_pages, formula: max_buffer_heads = nrpages * (10%) * (PAGE_SIZE / sizeof(struct buffer_head)); Say in a 64GB memory box we have 16777216 pages, then max_buffer_heads turns out to be roughly 67,108,864. In common cases, should a buffer_head be mapped to one page/block(4KB)? So max_buffer_heads never exceeds totalram_pages. IMO it's likely to make buffer_heads_over_limit bool value alwasy false, then make codes 'if (buffer_heads_over_limit)' test in vmscan unnecessary. So this patch will change the original behavior related to buffer_heads_over_limit in vmscan since we used a half done value of zone->managed_pages before, or should we use a smaller factor(<10%) in previous formula. akpm: I think this is OK - the max_buffer_heads code is only needed on highmem machines, to prevent ZONE_NORMAL from being consumed by large amounts of buffer_heads attached to highmem pagecache. This problem will not occur on 64-bit machines, so this feature's non-functionality on such machines is a feature, not a bug. Link: https://lkml.kernel.org/r/20201123110500.103523-1-linf@wangsu.com Signed-off-by: Lin Feng Acked-by: Vlastimil Babka Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 2 -- mm/page_alloc.c | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'init') diff --git a/init/main.c b/init/main.c index 82ae0d1345a0..404366e1a64d 100644 --- a/init/main.c +++ b/init/main.c @@ -58,7 +58,6 @@ #include #include #include -#include #include #include #include @@ -1036,7 +1035,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) fork_init(); proc_caches_init(); uts_ns_init(); - buffer_init(); key_init(); security_init(); dbg_late_init(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 371a22eb3180..53d633125e99 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #include @@ -2113,6 +2114,8 @@ void __init page_alloc_init_late(void) files_maxfiles_init(); #endif + buffer_init(); + /* Discard memblock private memory */ memblock_discard(); -- cgit v1.2.3 From 04013513cc84c401c7de9023ff3eda7863fc4add Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:13:30 -0800 Subject: mm, page_alloc: do not rely on the order of page_poison and init_on_alloc/free parameters Patch series "cleanup page poisoning", v3. I have identified a number of issues and opportunities for cleanup with CONFIG_PAGE_POISON and friends: - interaction with init_on_alloc and init_on_free parameters depends on the order of parameters (Patch 1) - the boot time enabling uses static key, but inefficienty (Patch 2) - sanity checking is incompatible with hibernation (Patch 3) - CONFIG_PAGE_POISONING_NO_SANITY can be removed now that we have init_on_free (Patch 4) - CONFIG_PAGE_POISONING_ZERO can be most likely removed now that we have init_on_free (Patch 5) This patch (of 5): Enabling page_poison=1 together with init_on_alloc=1 or init_on_free=1 produces a warning in dmesg that page_poison takes precedence. However, as these warnings are printed in early_param handlers for init_on_alloc/free, they are not printed if page_poison is enabled later on the command line (handlers are called in the order of their parameters), or when init_on_alloc/free is always enabled by the respective config option - before the page_poison early param handler is called, it is not considered to be enabled. This is inconsistent. We can remove the dependency on order by making the init_on_* parameters only set a boolean variable, and postponing the evaluation after all early params have been processed. Introduce a new init_mem_debugging_and_hardening() function for that, and move the related debug_pagealloc processing there as well. As a result init_mem_debugging_and_hardening() knows always accurately if init_on_* and/or page_poison options were enabled. Thus we can also optimize want_init_on_alloc() and want_init_on_free(). We don't need to check page_poisoning_enabled() there, we can instead not enable the init_on_* static keys at all, if page poisoning is enabled. This results in a simpler and more effective code. Link: https://lkml.kernel.org/r/20201113104033.22907-1-vbabka@suse.cz Link: https://lkml.kernel.org/r/20201113104033.22907-2-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reviewed-by: David Hildenbrand Reviewed-by: Mike Rapoport Cc: Rafael J. Wysocki Cc: Alexander Potapenko Cc: Kees Cook Cc: Michal Hocko Cc: Mateusz Nosek Cc: Laura Abbott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 20 ++----------- init/main.c | 2 +- mm/page_alloc.c | 88 ++++++++++++++++++++++++++---------------------------- 3 files changed, 46 insertions(+), 64 deletions(-) (limited to 'init') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5ec79757eeae..591c784277ea 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2872,6 +2872,7 @@ extern int apply_to_existing_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +extern void init_mem_debugging_and_hardening(void); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); extern void kernel_poison_pages(struct page *page, int numpages, int enable); @@ -2881,35 +2882,20 @@ static inline void kernel_poison_pages(struct page *page, int numpages, int enable) { } #endif -#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON -DECLARE_STATIC_KEY_TRUE(init_on_alloc); -#else DECLARE_STATIC_KEY_FALSE(init_on_alloc); -#endif static inline bool want_init_on_alloc(gfp_t flags) { - if (static_branch_unlikely(&init_on_alloc) && - !page_poisoning_enabled()) + if (static_branch_unlikely(&init_on_alloc)) return true; return flags & __GFP_ZERO; } -#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON -DECLARE_STATIC_KEY_TRUE(init_on_free); -#else DECLARE_STATIC_KEY_FALSE(init_on_free); -#endif static inline bool want_init_on_free(void) { - return static_branch_unlikely(&init_on_free) && - !page_poisoning_enabled(); + return static_branch_unlikely(&init_on_free); } -#ifdef CONFIG_DEBUG_PAGEALLOC -extern void init_debug_pagealloc(void); -#else -static inline void init_debug_pagealloc(void) {} -#endif extern bool _debug_pagealloc_enabled_early; DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled); diff --git a/init/main.c b/init/main.c index 404366e1a64d..3024c4db17a9 100644 --- a/init/main.c +++ b/init/main.c @@ -824,7 +824,7 @@ static void __init mm_init(void) * bigger than MAX_ORDER unless SPARSEMEM. */ page_ext_init_flatmem(); - init_debug_pagealloc(); + init_mem_debugging_and_hardening(); report_meminit(); mem_init(); /* page_owner must be initialized after buddy is ready */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 521d29718c9e..3fd0c8d5745f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -167,53 +167,26 @@ unsigned long totalcma_pages __read_mostly; int percpu_pagelist_fraction; gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; -#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON -DEFINE_STATIC_KEY_TRUE(init_on_alloc); -#else DEFINE_STATIC_KEY_FALSE(init_on_alloc); -#endif EXPORT_SYMBOL(init_on_alloc); -#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON -DEFINE_STATIC_KEY_TRUE(init_on_free); -#else DEFINE_STATIC_KEY_FALSE(init_on_free); -#endif EXPORT_SYMBOL(init_on_free); +static bool _init_on_alloc_enabled_early __read_mostly + = IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON); static int __init early_init_on_alloc(char *buf) { - int ret; - bool bool_result; - ret = kstrtobool(buf, &bool_result); - if (ret) - return ret; - if (bool_result && page_poisoning_enabled()) - pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_alloc\n"); - if (bool_result) - static_branch_enable(&init_on_alloc); - else - static_branch_disable(&init_on_alloc); - return 0; + return kstrtobool(buf, &_init_on_alloc_enabled_early); } early_param("init_on_alloc", early_init_on_alloc); +static bool _init_on_free_enabled_early __read_mostly + = IS_ENABLED(CONFIG_INIT_ON_FREE_DEFAULT_ON); static int __init early_init_on_free(char *buf) { - int ret; - bool bool_result; - - ret = kstrtobool(buf, &bool_result); - if (ret) - return ret; - if (bool_result && page_poisoning_enabled()) - pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_free\n"); - if (bool_result) - static_branch_enable(&init_on_free); - else - static_branch_disable(&init_on_free); - return 0; + return kstrtobool(buf, &_init_on_free_enabled_early); } early_param("init_on_free", early_init_on_free); @@ -730,19 +703,6 @@ static int __init early_debug_pagealloc(char *buf) } early_param("debug_pagealloc", early_debug_pagealloc); -void init_debug_pagealloc(void) -{ - if (!debug_pagealloc_enabled()) - return; - - static_branch_enable(&_debug_pagealloc_enabled); - - if (!debug_guardpage_minorder()) - return; - - static_branch_enable(&_debug_guardpage_enabled); -} - static int __init debug_guardpage_minorder_setup(char *buf) { unsigned long res; @@ -794,6 +754,42 @@ static inline void clear_page_guard(struct zone *zone, struct page *page, unsigned int order, int migratetype) {} #endif +/* + * Enable static keys related to various memory debugging and hardening options. + * Some override others, and depend on early params that are evaluated in the + * order of appearance. So we need to first gather the full picture of what was + * enabled, and then make decisions. + */ +void init_mem_debugging_and_hardening(void) +{ + if (_init_on_alloc_enabled_early) { + if (page_poisoning_enabled()) + pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " + "will take precedence over init_on_alloc\n"); + else + static_branch_enable(&init_on_alloc); + } + if (_init_on_free_enabled_early) { + if (page_poisoning_enabled()) + pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " + "will take precedence over init_on_free\n"); + else + static_branch_enable(&init_on_free); + } + +#ifdef CONFIG_DEBUG_PAGEALLOC + if (!debug_pagealloc_enabled()) + return; + + static_branch_enable(&_debug_pagealloc_enabled); + + if (!debug_guardpage_minorder()) + return; + + static_branch_enable(&_debug_guardpage_enabled); +#endif +} + static inline void set_buddy_order(struct page *page, unsigned int order) { set_page_private(page, order); -- cgit v1.2.3