From 15674868d6c5985466835c56dd89d39235f16302 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Tue, 4 Sep 2012 13:55:05 +0530 Subject: mm/memblock: Use NULL instead of 0 for pointers This type cleanup also fixes the following sparse warning: mm/memblock.c:249:49: warning: Using plain integer as NULL pointer Signed-off-by: Sachin Kamat Acked-by: Tejun Heo Cc: Andrew Morton Cc: patches@linaro.org Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/memblock.c b/mm/memblock.c index 4d9393c7edc9..82aa349d2f7a 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -246,7 +246,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, min(new_area_start, memblock.current_limit), new_alloc_size, PAGE_SIZE); - new_array = addr ? __va(addr) : 0; + new_array = addr ? __va(addr) : NULL; } if (!addr) { pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", -- cgit v1.2.3 From 18b48d5873a6fc8e0e6044ddb572fcda26988f19 Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Mon, 17 Sep 2012 14:08:55 -0700 Subject: memory hotplug: reset pgdat->kswapd to NULL if creating kernel thread fails If kthread_run() fails, pgdat->kswapd contains errno. When we stop this thread, we only check whether pgdat->kswapd is NULL and access it. If it contains errno, it will cause page fault. Reset pgdat->kswapd to NULL when creating kernel thread fails can avoid this problem. Signed-off-by: Wen Congyang Reviewed-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mm') diff --git a/mm/vmscan.c b/mm/vmscan.c index 8d01243d9560..99b434b674c0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3102,6 +3102,7 @@ int kswapd_run(int nid) /* failure at boot is fatal */ BUG_ON(system_state == SYSTEM_BOOTING); printk("Failed to start kswapd on node %d\n",nid); + pgdat->kswapd = NULL; ret = -1; } return ret; -- cgit v1.2.3 From 30c29bea6af2d3b6ffc8865864de7fc08cadb5df Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 17 Sep 2012 14:09:03 -0700 Subject: slab: do ClearSlabPfmemalloc() for all pages of slab Right now, we call ClearSlabPfmemalloc() for first page of slab when we clear SlabPfmemalloc flag. This is fine for most swap-over-network use cases as it is expected that order-0 pages are in use. Unfortunately it is possible that that __ac_put_obj() checks SlabPfmemalloc on a tail page and while this is harmless, it is sloppy. This patch ensures that the head page is always used. This problem was originally identified by Joonsoo Kim. [js1304@gmail.com: Original implementation and problem identification] Signed-off-by: Mel Gorman Cc: David Miller Cc: Chuck Lever Cc: Joonsoo Kim Cc: David Rientjes Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/slab.c b/mm/slab.c index 811af03a14ef..d34a9034f929 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1000,7 +1000,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, l3 = cachep->nodelists[numa_mem_id()]; if (!list_empty(&l3->slabs_free) && force_refill) { struct slab *slabp = virt_to_slab(objp); - ClearPageSlabPfmemalloc(virt_to_page(slabp->s_mem)); + ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem)); clear_obj_pfmemalloc(&objp); recheck_pfmemalloc_active(cachep, ac); return objp; @@ -1032,7 +1032,7 @@ static void *__ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac, { if (unlikely(pfmemalloc_active)) { /* Some pfmemalloc slabs exist, check if this is one */ - struct page *page = virt_to_page(objp); + struct page *page = virt_to_head_page(objp); if (PageSlabPfmemalloc(page)) set_obj_pfmemalloc(&objp); } -- cgit v1.2.3 From d014dc2ed4fae84cb92509416c8bfc9078d4f0d9 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Mon, 17 Sep 2012 14:09:06 -0700 Subject: slab: fix starting index for finding another object In array cache, there is a object at index 0, check it. Signed-off-by: Joonsoo Kim Signed-off-by: Mel Gorman Cc: David Miller Cc: Chuck Lever Cc: David Rientjes Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/slab.c b/mm/slab.c index d34a9034f929..c6854759bcf1 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -983,7 +983,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, } /* The caller cannot use PFMEMALLOC objects, find another one */ - for (i = 1; i < ac->avail; i++) { + for (i = 0; i < ac->avail; i++) { /* If a !PFMEMALLOC object is found, swap them */ if (!is_obj_pfmemalloc(ac->entry[i])) { objp = ac->entry[i]; -- cgit v1.2.3 From 8ba00bb68a067c221cc3ea3a0293e8fcbdcb7ba1 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Mon, 17 Sep 2012 14:09:09 -0700 Subject: slub: consider pfmemalloc_match() in get_partial_node() get_partial() is currently not checking pfmemalloc_match() meaning that it is possible for pfmemalloc pages to leak to non-pfmemalloc users. This is a problem in the following situation. Assume that there is a request from normal allocation and there are no objects in the per-cpu cache and no node-partial slab. In this case, slab_alloc enters the slow path and new_slab_objects() is called which may return a PFMEMALLOC page. As the current user is not allowed to access PFMEMALLOC page, deactivate_slab() is called ([5091b74a: mm: slub: optimise the SLUB fast path to avoid pfmemalloc checks]) and returns an object from PFMEMALLOC page. Next time, when we get another request from normal allocation, slab_alloc() enters the slow-path and calls new_slab_objects(). In new_slab_objects(), we call get_partial() and get a partial slab which was just deactivated but is a pfmemalloc page. We extract one object from it and re-deactivate. "deactivate -> re-get in get_partial -> re-deactivate" occures repeatedly. As a result, access to PFMEMALLOC page is not properly restricted and it can cause a performance degradation due to frequent deactivation. deactivation frequently. This patch changes get_partial_node() to take pfmemalloc_match() into account and prevents the "deactivate -> re-get in get_partial() scenario. Instead, new_slab() is called. Signed-off-by: Joonsoo Kim Acked-by: David Rientjes Signed-off-by: Mel Gorman Cc: David Miller Cc: Chuck Lever Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index 8f78e2577031..2fdd96f9e998 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1524,12 +1524,13 @@ static inline void *acquire_slab(struct kmem_cache *s, } static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); +static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags); /* * Try to allocate a partial slab from a specific node. */ -static void *get_partial_node(struct kmem_cache *s, - struct kmem_cache_node *n, struct kmem_cache_cpu *c) +static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, + struct kmem_cache_cpu *c, gfp_t flags) { struct page *page, *page2; void *object = NULL; @@ -1545,9 +1546,13 @@ static void *get_partial_node(struct kmem_cache *s, spin_lock(&n->list_lock); list_for_each_entry_safe(page, page2, &n->partial, lru) { - void *t = acquire_slab(s, n, page, object == NULL); + void *t; int available; + if (!pfmemalloc_match(page, flags)) + continue; + + t = acquire_slab(s, n, page, object == NULL); if (!t) break; @@ -1614,7 +1619,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, if (n && cpuset_zone_allowed_hardwall(zone, flags) && n->nr_partial > s->min_partial) { - object = get_partial_node(s, n, c); + object = get_partial_node(s, n, c, flags); if (object) { /* * Return the object even if @@ -1643,7 +1648,7 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, void *object; int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; - object = get_partial_node(s, get_node(s, searchnode), c); + object = get_partial_node(s, get_node(s, searchnode), c, flags); if (object || node != NUMA_NO_NODE) return object; -- cgit v1.2.3 From 0ba8f2d59304dfe69b59c034de723ad80f7ab9ac Mon Sep 17 00:00:00 2001 From: Li Haifeng Date: Mon, 17 Sep 2012 14:09:21 -0700 Subject: mm/page_alloc: fix the page address of higher page's buddy calculation The heuristic method for buddy has been introduced since commit 43506fad21ca ("mm/page_alloc.c: simplify calculation of combined index of adjacent buddy lists"). But the page address of higher page's buddy was wrongly calculated, which will lead page_is_buddy to fail for ever. IOW, the heuristic method would be disabled with the wrong page address of higher page's buddy. Calculating the page address of higher page's buddy should be based higher_page with the offset between index of higher page and index of higher page's buddy. Signed-off-by: Haifeng Li Signed-off-by: Gavin Shan Reviewed-by: Michal Hocko Cc: KyongHo Cho Cc: Mel Gorman Cc: Minchan Kim Cc: Johannes Weiner Cc: [2.6.38+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c66fb875104a..c13ea7538891 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -584,7 +584,7 @@ static inline void __free_one_page(struct page *page, combined_idx = buddy_idx & page_idx; higher_page = page + (combined_idx - page_idx); buddy_idx = __find_buddy_index(combined_idx, order + 1); - higher_buddy = page + (buddy_idx - combined_idx); + higher_buddy = higher_page + (buddy_idx - combined_idx); if (page_is_buddy(higher_page, higher_buddy, order + 1)) { list_add_tail(&page->lru, &zone->free_area[order].free_list[migratetype]); -- cgit v1.2.3 From f14851af0ebb32745c6c5a2e400aa0549f9d20df Mon Sep 17 00:00:00 2001 From: qiuxishi Date: Mon, 17 Sep 2012 14:09:24 -0700 Subject: memory hotplug: fix section info double registration bug There may be a bug when registering section info. For example, on my Itanium platform, the pfn range of node0 includes the other nodes, so other nodes' section info will be double registered, and memmap's page count will equal to 3. node0: start_pfn=0x100, spanned_pfn=0x20fb00, present_pfn=0x7f8a3, => 0x000100-0x20fc00 node1: start_pfn=0x80000, spanned_pfn=0x80000, present_pfn=0x80000, => 0x080000-0x100000 node2: start_pfn=0x100000, spanned_pfn=0x80000, present_pfn=0x80000, => 0x100000-0x180000 node3: start_pfn=0x180000, spanned_pfn=0x80000, present_pfn=0x80000, => 0x180000-0x200000 free_all_bootmem_node() register_page_bootmem_info_node() register_page_bootmem_info_section() When hot remove memory, we can't free the memmap's page because page_count() is 2 after put_page_bootmem(). sparse_remove_one_section() free_section_usemap() free_map_bootmem() put_page_bootmem() [akpm@linux-foundation.org: add code comment] Signed-off-by: Xishi Qiu Signed-off-by: Jiang Liu Acked-by: Mel Gorman Cc: "Luck, Tony" Cc: Yasuaki Ishimatsu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'mm') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 3ad25f9d1fc1..6a5b90d0cfd7 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -126,9 +126,6 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) struct mem_section *ms; struct page *page, *memmap; - if (!pfn_valid(start_pfn)) - return; - section_nr = pfn_to_section_nr(start_pfn); ms = __nr_to_section(section_nr); @@ -187,9 +184,16 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat) end_pfn = pfn + pgdat->node_spanned_pages; /* register_section info */ - for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) - register_page_bootmem_info_section(pfn); - + for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + /* + * Some platforms can assign the same pfn to multiple nodes - on + * node0 as well as nodeN. To avoid registering a pfn against + * multiple nodes we check that this pfn does not already + * reside in some other node. + */ + if (pfn_valid(pfn) && (pfn_to_nid(pfn) == node)) + register_page_bootmem_info_section(pfn); + } } #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ -- cgit v1.2.3