summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorMichal Hocko <mhocko@suse.com>2023-03-06 11:15:17 +0300
committerAndrew Morton <akpm@linux-foundation.org>2023-03-24 03:18:31 +0300
commite9c3cda4d86e56bf7fe403729f38c4f0f65d3860 (patch)
tree895359bf0162dd3ecacb33dab6ce24441dc83f4b /mm
parent1e760fa3596e8c7f08412712c168288b79670d78 (diff)
downloadlinux-e9c3cda4d86e56bf7fe403729f38c4f0f65d3860.tar.xz
mm, vmalloc: fix high order __GFP_NOFAIL allocations
Gao Xiang has reported that the page allocator complains about high order __GFP_NOFAIL request coming from the vmalloc core: __alloc_pages+0x1cb/0x5b0 mm/page_alloc.c:5549 alloc_pages+0x1aa/0x270 mm/mempolicy.c:2286 vm_area_alloc_pages mm/vmalloc.c:2989 [inline] __vmalloc_area_node mm/vmalloc.c:3057 [inline] __vmalloc_node_range+0x978/0x13c0 mm/vmalloc.c:3227 kvmalloc_node+0x156/0x1a0 mm/util.c:606 kvmalloc include/linux/slab.h:737 [inline] kvmalloc_array include/linux/slab.h:755 [inline] kvcalloc include/linux/slab.h:760 [inline] it seems that I have completely missed high order allocation backing vmalloc areas case when implementing __GFP_NOFAIL support. This means that [k]vmalloc at al. can allocate higher order allocations with __GFP_NOFAIL which can trigger OOM killer for non-costly orders easily or cause a lot of reclaim/compaction activity if those requests cannot be satisfied. Fix the issue by falling back to zero order allocations for __GFP_NOFAIL requests if the high order request fails. Link: https://lkml.kernel.org/r/ZAXynvdNqcI0f6Us@dhcp22.suse.cz Fixes: 9376130c390a ("mm/vmalloc: add support for __GFP_NOFAIL") Reported-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lkml.kernel.org/r/20230305053035.1911-1-hsiangkao@linux.alibaba.com Signed-off-by: Michal Hocko <mhocko@suse.com> Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Baoquan He <bhe@redhat.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Mel Gorman <mgorman@techsingularity.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/vmalloc.c28
1 files changed, 23 insertions, 5 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ef910bf349e1..bef6cf2b4d46 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2883,6 +2883,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
unsigned int order, unsigned int nr_pages, struct page **pages)
{
unsigned int nr_allocated = 0;
+ gfp_t alloc_gfp = gfp;
+ bool nofail = false;
struct page *page;
int i;
@@ -2893,6 +2895,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
* more permissive.
*/
if (!order) {
+ /* bulk allocator doesn't support nofail req. officially */
gfp_t bulk_gfp = gfp & ~__GFP_NOFAIL;
while (nr_allocated < nr_pages) {
@@ -2931,20 +2934,35 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
if (nr != nr_pages_request)
break;
}
+ } else if (gfp & __GFP_NOFAIL) {
+ /*
+ * Higher order nofail allocations are really expensive and
+ * potentially dangerous (pre-mature OOM, disruptive reclaim
+ * and compaction etc.
+ */
+ alloc_gfp &= ~__GFP_NOFAIL;
+ nofail = true;
}
/* High-order pages or fallback path if "bulk" fails. */
-
while (nr_allocated < nr_pages) {
if (fatal_signal_pending(current))
break;
if (nid == NUMA_NO_NODE)
- page = alloc_pages(gfp, order);
+ page = alloc_pages(alloc_gfp, order);
else
- page = alloc_pages_node(nid, gfp, order);
- if (unlikely(!page))
- break;
+ page = alloc_pages_node(nid, alloc_gfp, order);
+ if (unlikely(!page)) {
+ if (!nofail)
+ break;
+
+ /* fall back to the zero order allocations */
+ alloc_gfp |= __GFP_NOFAIL;
+ order = 0;
+ continue;
+ }
+
/*
* Higher order allocations must be able to be treated as
* indepdenent small pages by callers (as they can with