summaryrefslogtreecommitdiff
path: root/mm/vmalloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r--mm/vmalloc.c199
1 files changed, 134 insertions, 65 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a13ac524f6ff..d5cd52805149 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -25,6 +25,7 @@
#include <linux/notifier.h>
#include <linux/rbtree.h>
#include <linux/xarray.h>
+#include <linux/io.h>
#include <linux/rcupdate.h>
#include <linux/pfn.h>
#include <linux/kmemleak.h>
@@ -36,6 +37,7 @@
#include <linux/overflow.h>
#include <linux/pgtable.h>
#include <linux/uaccess.h>
+#include <linux/hugetlb.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
@@ -83,10 +85,11 @@ static void free_work(struct work_struct *w)
/*** Page table manipulation functions ***/
static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
{
pte_t *pte;
u64 pfn;
+ unsigned long size = PAGE_SIZE;
pfn = phys_addr >> PAGE_SHIFT;
pte = pte_alloc_kernel_track(pmd, addr, mask);
@@ -94,9 +97,22 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
return -ENOMEM;
do {
BUG_ON(!pte_none(*pte));
+
+#ifdef CONFIG_HUGETLB_PAGE
+ size = arch_vmap_pte_range_map_size(addr, end, pfn, max_page_shift);
+ if (size != PAGE_SIZE) {
+ pte_t entry = pfn_pte(pfn, prot);
+
+ entry = pte_mkhuge(entry);
+ entry = arch_make_huge_pte(entry, ilog2(size), 0);
+ set_huge_pte_at(&init_mm, addr, pte, entry);
+ pfn += PFN_DOWN(size);
+ continue;
+ }
+#endif
set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
pfn++;
- } while (pte++, addr += PAGE_SIZE, addr != end);
+ } while (pte += PFN_DOWN(size), addr += size, addr != end);
*mask |= PGTBL_PTE_MODIFIED;
return 0;
}
@@ -145,7 +161,7 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
continue;
}
- if (vmap_pte_range(pmd, addr, next, phys_addr, prot, mask))
+ if (vmap_pte_range(pmd, addr, next, phys_addr, prot, max_page_shift, mask))
return -ENOMEM;
} while (pmd++, phys_addr += (next - addr), addr = next, addr != end);
return 0;
@@ -1592,6 +1608,7 @@ static DEFINE_MUTEX(vmap_purge_lock);
/* for per-CPU blocks */
static void purge_fragmented_blocks_allcpus(void);
+#ifdef CONFIG_X86_64
/*
* called before a call to iounmap() if the caller wants vm_area_struct's
* immediately freed.
@@ -1600,6 +1617,7 @@ void set_iounmap_nonlazy(void)
{
atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
}
+#endif /* CONFIG_X86_64 */
/*
* Purges all lazily-freed vmap areas.
@@ -2344,15 +2362,16 @@ static void clear_vm_uninitialized_flag(struct vm_struct *vm)
}
static struct vm_struct *__get_vm_area_node(unsigned long size,
- unsigned long align, unsigned long flags, unsigned long start,
- unsigned long end, int node, gfp_t gfp_mask, const void *caller)
+ unsigned long align, unsigned long shift, unsigned long flags,
+ unsigned long start, unsigned long end, int node,
+ gfp_t gfp_mask, const void *caller)
{
struct vmap_area *va;
struct vm_struct *area;
unsigned long requested_size = size;
BUG_ON(in_interrupt());
- size = PAGE_ALIGN(size);
+ size = ALIGN(size, 1ul << shift);
if (unlikely(!size))
return NULL;
@@ -2384,8 +2403,8 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
unsigned long start, unsigned long end,
const void *caller)
{
- return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
- GFP_KERNEL, caller);
+ return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, start, end,
+ NUMA_NO_NODE, GFP_KERNEL, caller);
}
/**
@@ -2401,7 +2420,8 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
*/
struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
{
- return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
+ return __get_vm_area_node(size, 1, PAGE_SHIFT, flags,
+ VMALLOC_START, VMALLOC_END,
NUMA_NO_NODE, GFP_KERNEL,
__builtin_return_address(0));
}
@@ -2409,7 +2429,8 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
const void *caller)
{
- return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
+ return __get_vm_area_node(size, 1, PAGE_SHIFT, flags,
+ VMALLOC_START, VMALLOC_END,
NUMA_NO_NODE, GFP_KERNEL, caller);
}
@@ -2564,6 +2585,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
BUG_ON(!page);
__free_pages(page, page_order);
+ cond_resched();
}
atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
@@ -2755,6 +2777,54 @@ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
EXPORT_SYMBOL_GPL(vmap_pfn);
#endif /* CONFIG_VMAP_PFN */
+static inline unsigned int
+vm_area_alloc_pages(gfp_t gfp, int nid,
+ unsigned int order, unsigned long nr_pages, struct page **pages)
+{
+ unsigned int nr_allocated = 0;
+
+ /*
+ * For order-0 pages we make use of bulk allocator, if
+ * the page array is partly or not at all populated due
+ * to fails, fallback to a single page allocator that is
+ * more permissive.
+ */
+ if (!order)
+ nr_allocated = alloc_pages_bulk_array_node(
+ gfp, nid, nr_pages, pages);
+ else
+ /*
+ * Compound pages required for remap_vmalloc_page if
+ * high-order pages.
+ */
+ gfp |= __GFP_COMP;
+
+ /* High-order pages or fallback path if "bulk" fails. */
+ while (nr_allocated < nr_pages) {
+ struct page *page;
+ int i;
+
+ page = alloc_pages_node(nid, gfp, order);
+ if (unlikely(!page))
+ break;
+
+ /*
+ * Careful, we allocate and map page-order pages, but
+ * tracking is done per PAGE_SIZE page so as to keep the
+ * vm_struct APIs independent of the physical/mapped size.
+ */
+ for (i = 0; i < (1U << order); i++)
+ pages[nr_allocated + i] = page + i;
+
+ if (gfpflags_allow_blocking(gfp))
+ cond_resched();
+
+ nr_allocated += 1U << order;
+ }
+
+ return nr_allocated;
+}
+
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot, unsigned int page_shift,
int node)
@@ -2765,8 +2835,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
unsigned long array_size;
unsigned int nr_small_pages = size >> PAGE_SHIFT;
unsigned int page_order;
- struct page **pages;
- unsigned int i;
array_size = (unsigned long)nr_small_pages * sizeof(struct page *);
gfp_mask |= __GFP_NOWARN;
@@ -2775,62 +2843,44 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
/* Please note that the recursion is strictly bounded. */
if (array_size > PAGE_SIZE) {
- pages = __vmalloc_node(array_size, 1, nested_gfp, node,
+ area->pages = __vmalloc_node(array_size, 1, nested_gfp, node,
area->caller);
} else {
- pages = kmalloc_node(array_size, nested_gfp, node);
+ area->pages = kmalloc_node(array_size, nested_gfp, node);
}
- if (!pages) {
- free_vm_area(area);
+ if (!area->pages) {
warn_alloc(gfp_mask, NULL,
- "vmalloc size %lu allocation failure: "
- "page array size %lu allocation failed",
- nr_small_pages * PAGE_SIZE, array_size);
+ "vmalloc error: size %lu, failed to allocated page array size %lu",
+ nr_small_pages * PAGE_SIZE, array_size);
+ free_vm_area(area);
return NULL;
}
- area->pages = pages;
- area->nr_pages = nr_small_pages;
set_vm_area_page_order(area, page_shift - PAGE_SHIFT);
-
page_order = vm_area_page_order(area);
- /*
- * Careful, we allocate and map page_order pages, but tracking is done
- * per PAGE_SIZE page so as to keep the vm_struct APIs independent of
- * the physical/mapped size.
- */
- for (i = 0; i < area->nr_pages; i += 1U << page_order) {
- struct page *page;
- int p;
-
- /* Compound pages required for remap_vmalloc_page */
- page = alloc_pages_node(node, gfp_mask | __GFP_COMP, page_order);
- if (unlikely(!page)) {
- /* Successfully allocated i pages, free them in __vfree() */
- area->nr_pages = i;
- atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
- warn_alloc(gfp_mask, NULL,
- "vmalloc size %lu allocation failure: "
- "page order %u allocation failed",
- area->nr_pages * PAGE_SIZE, page_order);
- goto fail;
- }
+ area->nr_pages = vm_area_alloc_pages(gfp_mask, node,
+ page_order, nr_small_pages, area->pages);
- for (p = 0; p < (1U << page_order); p++)
- area->pages[i + p] = page + p;
+ atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
- if (gfpflags_allow_blocking(gfp_mask))
- cond_resched();
+ /*
+ * If not enough pages were obtained to accomplish an
+ * allocation request, free them via __vfree() if any.
+ */
+ if (area->nr_pages != nr_small_pages) {
+ warn_alloc(gfp_mask, NULL,
+ "vmalloc error: size %lu, page order %u, failed to allocate pages",
+ area->nr_pages * PAGE_SIZE, page_order);
+ goto fail;
}
- atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
- if (vmap_pages_range(addr, addr + size, prot, pages, page_shift) < 0) {
+ if (vmap_pages_range(addr, addr + size, prot, area->pages,
+ page_shift) < 0) {
warn_alloc(gfp_mask, NULL,
- "vmalloc size %lu allocation failure: "
- "failed to map pages",
- area->nr_pages * PAGE_SIZE);
+ "vmalloc error: size %lu, failed to map pages",
+ area->nr_pages * PAGE_SIZE);
goto fail;
}
@@ -2875,13 +2925,12 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if ((size >> PAGE_SHIFT) > totalram_pages()) {
warn_alloc(gfp_mask, NULL,
- "vmalloc size %lu allocation failure: "
- "exceeds total pages", real_size);
+ "vmalloc error: size %lu, exceeds total pages",
+ real_size);
return NULL;
}
- if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP) &&
- arch_vmap_pmd_supported(prot)) {
+ if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP)) {
unsigned long size_per_node;
/*
@@ -2894,21 +2943,23 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
size_per_node = size;
if (node == NUMA_NO_NODE)
size_per_node /= num_online_nodes();
- if (size_per_node >= PMD_SIZE) {
+ if (arch_vmap_pmd_supported(prot) && size_per_node >= PMD_SIZE)
shift = PMD_SHIFT;
- align = max(real_align, 1UL << shift);
- size = ALIGN(real_size, 1UL << shift);
- }
+ else
+ shift = arch_vmap_pte_supported_shift(size_per_node);
+
+ align = max(real_align, 1UL << shift);
+ size = ALIGN(real_size, 1UL << shift);
}
again:
- size = PAGE_ALIGN(size);
- area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
- vm_flags, start, end, node, gfp_mask, caller);
+ area = __get_vm_area_node(real_size, align, shift, VM_ALLOC |
+ VM_UNINITIALIZED | vm_flags, start, end, node,
+ gfp_mask, caller);
if (!area) {
warn_alloc(gfp_mask, NULL,
- "vmalloc size %lu allocation failure: "
- "vm_struct allocation failed", real_size);
+ "vmalloc error: size %lu, vm_struct allocation failed",
+ real_size);
goto fail;
}
@@ -2923,6 +2974,7 @@ again:
*/
clear_vm_uninitialized_flag(area);
+ size = PAGE_ALIGN(size);
kmemleak_vmalloc(area, size, gfp_mask);
return addr;
@@ -2999,6 +3051,23 @@ void *vmalloc(unsigned long size)
EXPORT_SYMBOL(vmalloc);
/**
+ * vmalloc_no_huge - allocate virtually contiguous memory using small pages
+ * @size: allocation size
+ *
+ * Allocate enough non-huge pages to cover @size from the page level
+ * allocator and map them into contiguous kernel virtual space.
+ *
+ * Return: pointer to the allocated memory or %NULL on error
+ */
+void *vmalloc_no_huge(unsigned long size)
+{
+ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+ GFP_KERNEL, PAGE_KERNEL, VM_NO_HUGE_VMAP,
+ NUMA_NO_NODE, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(vmalloc_no_huge);
+
+/**
* vzalloc - allocate virtually contiguous memory with zero fill
* @size: allocation size
*