diff options
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- | mm/hugetlb.c | 137 |
1 files changed, 79 insertions, 58 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0bdfc7e1c933..2ca4e8c3163e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -456,14 +456,12 @@ static int allocate_file_region_entries(struct resv_map *resv, int regions_needed) __must_hold(&resv->lock) { - struct list_head allocated_regions; + LIST_HEAD(allocated_regions); int to_allocate = 0, i = 0; struct file_region *trg = NULL, *rg = NULL; VM_BUG_ON(regions_needed < 0); - INIT_LIST_HEAD(&allocated_regions); - /* * Check for sufficient descriptors in the cache to accommodate * the number of in progress add operations plus regions_needed. @@ -1506,6 +1504,10 @@ static void add_hugetlb_page(struct hstate *h, struct page *page, set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); set_page_private(page, 0); + /* + * We have to set HPageVmemmapOptimized again as above + * set_page_private(page, 0) cleared it. + */ SetHPageVmemmapOptimized(page); /* @@ -2336,7 +2338,7 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, static int gather_surplus_pages(struct hstate *h, long delta) __must_hold(&hugetlb_lock) { - struct list_head surplus_list; + LIST_HEAD(surplus_list); struct page *page, *tmp; int ret; long i; @@ -2351,7 +2353,6 @@ static int gather_surplus_pages(struct hstate *h, long delta) } allocated = 0; - INIT_LIST_HEAD(&surplus_list); ret = -ENOMEM; retry: @@ -3474,7 +3475,8 @@ static int demote_free_huge_page(struct hstate *h, struct page *page) * based on pool changes for the demoted page. */ h->max_huge_pages--; - target_hstate->max_huge_pages += pages_per_huge_page(h); + target_hstate->max_huge_pages += + pages_per_huge_page(h) / pages_per_huge_page(target_hstate); return rc; } @@ -3767,8 +3769,7 @@ HSTATE_ATTR_WO(demote); static ssize_t demote_size_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - int nid; - struct hstate *h = kobj_to_hstate(kobj, &nid); + struct hstate *h = kobj_to_hstate(kobj, NULL); unsigned long demote_size = (PAGE_SIZE << h->demote_order) / SZ_1K; return sysfs_emit(buf, "%lukB\n", demote_size); @@ -3781,7 +3782,6 @@ static ssize_t demote_size_store(struct kobject *kobj, struct hstate *h, *demote_hstate; unsigned long demote_size; unsigned int demote_order; - int nid; demote_size = (unsigned long)memparse(buf, NULL); @@ -3793,7 +3793,7 @@ static ssize_t demote_size_store(struct kobject *kobj, return -EINVAL; /* demote order must be smaller than hstate order */ - h = kobj_to_hstate(kobj, &nid); + h = kobj_to_hstate(kobj, NULL); if (demote_order >= h->order) return -EINVAL; @@ -3847,15 +3847,22 @@ static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent, if (retval) { kobject_put(hstate_kobjs[hi]); hstate_kobjs[hi] = NULL; + return retval; } if (h->demote_order) { - if (sysfs_create_group(hstate_kobjs[hi], - &hstate_demote_attr_group)) + retval = sysfs_create_group(hstate_kobjs[hi], + &hstate_demote_attr_group); + if (retval) { pr_warn("HugeTLB unable to create demote interfaces for %s\n", h->name); + sysfs_remove_group(hstate_kobjs[hi], hstate_attr_group); + kobject_put(hstate_kobjs[hi]); + hstate_kobjs[hi] = NULL; + return retval; + } } - return retval; + return 0; } static void __init hugetlb_sysfs_init(void) @@ -3941,10 +3948,15 @@ static void hugetlb_unregister_node(struct node *node) for_each_hstate(h) { int idx = hstate_index(h); - if (nhs->hstate_kobjs[idx]) { - kobject_put(nhs->hstate_kobjs[idx]); - nhs->hstate_kobjs[idx] = NULL; - } + struct kobject *hstate_kobj = nhs->hstate_kobjs[idx]; + + if (!hstate_kobj) + continue; + if (h->demote_order) + sysfs_remove_group(hstate_kobj, &hstate_demote_attr_group); + sysfs_remove_group(hstate_kobj, &per_node_hstate_attr_group); + kobject_put(hstate_kobj); + nhs->hstate_kobjs[idx] = NULL; } kobject_put(nhs->hugepages_kobj); @@ -4019,6 +4031,14 @@ static void hugetlb_register_all_nodes(void) { } #endif +#ifdef CONFIG_CMA +static void __init hugetlb_cma_check(void); +#else +static inline __init void hugetlb_cma_check(void) +{ +} +#endif + static int __init hugetlb_init(void) { int i; @@ -4118,7 +4138,7 @@ void __init hugetlb_add_hstate(unsigned int order) h->next_nid_to_alloc = first_memory_node; h->next_nid_to_free = first_memory_node; snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB", - huge_page_size(h)/1024); + huge_page_size(h)/SZ_1K); parsed_hstate = h; } @@ -4133,11 +4153,11 @@ static void __init hugepages_clear_pages_in_node(void) if (!hugetlb_max_hstate) { default_hstate_max_huge_pages = 0; memset(default_hugepages_in_node, 0, - MAX_NUMNODES * sizeof(unsigned int)); + sizeof(default_hugepages_in_node)); } else { parsed_hstate->max_huge_pages = 0; memset(parsed_hstate->max_huge_pages_node, 0, - MAX_NUMNODES * sizeof(unsigned int)); + sizeof(parsed_hstate->max_huge_pages_node)); } } @@ -4332,18 +4352,34 @@ static int __init default_hugepagesz_setup(char *s) } __setup("default_hugepagesz=", default_hugepagesz_setup); +static nodemask_t *policy_mbind_nodemask(gfp_t gfp) +{ +#ifdef CONFIG_NUMA + struct mempolicy *mpol = get_task_policy(current); + + /* + * Only enforce MPOL_BIND policy which overlaps with cpuset policy + * (from policy_nodemask) specifically for hugetlb case + */ + if (mpol->mode == MPOL_BIND && + (apply_policy_zone(mpol, gfp_zone(gfp)) && + cpuset_nodemask_valid_mems_allowed(&mpol->nodes))) + return &mpol->nodes; +#endif + return NULL; +} + static unsigned int allowed_mems_nr(struct hstate *h) { int node; unsigned int nr = 0; - nodemask_t *mpol_allowed; + nodemask_t *mbind_nodemask; unsigned int *array = h->free_huge_pages_node; gfp_t gfp_mask = htlb_alloc_mask(h); - mpol_allowed = policy_nodemask_current(gfp_mask); - + mbind_nodemask = policy_mbind_nodemask(gfp_mask); for_each_node_mask(node, cpuset_current_mems_allowed) { - if (!mpol_allowed || node_isset(node, *mpol_allowed)) + if (!mbind_nodemask || node_isset(node, *mbind_nodemask)) nr += array[node]; } @@ -4723,7 +4759,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) { - pte_t *src_pte, *dst_pte, entry, dst_entry; + pte_t *src_pte, *dst_pte, entry; struct page *ptepage; unsigned long addr; bool cow = is_cow_mapping(src_vma->vm_flags); @@ -4768,15 +4804,13 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, /* * If the pagetables are shared don't copy or take references. - * dst_pte == src_pte is the common case of src/dest sharing. * + * dst_pte == src_pte is the common case of src/dest sharing. * However, src could have 'unshared' and dst shares with - * another vma. If dst_pte !none, this implies sharing. - * Check here before taking page table lock, and once again - * after taking the lock below. + * another vma. So page_count of ptep page is checked instead + * to reliably determine whether pte is shared. */ - dst_entry = huge_ptep_get(dst_pte); - if ((dst_pte == src_pte) || !huge_pte_none(dst_entry)) { + if (page_count(virt_to_page(dst_pte)) > 1) { addr |= last_addr_mask; continue; } @@ -4785,13 +4819,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, src_ptl = huge_pte_lockptr(h, src, src_pte); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); entry = huge_ptep_get(src_pte); - dst_entry = huge_ptep_get(dst_pte); again: - if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) { + if (huge_pte_none(entry)) { /* - * Skip if src entry none. Also, skip in the - * unlikely case dst entry !none as this implies - * sharing with another vma. + * Skip if src entry none. */ ; } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) { @@ -4870,7 +4901,7 @@ again: restore_reserve_on_error(h, dst_vma, addr, new); put_page(new); - /* dst_entry won't change as in child */ + /* huge_ptep of dst_pte won't change as in child */ goto again; } hugetlb_install_page(dst_vma, dst_pte, addr, new); @@ -5316,7 +5347,6 @@ retry_avoidcopy: u32 hash; put_page(old_page); - BUG_ON(huge_pte_none(pte)); /* * Drop hugetlb_fault_mutex and i_mmap_rwsem before * unmapping. unmapping needs to hold i_mmap_rwsem @@ -5408,19 +5438,6 @@ out_release_old: return ret; } -/* Return the pagecache page at a given address within a VMA */ -static struct page *hugetlbfs_pagecache_page(struct hstate *h, - struct vm_area_struct *vma, unsigned long address) -{ - struct address_space *mapping; - pgoff_t idx; - - mapping = vma->vm_file->f_mapping; - idx = vma_hugecache_offset(h, vma, address); - - return find_lock_page(mapping, idx); -} - /* * Return whether there is a pagecache page to back given address within VMA. * Caller follow_hugetlb_page() holds page_table_lock so we cannot lock_page. @@ -5547,7 +5564,6 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, if (idx >= size) goto out; -retry: new_page = false; page = find_lock_page(mapping, idx); if (!page) { @@ -5587,9 +5603,15 @@ retry: if (vma->vm_flags & VM_MAYSHARE) { int err = huge_add_to_page_cache(page, mapping, idx); if (err) { + /* + * err can't be -EEXIST which implies someone + * else consumed the reservation since hugetlb + * fault mutex is held when add a hugetlb page + * to the page cache. So it's safe to call + * restore_reserve_on_error() here. + */ + restore_reserve_on_error(h, vma, haddr, page); put_page(page); - if (err == -EEXIST) - goto retry; goto out; } new_pagecache_page = true; @@ -5810,7 +5832,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, /* Just decrements count, does not deallocate */ vma_end_reservation(h, vma, haddr); - pagecache_page = hugetlbfs_pagecache_page(h, vma, haddr); + pagecache_page = find_lock_page(mapping, idx); } ptl = huge_pte_lock(h, mm, ptep); @@ -6017,8 +6039,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, page_in_pagecache = true; } - ptl = huge_pte_lockptr(h, dst_mm, dst_pte); - spin_lock(ptl); + ptl = huge_pte_lock(h, dst_mm, dst_pte); /* * Recheck the i_size after holding PT lock to make sure not @@ -7334,7 +7355,7 @@ void __init hugetlb_cma_reserve(int order) hugetlb_cma_size = 0; } -void __init hugetlb_cma_check(void) +static void __init hugetlb_cma_check(void) { if (!hugetlb_cma_size || cma_reserve_called) return; |