diff options
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- | mm/hugetlb.c | 91 |
1 files changed, 81 insertions, 10 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ec49d9ef1eef..3edb759c5c7d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1826,11 +1826,17 @@ static void return_unused_surplus_pages(struct hstate *h, * is not the case is if a reserve map was changed between calls. It * is the responsibility of the caller to notice the difference and * take appropriate action. + * + * vma_add_reservation is used in error paths where a reservation must + * be restored when a newly allocated huge page must be freed. It is + * to be called after calling vma_needs_reservation to determine if a + * reservation exists. */ enum vma_resv_mode { VMA_NEEDS_RESV, VMA_COMMIT_RESV, VMA_END_RESV, + VMA_ADD_RESV, }; static long __vma_reservation_common(struct hstate *h, struct vm_area_struct *vma, unsigned long addr, @@ -1856,6 +1862,14 @@ static long __vma_reservation_common(struct hstate *h, region_abort(resv, idx, idx + 1); ret = 0; break; + case VMA_ADD_RESV: + if (vma->vm_flags & VM_MAYSHARE) + ret = region_add(resv, idx, idx + 1); + else { + region_abort(resv, idx, idx + 1); + ret = region_del(resv, idx, idx + 1); + } + break; default: BUG(); } @@ -1903,6 +1917,56 @@ static void vma_end_reservation(struct hstate *h, (void)__vma_reservation_common(h, vma, addr, VMA_END_RESV); } +static long vma_add_reservation(struct hstate *h, + struct vm_area_struct *vma, unsigned long addr) +{ + return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV); +} + +/* + * This routine is called to restore a reservation on error paths. In the + * specific error paths, a huge page was allocated (via alloc_huge_page) + * and is about to be freed. If a reservation for the page existed, + * alloc_huge_page would have consumed the reservation and set PagePrivate + * in the newly allocated page. When the page is freed via free_huge_page, + * the global reservation count will be incremented if PagePrivate is set. + * However, free_huge_page can not adjust the reserve map. Adjust the + * reserve map here to be consistent with global reserve count adjustments + * to be made by free_huge_page. + */ +static void restore_reserve_on_error(struct hstate *h, + struct vm_area_struct *vma, unsigned long address, + struct page *page) +{ + if (unlikely(PagePrivate(page))) { + long rc = vma_needs_reservation(h, vma, address); + + if (unlikely(rc < 0)) { + /* + * Rare out of memory condition in reserve map + * manipulation. Clear PagePrivate so that + * global reserve count will not be incremented + * by free_huge_page. This will make it appear + * as though the reservation for this page was + * consumed. This may prevent the task from + * faulting in the page at a later time. This + * is better than inconsistent global huge page + * accounting of reserve counts. + */ + ClearPagePrivate(page); + } else if (rc) { + rc = vma_add_reservation(h, vma, address); + if (unlikely(rc < 0)) + /* + * See above comment about rare out of + * memory condition. + */ + ClearPagePrivate(page); + } else + vma_end_reservation(h, vma, address); + } +} + struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve) { @@ -3222,6 +3286,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, BUG_ON(start & ~huge_page_mask(h)); BUG_ON(end & ~huge_page_mask(h)); + /* + * This is a hugetlb vma, all the pte entries should point + * to huge page. + */ + tlb_remove_check_page_size_change(tlb, sz); tlb_start_vma(tlb, vma); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); address = start; @@ -3272,7 +3341,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, } pte = huge_ptep_get_and_clear(mm, address, ptep); - tlb_remove_tlb_entry(tlb, ptep, address); + tlb_remove_huge_tlb_entry(h, tlb, ptep, address); if (huge_pte_dirty(pte)) set_page_dirty(page); @@ -3386,15 +3455,17 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, * Keep the pte_same checks anyway to make transition from the mutex easier. */ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, pte_t pte, - struct page *pagecache_page, spinlock_t *ptl) + unsigned long address, pte_t *ptep, + struct page *pagecache_page, spinlock_t *ptl) { + pte_t pte; struct hstate *h = hstate_vma(vma); struct page *old_page, *new_page; int ret = 0, outside_reserve = 0; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ + pte = huge_ptep_get(ptep); old_page = pte_page(pte); retry_avoidcopy: @@ -3498,6 +3569,7 @@ retry_avoidcopy: spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); out_release_all: + restore_reserve_on_error(h, vma, address, new_page); put_page(new_page); out_release_old: put_page(old_page); @@ -3646,8 +3718,7 @@ retry: vma_end_reservation(h, vma, address); } - ptl = huge_pte_lockptr(h, mm, ptep); - spin_lock(ptl); + ptl = huge_pte_lock(h, mm, ptep); size = i_size_read(mapping->host) >> huge_page_shift(h); if (idx >= size) goto backout; @@ -3668,7 +3739,7 @@ retry: hugetlb_count_add(pages_per_huge_page(h), mm); if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { /* Optimization, do the COW without a second fault */ - ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl); + ret = hugetlb_cow(mm, vma, address, ptep, page, ptl); } spin_unlock(ptl); @@ -3680,6 +3751,7 @@ backout: spin_unlock(ptl); backout_unlocked: unlock_page(page); + restore_reserve_on_error(h, vma, address, page); put_page(page); goto out; } @@ -3822,8 +3894,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (flags & FAULT_FLAG_WRITE) { if (!huge_pte_write(entry)) { - ret = hugetlb_cow(mm, vma, address, ptep, entry, - pagecache_page, ptl); + ret = hugetlb_cow(mm, vma, address, ptep, + pagecache_page, ptl); goto out_put_page; } entry = huge_pte_mkdirty(entry); @@ -4264,8 +4336,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) if (!spte) goto out; - ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte); - spin_lock(ptl); + ptl = huge_pte_lock(hstate_vma(vma), mm, spte); if (pud_none(*pud)) { pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); |