diff options
Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 401 |
1 files changed, 185 insertions, 216 deletions
diff --git a/mm/migrate.c b/mm/migrate.c index 003886606a22..f65dd69e1fd1 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -467,20 +467,21 @@ int migrate_page_move_mapping(struct address_space *mapping, oldzone = page_zone(page); newzone = page_zone(newpage); - spin_lock_irq(&mapping->tree_lock); + xa_lock_irq(&mapping->i_pages); - pslot = radix_tree_lookup_slot(&mapping->page_tree, + pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page)); expected_count += 1 + page_has_private(page); if (page_count(page) != expected_count || - radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) { - spin_unlock_irq(&mapping->tree_lock); + radix_tree_deref_slot_protected(pslot, + &mapping->i_pages.xa_lock) != page) { + xa_unlock_irq(&mapping->i_pages); return -EAGAIN; } if (!page_ref_freeze(page, expected_count)) { - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); return -EAGAIN; } @@ -494,7 +495,7 @@ int migrate_page_move_mapping(struct address_space *mapping, if (mode == MIGRATE_ASYNC && head && !buffer_migrate_lock_buffers(head, mode)) { page_ref_unfreeze(page, expected_count); - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); return -EAGAIN; } @@ -522,7 +523,7 @@ int migrate_page_move_mapping(struct address_space *mapping, SetPageDirty(newpage); } - radix_tree_replace_slot(&mapping->page_tree, pslot, newpage); + radix_tree_replace_slot(&mapping->i_pages, pslot, newpage); /* * Drop cache reference from old page by unfreezing @@ -531,7 +532,7 @@ int migrate_page_move_mapping(struct address_space *mapping, */ page_ref_unfreeze(page, expected_count - 1); - spin_unlock(&mapping->tree_lock); + xa_unlock(&mapping->i_pages); /* Leave irq disabled to prevent preemption while updating stats */ /* @@ -574,20 +575,19 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, int expected_count; void **pslot; - spin_lock_irq(&mapping->tree_lock); + xa_lock_irq(&mapping->i_pages); - pslot = radix_tree_lookup_slot(&mapping->page_tree, - page_index(page)); + pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page)); expected_count = 2 + page_has_private(page); if (page_count(page) != expected_count || - radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) { - spin_unlock_irq(&mapping->tree_lock); + radix_tree_deref_slot_protected(pslot, &mapping->i_pages.xa_lock) != page) { + xa_unlock_irq(&mapping->i_pages); return -EAGAIN; } if (!page_ref_freeze(page, expected_count)) { - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); return -EAGAIN; } @@ -596,11 +596,11 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, get_page(newpage); - radix_tree_replace_slot(&mapping->page_tree, pslot, newpage); + radix_tree_replace_slot(&mapping->i_pages, pslot, newpage); page_ref_unfreeze(page, expected_count - 1); - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); return MIGRATEPAGE_SUCCESS; } @@ -1137,10 +1137,12 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, enum migrate_reason reason) { int rc = MIGRATEPAGE_SUCCESS; - int *result = NULL; struct page *newpage; - newpage = get_new_page(page, private, &result); + if (!thp_migration_supported() && PageTransHuge(page)) + return -ENOMEM; + + newpage = get_new_page(page, private); if (!newpage) return -ENOMEM; @@ -1161,14 +1163,6 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, goto out; } - if (unlikely(PageTransHuge(page) && !PageTransHuge(newpage))) { - lock_page(page); - rc = split_huge_page(page); - unlock_page(page); - if (rc) - goto out; - } - rc = __unmap_and_move(page, newpage, force, mode); if (rc == MIGRATEPAGE_SUCCESS) set_page_owner_migrate_reason(newpage, reason); @@ -1231,12 +1225,6 @@ put_new: put_page(newpage); } - if (result) { - if (rc) - *result = rc; - else - *result = page_to_nid(newpage); - } return rc; } @@ -1264,7 +1252,6 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, enum migrate_mode mode, int reason) { int rc = -EAGAIN; - int *result = NULL; int page_was_mapped = 0; struct page *new_hpage; struct anon_vma *anon_vma = NULL; @@ -1281,7 +1268,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, return -ENOSYS; } - new_hpage = get_new_page(hpage, private, &result); + new_hpage = get_new_page(hpage, private); if (!new_hpage) return -ENOMEM; @@ -1345,12 +1332,6 @@ out: else putback_active_hugepage(new_hpage); - if (result) { - if (rc) - *result = rc; - else - *result = page_to_nid(new_hpage); - } return rc; } @@ -1395,6 +1376,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, retry = 0; list_for_each_entry_safe(page, page2, from, lru) { +retry: cond_resched(); if (PageHuge(page)) @@ -1408,6 +1390,26 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, switch(rc) { case -ENOMEM: + /* + * THP migration might be unsupported or the + * allocation could've failed so we should + * retry on the same page with the THP split + * to base pages. + * + * Head page is retried immediately and tail + * pages are added to the tail of the list so + * we encounter them after the rest of the list + * is processed. + */ + if (PageTransHuge(page)) { + lock_page(page); + rc = split_huge_page_to_list(page, from); + unlock_page(page); + if (!rc) { + list_safe_reset_next(page, page2, lru); + goto retry; + } + } nr_failed++; goto out; case -EAGAIN: @@ -1444,141 +1446,101 @@ out: } #ifdef CONFIG_NUMA -/* - * Move a list of individual pages - */ -struct page_to_node { - unsigned long addr; - struct page *page; - int node; - int status; -}; -static struct page *new_page_node(struct page *p, unsigned long private, - int **result) +static int store_status(int __user *status, int start, int value, int nr) { - struct page_to_node *pm = (struct page_to_node *)private; - - while (pm->node != MAX_NUMNODES && pm->page != p) - pm++; + while (nr-- > 0) { + if (put_user(value, status + start)) + return -EFAULT; + start++; + } - if (pm->node == MAX_NUMNODES) - return NULL; + return 0; +} - *result = &pm->status; +static int do_move_pages_to_node(struct mm_struct *mm, + struct list_head *pagelist, int node) +{ + int err; - if (PageHuge(p)) - return alloc_huge_page_node(page_hstate(compound_head(p)), - pm->node); - else if (thp_migration_supported() && PageTransHuge(p)) { - struct page *thp; + if (list_empty(pagelist)) + return 0; - thp = alloc_pages_node(pm->node, - (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM, - HPAGE_PMD_ORDER); - if (!thp) - return NULL; - prep_transhuge_page(thp); - return thp; - } else - return __alloc_pages_node(pm->node, - GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0); + err = migrate_pages(pagelist, alloc_new_node_page, NULL, node, + MIGRATE_SYNC, MR_SYSCALL); + if (err) + putback_movable_pages(pagelist); + return err; } /* - * Move a set of pages as indicated in the pm array. The addr - * field must be set to the virtual address of the page to be moved - * and the node number must contain a valid target node. - * The pm array ends with node = MAX_NUMNODES. + * Resolves the given address to a struct page, isolates it from the LRU and + * puts it to the given pagelist. + * Returns -errno if the page cannot be found/isolated or 0 when it has been + * queued or the page doesn't need to be migrated because it is already on + * the target node */ -static int do_move_page_to_node_array(struct mm_struct *mm, - struct page_to_node *pm, - int migrate_all) +static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, + int node, struct list_head *pagelist, bool migrate_all) { + struct vm_area_struct *vma; + struct page *page; + unsigned int follflags; int err; - struct page_to_node *pp; - LIST_HEAD(pagelist); down_read(&mm->mmap_sem); + err = -EFAULT; + vma = find_vma(mm, addr); + if (!vma || addr < vma->vm_start || !vma_migratable(vma)) + goto out; - /* - * Build a list of pages to migrate - */ - for (pp = pm; pp->node != MAX_NUMNODES; pp++) { - struct vm_area_struct *vma; - struct page *page; - struct page *head; - unsigned int follflags; - - err = -EFAULT; - vma = find_vma(mm, pp->addr); - if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma)) - goto set_status; - - /* FOLL_DUMP to ignore special (like zero) pages */ - follflags = FOLL_GET | FOLL_DUMP; - if (!thp_migration_supported()) - follflags |= FOLL_SPLIT; - page = follow_page(vma, pp->addr, follflags); + /* FOLL_DUMP to ignore special (like zero) pages */ + follflags = FOLL_GET | FOLL_DUMP; + page = follow_page(vma, addr, follflags); - err = PTR_ERR(page); - if (IS_ERR(page)) - goto set_status; + err = PTR_ERR(page); + if (IS_ERR(page)) + goto out; - err = -ENOENT; - if (!page) - goto set_status; + err = -ENOENT; + if (!page) + goto out; - err = page_to_nid(page); + err = 0; + if (page_to_nid(page) == node) + goto out_putpage; - if (err == pp->node) - /* - * Node already in the right place - */ - goto put_and_set; + err = -EACCES; + if (page_mapcount(page) > 1 && !migrate_all) + goto out_putpage; - err = -EACCES; - if (page_mapcount(page) > 1 && - !migrate_all) - goto put_and_set; - - if (PageHuge(page)) { - if (PageHead(page)) { - isolate_huge_page(page, &pagelist); - err = 0; - pp->page = page; - } - goto put_and_set; + if (PageHuge(page)) { + if (PageHead(page)) { + isolate_huge_page(page, pagelist); + err = 0; } + } else { + struct page *head; - pp->page = compound_head(page); head = compound_head(page); err = isolate_lru_page(head); - if (!err) { - list_add_tail(&head->lru, &pagelist); - mod_node_page_state(page_pgdat(head), - NR_ISOLATED_ANON + page_is_file_cache(head), - hpage_nr_pages(head)); - } -put_and_set: - /* - * Either remove the duplicate refcount from - * isolate_lru_page() or drop the page ref if it was - * not isolated. - */ - put_page(page); -set_status: - pp->status = err; - } - - err = 0; - if (!list_empty(&pagelist)) { - err = migrate_pages(&pagelist, new_page_node, NULL, - (unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL); if (err) - putback_movable_pages(&pagelist); - } + goto out_putpage; + err = 0; + list_add_tail(&head->lru, pagelist); + mod_node_page_state(page_pgdat(head), + NR_ISOLATED_ANON + page_is_file_cache(head), + hpage_nr_pages(head)); + } +out_putpage: + /* + * Either remove the duplicate refcount from + * isolate_lru_page() or drop the page ref if it was + * not isolated. + */ + put_page(page); +out: up_read(&mm->mmap_sem); return err; } @@ -1593,79 +1555,79 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, const int __user *nodes, int __user *status, int flags) { - struct page_to_node *pm; - unsigned long chunk_nr_pages; - unsigned long chunk_start; - int err; - - err = -ENOMEM; - pm = (struct page_to_node *)__get_free_page(GFP_KERNEL); - if (!pm) - goto out; + int current_node = NUMA_NO_NODE; + LIST_HEAD(pagelist); + int start, i; + int err = 0, err1; migrate_prep(); - /* - * Store a chunk of page_to_node array in a page, - * but keep the last one as a marker - */ - chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1; - - for (chunk_start = 0; - chunk_start < nr_pages; - chunk_start += chunk_nr_pages) { - int j; + for (i = start = 0; i < nr_pages; i++) { + const void __user *p; + unsigned long addr; + int node; - if (chunk_start + chunk_nr_pages > nr_pages) - chunk_nr_pages = nr_pages - chunk_start; - - /* fill the chunk pm with addrs and nodes from user-space */ - for (j = 0; j < chunk_nr_pages; j++) { - const void __user *p; - int node; - - err = -EFAULT; - if (get_user(p, pages + j + chunk_start)) - goto out_pm; - pm[j].addr = (unsigned long) p; - - if (get_user(node, nodes + j + chunk_start)) - goto out_pm; - - err = -ENODEV; - if (node < 0 || node >= MAX_NUMNODES) - goto out_pm; - - if (!node_state(node, N_MEMORY)) - goto out_pm; - - err = -EACCES; - if (!node_isset(node, task_nodes)) - goto out_pm; + err = -EFAULT; + if (get_user(p, pages + i)) + goto out_flush; + if (get_user(node, nodes + i)) + goto out_flush; + addr = (unsigned long)p; + + err = -ENODEV; + if (node < 0 || node >= MAX_NUMNODES) + goto out_flush; + if (!node_state(node, N_MEMORY)) + goto out_flush; - pm[j].node = node; + err = -EACCES; + if (!node_isset(node, task_nodes)) + goto out_flush; + + if (current_node == NUMA_NO_NODE) { + current_node = node; + start = i; + } else if (node != current_node) { + err = do_move_pages_to_node(mm, &pagelist, current_node); + if (err) + goto out; + err = store_status(status, start, current_node, i - start); + if (err) + goto out; + start = i; + current_node = node; } - /* End marker for this chunk */ - pm[chunk_nr_pages].node = MAX_NUMNODES; - - /* Migrate this chunk */ - err = do_move_page_to_node_array(mm, pm, - flags & MPOL_MF_MOVE_ALL); - if (err < 0) - goto out_pm; + /* + * Errors in the page lookup or isolation are not fatal and we simply + * report them via status + */ + err = add_page_for_migration(mm, addr, current_node, + &pagelist, flags & MPOL_MF_MOVE_ALL); + if (!err) + continue; - /* Return status information */ - for (j = 0; j < chunk_nr_pages; j++) - if (put_user(pm[j].status, status + j + chunk_start)) { - err = -EFAULT; - goto out_pm; - } - } - err = 0; + err = store_status(status, i, err, 1); + if (err) + goto out_flush; -out_pm: - free_page((unsigned long)pm); + err = do_move_pages_to_node(mm, &pagelist, current_node); + if (err) + goto out; + if (i > start) { + err = store_status(status, start, current_node, i - start); + if (err) + goto out; + } + current_node = NUMA_NO_NODE; + } +out_flush: + /* Make sure we do not overwrite the existing error */ + err1 = do_move_pages_to_node(mm, &pagelist, current_node); + if (!err1) + err1 = store_status(status, start, current_node, i - start); + if (!err) + err = err1; out: return err; } @@ -1866,8 +1828,7 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat, } static struct page *alloc_misplaced_dst_page(struct page *page, - unsigned long data, - int **result) + unsigned long data) { int nid = (int) data; struct page *newpage; @@ -1987,6 +1948,13 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, goto out; /* + * Also do not migrate dirty pages as not all filesystems can move + * dirty pages in MIGRATE_ASYNC mode which is a waste of cycles. + */ + if (page_is_file_cache(page) && PageDirty(page)) + goto out; + + /* * Rate-limit the amount of data that is being migrated to a node. * Optimal placement is no good if the memory bus is saturated and * all the time is being spent migrating! @@ -2339,7 +2307,8 @@ again: ptep_get_and_clear(mm, addr, ptep); /* Setup special migration page table entry */ - entry = make_migration_entry(page, pte_write(pte)); + entry = make_migration_entry(page, mpfn & + MIGRATE_PFN_WRITE); swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(pte)) swp_pte = pte_swp_mksoft_dirty(swp_pte); |