summaryrefslogtreecommitdiff
path: root/mm/migrate.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--mm/migrate.c401
1 files changed, 185 insertions, 216 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 003886606a22..f65dd69e1fd1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -467,20 +467,21 @@ int migrate_page_move_mapping(struct address_space *mapping,
oldzone = page_zone(page);
newzone = page_zone(newpage);
- spin_lock_irq(&mapping->tree_lock);
+ xa_lock_irq(&mapping->i_pages);
- pslot = radix_tree_lookup_slot(&mapping->page_tree,
+ pslot = radix_tree_lookup_slot(&mapping->i_pages,
page_index(page));
expected_count += 1 + page_has_private(page);
if (page_count(page) != expected_count ||
- radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
- spin_unlock_irq(&mapping->tree_lock);
+ radix_tree_deref_slot_protected(pslot,
+ &mapping->i_pages.xa_lock) != page) {
+ xa_unlock_irq(&mapping->i_pages);
return -EAGAIN;
}
if (!page_ref_freeze(page, expected_count)) {
- spin_unlock_irq(&mapping->tree_lock);
+ xa_unlock_irq(&mapping->i_pages);
return -EAGAIN;
}
@@ -494,7 +495,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (mode == MIGRATE_ASYNC && head &&
!buffer_migrate_lock_buffers(head, mode)) {
page_ref_unfreeze(page, expected_count);
- spin_unlock_irq(&mapping->tree_lock);
+ xa_unlock_irq(&mapping->i_pages);
return -EAGAIN;
}
@@ -522,7 +523,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
SetPageDirty(newpage);
}
- radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
+ radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
/*
* Drop cache reference from old page by unfreezing
@@ -531,7 +532,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
*/
page_ref_unfreeze(page, expected_count - 1);
- spin_unlock(&mapping->tree_lock);
+ xa_unlock(&mapping->i_pages);
/* Leave irq disabled to prevent preemption while updating stats */
/*
@@ -574,20 +575,19 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
int expected_count;
void **pslot;
- spin_lock_irq(&mapping->tree_lock);
+ xa_lock_irq(&mapping->i_pages);
- pslot = radix_tree_lookup_slot(&mapping->page_tree,
- page_index(page));
+ pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page));
expected_count = 2 + page_has_private(page);
if (page_count(page) != expected_count ||
- radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
- spin_unlock_irq(&mapping->tree_lock);
+ radix_tree_deref_slot_protected(pslot, &mapping->i_pages.xa_lock) != page) {
+ xa_unlock_irq(&mapping->i_pages);
return -EAGAIN;
}
if (!page_ref_freeze(page, expected_count)) {
- spin_unlock_irq(&mapping->tree_lock);
+ xa_unlock_irq(&mapping->i_pages);
return -EAGAIN;
}
@@ -596,11 +596,11 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
get_page(newpage);
- radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
+ radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
page_ref_unfreeze(page, expected_count - 1);
- spin_unlock_irq(&mapping->tree_lock);
+ xa_unlock_irq(&mapping->i_pages);
return MIGRATEPAGE_SUCCESS;
}
@@ -1137,10 +1137,12 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
enum migrate_reason reason)
{
int rc = MIGRATEPAGE_SUCCESS;
- int *result = NULL;
struct page *newpage;
- newpage = get_new_page(page, private, &result);
+ if (!thp_migration_supported() && PageTransHuge(page))
+ return -ENOMEM;
+
+ newpage = get_new_page(page, private);
if (!newpage)
return -ENOMEM;
@@ -1161,14 +1163,6 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
goto out;
}
- if (unlikely(PageTransHuge(page) && !PageTransHuge(newpage))) {
- lock_page(page);
- rc = split_huge_page(page);
- unlock_page(page);
- if (rc)
- goto out;
- }
-
rc = __unmap_and_move(page, newpage, force, mode);
if (rc == MIGRATEPAGE_SUCCESS)
set_page_owner_migrate_reason(newpage, reason);
@@ -1231,12 +1225,6 @@ put_new:
put_page(newpage);
}
- if (result) {
- if (rc)
- *result = rc;
- else
- *result = page_to_nid(newpage);
- }
return rc;
}
@@ -1264,7 +1252,6 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
enum migrate_mode mode, int reason)
{
int rc = -EAGAIN;
- int *result = NULL;
int page_was_mapped = 0;
struct page *new_hpage;
struct anon_vma *anon_vma = NULL;
@@ -1281,7 +1268,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
return -ENOSYS;
}
- new_hpage = get_new_page(hpage, private, &result);
+ new_hpage = get_new_page(hpage, private);
if (!new_hpage)
return -ENOMEM;
@@ -1345,12 +1332,6 @@ out:
else
putback_active_hugepage(new_hpage);
- if (result) {
- if (rc)
- *result = rc;
- else
- *result = page_to_nid(new_hpage);
- }
return rc;
}
@@ -1395,6 +1376,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
retry = 0;
list_for_each_entry_safe(page, page2, from, lru) {
+retry:
cond_resched();
if (PageHuge(page))
@@ -1408,6 +1390,26 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
switch(rc) {
case -ENOMEM:
+ /*
+ * THP migration might be unsupported or the
+ * allocation could've failed so we should
+ * retry on the same page with the THP split
+ * to base pages.
+ *
+ * Head page is retried immediately and tail
+ * pages are added to the tail of the list so
+ * we encounter them after the rest of the list
+ * is processed.
+ */
+ if (PageTransHuge(page)) {
+ lock_page(page);
+ rc = split_huge_page_to_list(page, from);
+ unlock_page(page);
+ if (!rc) {
+ list_safe_reset_next(page, page2, lru);
+ goto retry;
+ }
+ }
nr_failed++;
goto out;
case -EAGAIN:
@@ -1444,141 +1446,101 @@ out:
}
#ifdef CONFIG_NUMA
-/*
- * Move a list of individual pages
- */
-struct page_to_node {
- unsigned long addr;
- struct page *page;
- int node;
- int status;
-};
-static struct page *new_page_node(struct page *p, unsigned long private,
- int **result)
+static int store_status(int __user *status, int start, int value, int nr)
{
- struct page_to_node *pm = (struct page_to_node *)private;
-
- while (pm->node != MAX_NUMNODES && pm->page != p)
- pm++;
+ while (nr-- > 0) {
+ if (put_user(value, status + start))
+ return -EFAULT;
+ start++;
+ }
- if (pm->node == MAX_NUMNODES)
- return NULL;
+ return 0;
+}
- *result = &pm->status;
+static int do_move_pages_to_node(struct mm_struct *mm,
+ struct list_head *pagelist, int node)
+{
+ int err;
- if (PageHuge(p))
- return alloc_huge_page_node(page_hstate(compound_head(p)),
- pm->node);
- else if (thp_migration_supported() && PageTransHuge(p)) {
- struct page *thp;
+ if (list_empty(pagelist))
+ return 0;
- thp = alloc_pages_node(pm->node,
- (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
- HPAGE_PMD_ORDER);
- if (!thp)
- return NULL;
- prep_transhuge_page(thp);
- return thp;
- } else
- return __alloc_pages_node(pm->node,
- GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
+ err = migrate_pages(pagelist, alloc_new_node_page, NULL, node,
+ MIGRATE_SYNC, MR_SYSCALL);
+ if (err)
+ putback_movable_pages(pagelist);
+ return err;
}
/*
- * Move a set of pages as indicated in the pm array. The addr
- * field must be set to the virtual address of the page to be moved
- * and the node number must contain a valid target node.
- * The pm array ends with node = MAX_NUMNODES.
+ * Resolves the given address to a struct page, isolates it from the LRU and
+ * puts it to the given pagelist.
+ * Returns -errno if the page cannot be found/isolated or 0 when it has been
+ * queued or the page doesn't need to be migrated because it is already on
+ * the target node
*/
-static int do_move_page_to_node_array(struct mm_struct *mm,
- struct page_to_node *pm,
- int migrate_all)
+static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
+ int node, struct list_head *pagelist, bool migrate_all)
{
+ struct vm_area_struct *vma;
+ struct page *page;
+ unsigned int follflags;
int err;
- struct page_to_node *pp;
- LIST_HEAD(pagelist);
down_read(&mm->mmap_sem);
+ err = -EFAULT;
+ vma = find_vma(mm, addr);
+ if (!vma || addr < vma->vm_start || !vma_migratable(vma))
+ goto out;
- /*
- * Build a list of pages to migrate
- */
- for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
- struct vm_area_struct *vma;
- struct page *page;
- struct page *head;
- unsigned int follflags;
-
- err = -EFAULT;
- vma = find_vma(mm, pp->addr);
- if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
- goto set_status;
-
- /* FOLL_DUMP to ignore special (like zero) pages */
- follflags = FOLL_GET | FOLL_DUMP;
- if (!thp_migration_supported())
- follflags |= FOLL_SPLIT;
- page = follow_page(vma, pp->addr, follflags);
+ /* FOLL_DUMP to ignore special (like zero) pages */
+ follflags = FOLL_GET | FOLL_DUMP;
+ page = follow_page(vma, addr, follflags);
- err = PTR_ERR(page);
- if (IS_ERR(page))
- goto set_status;
+ err = PTR_ERR(page);
+ if (IS_ERR(page))
+ goto out;
- err = -ENOENT;
- if (!page)
- goto set_status;
+ err = -ENOENT;
+ if (!page)
+ goto out;
- err = page_to_nid(page);
+ err = 0;
+ if (page_to_nid(page) == node)
+ goto out_putpage;
- if (err == pp->node)
- /*
- * Node already in the right place
- */
- goto put_and_set;
+ err = -EACCES;
+ if (page_mapcount(page) > 1 && !migrate_all)
+ goto out_putpage;
- err = -EACCES;
- if (page_mapcount(page) > 1 &&
- !migrate_all)
- goto put_and_set;
-
- if (PageHuge(page)) {
- if (PageHead(page)) {
- isolate_huge_page(page, &pagelist);
- err = 0;
- pp->page = page;
- }
- goto put_and_set;
+ if (PageHuge(page)) {
+ if (PageHead(page)) {
+ isolate_huge_page(page, pagelist);
+ err = 0;
}
+ } else {
+ struct page *head;
- pp->page = compound_head(page);
head = compound_head(page);
err = isolate_lru_page(head);
- if (!err) {
- list_add_tail(&head->lru, &pagelist);
- mod_node_page_state(page_pgdat(head),
- NR_ISOLATED_ANON + page_is_file_cache(head),
- hpage_nr_pages(head));
- }
-put_and_set:
- /*
- * Either remove the duplicate refcount from
- * isolate_lru_page() or drop the page ref if it was
- * not isolated.
- */
- put_page(page);
-set_status:
- pp->status = err;
- }
-
- err = 0;
- if (!list_empty(&pagelist)) {
- err = migrate_pages(&pagelist, new_page_node, NULL,
- (unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
if (err)
- putback_movable_pages(&pagelist);
- }
+ goto out_putpage;
+ err = 0;
+ list_add_tail(&head->lru, pagelist);
+ mod_node_page_state(page_pgdat(head),
+ NR_ISOLATED_ANON + page_is_file_cache(head),
+ hpage_nr_pages(head));
+ }
+out_putpage:
+ /*
+ * Either remove the duplicate refcount from
+ * isolate_lru_page() or drop the page ref if it was
+ * not isolated.
+ */
+ put_page(page);
+out:
up_read(&mm->mmap_sem);
return err;
}
@@ -1593,79 +1555,79 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
const int __user *nodes,
int __user *status, int flags)
{
- struct page_to_node *pm;
- unsigned long chunk_nr_pages;
- unsigned long chunk_start;
- int err;
-
- err = -ENOMEM;
- pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
- if (!pm)
- goto out;
+ int current_node = NUMA_NO_NODE;
+ LIST_HEAD(pagelist);
+ int start, i;
+ int err = 0, err1;
migrate_prep();
- /*
- * Store a chunk of page_to_node array in a page,
- * but keep the last one as a marker
- */
- chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
-
- for (chunk_start = 0;
- chunk_start < nr_pages;
- chunk_start += chunk_nr_pages) {
- int j;
+ for (i = start = 0; i < nr_pages; i++) {
+ const void __user *p;
+ unsigned long addr;
+ int node;
- if (chunk_start + chunk_nr_pages > nr_pages)
- chunk_nr_pages = nr_pages - chunk_start;
-
- /* fill the chunk pm with addrs and nodes from user-space */
- for (j = 0; j < chunk_nr_pages; j++) {
- const void __user *p;
- int node;
-
- err = -EFAULT;
- if (get_user(p, pages + j + chunk_start))
- goto out_pm;
- pm[j].addr = (unsigned long) p;
-
- if (get_user(node, nodes + j + chunk_start))
- goto out_pm;
-
- err = -ENODEV;
- if (node < 0 || node >= MAX_NUMNODES)
- goto out_pm;
-
- if (!node_state(node, N_MEMORY))
- goto out_pm;
-
- err = -EACCES;
- if (!node_isset(node, task_nodes))
- goto out_pm;
+ err = -EFAULT;
+ if (get_user(p, pages + i))
+ goto out_flush;
+ if (get_user(node, nodes + i))
+ goto out_flush;
+ addr = (unsigned long)p;
+
+ err = -ENODEV;
+ if (node < 0 || node >= MAX_NUMNODES)
+ goto out_flush;
+ if (!node_state(node, N_MEMORY))
+ goto out_flush;
- pm[j].node = node;
+ err = -EACCES;
+ if (!node_isset(node, task_nodes))
+ goto out_flush;
+
+ if (current_node == NUMA_NO_NODE) {
+ current_node = node;
+ start = i;
+ } else if (node != current_node) {
+ err = do_move_pages_to_node(mm, &pagelist, current_node);
+ if (err)
+ goto out;
+ err = store_status(status, start, current_node, i - start);
+ if (err)
+ goto out;
+ start = i;
+ current_node = node;
}
- /* End marker for this chunk */
- pm[chunk_nr_pages].node = MAX_NUMNODES;
-
- /* Migrate this chunk */
- err = do_move_page_to_node_array(mm, pm,
- flags & MPOL_MF_MOVE_ALL);
- if (err < 0)
- goto out_pm;
+ /*
+ * Errors in the page lookup or isolation are not fatal and we simply
+ * report them via status
+ */
+ err = add_page_for_migration(mm, addr, current_node,
+ &pagelist, flags & MPOL_MF_MOVE_ALL);
+ if (!err)
+ continue;
- /* Return status information */
- for (j = 0; j < chunk_nr_pages; j++)
- if (put_user(pm[j].status, status + j + chunk_start)) {
- err = -EFAULT;
- goto out_pm;
- }
- }
- err = 0;
+ err = store_status(status, i, err, 1);
+ if (err)
+ goto out_flush;
-out_pm:
- free_page((unsigned long)pm);
+ err = do_move_pages_to_node(mm, &pagelist, current_node);
+ if (err)
+ goto out;
+ if (i > start) {
+ err = store_status(status, start, current_node, i - start);
+ if (err)
+ goto out;
+ }
+ current_node = NUMA_NO_NODE;
+ }
+out_flush:
+ /* Make sure we do not overwrite the existing error */
+ err1 = do_move_pages_to_node(mm, &pagelist, current_node);
+ if (!err1)
+ err1 = store_status(status, start, current_node, i - start);
+ if (!err)
+ err = err1;
out:
return err;
}
@@ -1866,8 +1828,7 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
}
static struct page *alloc_misplaced_dst_page(struct page *page,
- unsigned long data,
- int **result)
+ unsigned long data)
{
int nid = (int) data;
struct page *newpage;
@@ -1987,6 +1948,13 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
goto out;
/*
+ * Also do not migrate dirty pages as not all filesystems can move
+ * dirty pages in MIGRATE_ASYNC mode which is a waste of cycles.
+ */
+ if (page_is_file_cache(page) && PageDirty(page))
+ goto out;
+
+ /*
* Rate-limit the amount of data that is being migrated to a node.
* Optimal placement is no good if the memory bus is saturated and
* all the time is being spent migrating!
@@ -2339,7 +2307,8 @@ again:
ptep_get_and_clear(mm, addr, ptep);
/* Setup special migration page table entry */
- entry = make_migration_entry(page, pte_write(pte));
+ entry = make_migration_entry(page, mpfn &
+ MIGRATE_PFN_WRITE);
swp_pte = swp_entry_to_pte(entry);
if (pte_soft_dirty(pte))
swp_pte = pte_swp_mksoft_dirty(swp_pte);