mm: Close races between THP migration and PMD numa clearing
THP migration uses the page lock to guard against parallel allocations, but there are cases like this still open:

  Task A                        Task B
  ---------------------         ---------------------
  do_huge_pmd_numa_page         do_huge_pmd_numa_page
  lock_page
  mpol_misplaced == -1
  unlock_page
  goto clear_pmdnuma
                                lock_page
                                mpol_misplaced == 2
                                migrate_misplaced_transhuge
  pmd = pmd_mknonnuma
  set_pmd_at

During hours of testing, one crashed with weird errors and while I have no direct evidence, I suspect something like the race above happened. This patch extends the page lock to being held until the pmd_numa is cleared to prevent migration starting in parallel while the pmd_numa is being cleared. It also flushes the old pmd entry and orders pagetable insertion before rmap insertion.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-9-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
8191acbd30
commit
a54a407fbf
2 changed files with 26 additions and 26 deletions
|
@ -1304,24 +1304,25 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
target_nid = mpol_misplaced(page, vma, haddr);
|
target_nid = mpol_misplaced(page, vma, haddr);
|
||||||
if (target_nid == -1) {
|
if (target_nid == -1) {
|
||||||
/* If the page was locked, there are no parallel migrations */
|
/* If the page was locked, there are no parallel migrations */
|
||||||
if (page_locked) {
|
if (page_locked)
|
||||||
unlock_page(page);
|
|
||||||
goto clear_pmdnuma;
|
goto clear_pmdnuma;
|
||||||
}
|
|
||||||
|
|
||||||
/* Otherwise wait for potential migrations and retry fault */
|
/*
|
||||||
|
* Otherwise wait for potential migrations and retry. We do
|
||||||
|
* relock and check_same as the page may no longer be mapped.
|
||||||
|
* As the fault is being retried, do not account for it.
|
||||||
|
*/
|
||||||
spin_unlock(&mm->page_table_lock);
|
spin_unlock(&mm->page_table_lock);
|
||||||
wait_on_page_locked(page);
|
wait_on_page_locked(page);
|
||||||
|
page_nid = -1;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Page is misplaced, serialise migrations and parallel THP splits */
|
/* Page is misplaced, serialise migrations and parallel THP splits */
|
||||||
get_page(page);
|
get_page(page);
|
||||||
spin_unlock(&mm->page_table_lock);
|
spin_unlock(&mm->page_table_lock);
|
||||||
if (!page_locked) {
|
if (!page_locked)
|
||||||
lock_page(page);
|
lock_page(page);
|
||||||
page_locked = true;
|
|
||||||
}
|
|
||||||
anon_vma = page_lock_anon_vma_read(page);
|
anon_vma = page_lock_anon_vma_read(page);
|
||||||
|
|
||||||
/* Confirm the PMD did not change while page_table_lock was released */
|
/* Confirm the PMD did not change while page_table_lock was released */
|
||||||
|
@ -1329,32 +1330,28 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
if (unlikely(!pmd_same(pmd, *pmdp))) {
|
if (unlikely(!pmd_same(pmd, *pmdp))) {
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
put_page(page);
|
put_page(page);
|
||||||
|
page_nid = -1;
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Migrate the THP to the requested node */
|
/*
|
||||||
|
* Migrate the THP to the requested node, returns with page unlocked
|
||||||
|
* and pmd_numa cleared.
|
||||||
|
*/
|
||||||
spin_unlock(&mm->page_table_lock);
|
spin_unlock(&mm->page_table_lock);
|
||||||
migrated = migrate_misplaced_transhuge_page(mm, vma,
|
migrated = migrate_misplaced_transhuge_page(mm, vma,
|
||||||
pmdp, pmd, addr, page, target_nid);
|
pmdp, pmd, addr, page, target_nid);
|
||||||
if (migrated)
|
if (migrated)
|
||||||
page_nid = target_nid;
|
page_nid = target_nid;
|
||||||
else
|
|
||||||
goto check_same;
|
|
||||||
|
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
check_same:
|
|
||||||
spin_lock(&mm->page_table_lock);
|
|
||||||
if (unlikely(!pmd_same(pmd, *pmdp))) {
|
|
||||||
/* Someone else took our fault */
|
|
||||||
page_nid = -1;
|
|
||||||
goto out_unlock;
|
|
||||||
}
|
|
||||||
clear_pmdnuma:
|
clear_pmdnuma:
|
||||||
|
BUG_ON(!PageLocked(page));
|
||||||
pmd = pmd_mknonnuma(pmd);
|
pmd = pmd_mknonnuma(pmd);
|
||||||
set_pmd_at(mm, haddr, pmdp, pmd);
|
set_pmd_at(mm, haddr, pmdp, pmd);
|
||||||
VM_BUG_ON(pmd_numa(*pmdp));
|
VM_BUG_ON(pmd_numa(*pmdp));
|
||||||
update_mmu_cache_pmd(vma, addr, pmdp);
|
update_mmu_cache_pmd(vma, addr, pmdp);
|
||||||
|
unlock_page(page);
|
||||||
out_unlock:
|
out_unlock:
|
||||||
spin_unlock(&mm->page_table_lock);
|
spin_unlock(&mm->page_table_lock);
|
||||||
|
|
||||||
|
|
19
mm/migrate.c
19
mm/migrate.c
|
@ -1713,12 +1713,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
|
||||||
unlock_page(new_page);
|
unlock_page(new_page);
|
||||||
put_page(new_page); /* Free it */
|
put_page(new_page); /* Free it */
|
||||||
|
|
||||||
unlock_page(page);
|
/* Retake the callers reference and putback on LRU */
|
||||||
|
get_page(page);
|
||||||
putback_lru_page(page);
|
putback_lru_page(page);
|
||||||
|
mod_zone_page_state(page_zone(page),
|
||||||
count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
|
NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
|
||||||
isolated = 0;
|
goto out_fail;
|
||||||
goto out;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1735,9 +1735,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
|
||||||
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
|
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
|
||||||
entry = pmd_mkhuge(entry);
|
entry = pmd_mkhuge(entry);
|
||||||
|
|
||||||
page_add_new_anon_rmap(new_page, vma, haddr);
|
pmdp_clear_flush(vma, haddr, pmd);
|
||||||
|
|
||||||
set_pmd_at(mm, haddr, pmd, entry);
|
set_pmd_at(mm, haddr, pmd, entry);
|
||||||
|
page_add_new_anon_rmap(new_page, vma, haddr);
|
||||||
update_mmu_cache_pmd(vma, address, &entry);
|
update_mmu_cache_pmd(vma, address, &entry);
|
||||||
page_remove_rmap(page);
|
page_remove_rmap(page);
|
||||||
/*
|
/*
|
||||||
|
@ -1756,7 +1756,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
|
||||||
count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
|
count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
|
||||||
count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
|
count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
|
||||||
|
|
||||||
out:
|
|
||||||
mod_zone_page_state(page_zone(page),
|
mod_zone_page_state(page_zone(page),
|
||||||
NR_ISOLATED_ANON + page_lru,
|
NR_ISOLATED_ANON + page_lru,
|
||||||
-HPAGE_PMD_NR);
|
-HPAGE_PMD_NR);
|
||||||
|
@ -1765,6 +1764,10 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
|
||||||
out_fail:
|
out_fail:
|
||||||
count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
|
count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
|
||||||
out_dropref:
|
out_dropref:
|
||||||
|
entry = pmd_mknonnuma(entry);
|
||||||
|
set_pmd_at(mm, haddr, pmd, entry);
|
||||||
|
update_mmu_cache_pmd(vma, address, &entry);
|
||||||
|
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
put_page(page);
|
put_page(page);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Loading…
Reference in a new issue