[PATCH] mm: migration page refcounting fix
Migration code currently does not take a reference to the target page properly, so between unlocking the pte and trying to take a new reference to the page with isolate_lru_page, anything could happen to it. Fix this by holding the pte lock until we get a chance to elevate the refcount. Other small cleanups while we're here. Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
e236a166b2
commit
053837fce7
7 changed files with 76 additions and 75 deletions
|
@ -39,24 +39,3 @@ del_page_from_lru(struct zone *zone, struct page *page)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Isolate one page from the LRU lists.
|
||||
*
|
||||
* - zone->lru_lock must be held
|
||||
*/
|
||||
static inline int __isolate_lru_page(struct page *page)
|
||||
{
|
||||
if (unlikely(!TestClearPageLRU(page)))
|
||||
return 0;
|
||||
|
||||
if (get_page_testone(page)) {
|
||||
/*
|
||||
* It is being freed elsewhere
|
||||
*/
|
||||
__put_page(page);
|
||||
SetPageLRU(page);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -167,6 +167,7 @@ extern void FASTCALL(lru_cache_add_active(struct page *));
|
|||
extern void FASTCALL(activate_page(struct page *));
|
||||
extern void FASTCALL(mark_page_accessed(struct page *));
|
||||
extern void lru_add_drain(void);
|
||||
extern int lru_add_drain_all(void);
|
||||
extern int rotate_reclaimable_page(struct page *page);
|
||||
extern void swap_setup(void);
|
||||
|
||||
|
|
|
@ -94,6 +94,7 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
|
|||
* ->private_lock (try_to_unmap_one)
|
||||
* ->tree_lock (try_to_unmap_one)
|
||||
* ->zone.lru_lock (follow_page->mark_page_accessed)
|
||||
* ->zone.lru_lock (check_pte_range->isolate_lru_page)
|
||||
* ->private_lock (page_remove_rmap->set_page_dirty)
|
||||
* ->tree_lock (page_remove_rmap->set_page_dirty)
|
||||
* ->inode_lock (page_remove_rmap->set_page_dirty)
|
||||
|
|
|
@ -208,6 +208,17 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
|
|||
page = vm_normal_page(vma, addr, *pte);
|
||||
if (!page)
|
||||
continue;
|
||||
/*
|
||||
* The check for PageReserved here is important to avoid
|
||||
* handling zero pages and other pages that may have been
|
||||
* marked special by the system.
|
||||
*
|
||||
* If the PageReserved would not be checked here then f.e.
|
||||
* the location of the zero page could have an influence
|
||||
* on MPOL_MF_STRICT, zero pages would be counted for
|
||||
* the per node stats, and there would be useless attempts
|
||||
* to put zero pages on the migration list.
|
||||
*/
|
||||
if (PageReserved(page))
|
||||
continue;
|
||||
nid = page_to_nid(page);
|
||||
|
@ -216,11 +227,8 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
|
|||
|
||||
if (flags & MPOL_MF_STATS)
|
||||
gather_stats(page, private);
|
||||
else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
|
||||
spin_unlock(ptl);
|
||||
else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
|
||||
migrate_page_add(vma, page, private, flags);
|
||||
spin_lock(ptl);
|
||||
}
|
||||
else
|
||||
break;
|
||||
} while (pte++, addr += PAGE_SIZE, addr != end);
|
||||
|
@ -309,6 +317,10 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
|
|||
int err;
|
||||
struct vm_area_struct *first, *vma, *prev;
|
||||
|
||||
/* Clear the LRU lists so pages can be isolated */
|
||||
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
|
||||
lru_add_drain_all();
|
||||
|
||||
first = find_vma(mm, start);
|
||||
if (!first)
|
||||
return ERR_PTR(-EFAULT);
|
||||
|
@ -555,15 +567,8 @@ static void migrate_page_add(struct vm_area_struct *vma,
|
|||
if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) ||
|
||||
mapping_writably_mapped(page->mapping) ||
|
||||
single_mm_mapping(vma->vm_mm, page->mapping)) {
|
||||
int rc = isolate_lru_page(page);
|
||||
|
||||
if (rc == 1)
|
||||
if (isolate_lru_page(page))
|
||||
list_add(&page->lru, pagelist);
|
||||
/*
|
||||
* If the isolate attempt was not successful then we just
|
||||
* encountered an unswappable page. Something must be wrong.
|
||||
*/
|
||||
WARN_ON(rc == 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
* mapping->i_mmap_lock
|
||||
* anon_vma->lock
|
||||
* mm->page_table_lock or pte_lock
|
||||
* zone->lru_lock (in mark_page_accessed)
|
||||
* zone->lru_lock (in mark_page_accessed, isolate_lru_page)
|
||||
* swap_lock (in swap_duplicate, swap_info_get)
|
||||
* mmlist_lock (in mmput, drain_mmlist and others)
|
||||
* mapping->private_lock (in __set_page_dirty_buffers)
|
||||
|
|
26
mm/swap.c
26
mm/swap.c
|
@ -174,6 +174,32 @@ void lru_add_drain(void)
|
|||
put_cpu();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
static void lru_add_drain_per_cpu(void *dummy)
|
||||
{
|
||||
lru_add_drain();
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns 0 for success
|
||||
*/
|
||||
int lru_add_drain_all(void)
|
||||
{
|
||||
return schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* Returns 0 for success
|
||||
*/
|
||||
int lru_add_drain_all(void)
|
||||
{
|
||||
lru_add_drain();
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This path almost never happens for VM activity - pages are normally
|
||||
* freed via pagevecs. But it gets used by networking.
|
||||
|
|
71
mm/vmscan.c
71
mm/vmscan.c
|
@ -586,7 +586,7 @@ static inline void move_to_lru(struct page *page)
|
|||
}
|
||||
|
||||
/*
|
||||
* Add isolated pages on the list back to the LRU
|
||||
* Add isolated pages on the list back to the LRU.
|
||||
*
|
||||
* returns the number of pages put back.
|
||||
*/
|
||||
|
@ -760,46 +760,33 @@ int migrate_pages(struct list_head *from, struct list_head *to,
|
|||
return nr_failed + retry;
|
||||
}
|
||||
|
||||
static void lru_add_drain_per_cpu(void *dummy)
|
||||
{
|
||||
lru_add_drain();
|
||||
}
|
||||
|
||||
/*
|
||||
* Isolate one page from the LRU lists and put it on the
|
||||
* indicated list. Do necessary cache draining if the
|
||||
* page is not on the LRU lists yet.
|
||||
* indicated list with elevated refcount.
|
||||
*
|
||||
* Result:
|
||||
* 0 = page not on LRU list
|
||||
* 1 = page removed from LRU list and added to the specified list.
|
||||
* -ENOENT = page is being freed elsewhere.
|
||||
*/
|
||||
int isolate_lru_page(struct page *page)
|
||||
{
|
||||
int rc = 0;
|
||||
struct zone *zone = page_zone(page);
|
||||
int ret = 0;
|
||||
|
||||
redo:
|
||||
spin_lock_irq(&zone->lru_lock);
|
||||
rc = __isolate_lru_page(page);
|
||||
if (rc == 1) {
|
||||
if (PageActive(page))
|
||||
del_page_from_active_list(zone, page);
|
||||
else
|
||||
del_page_from_inactive_list(zone, page);
|
||||
if (PageLRU(page)) {
|
||||
struct zone *zone = page_zone(page);
|
||||
spin_lock_irq(&zone->lru_lock);
|
||||
if (TestClearPageLRU(page)) {
|
||||
ret = 1;
|
||||
get_page(page);
|
||||
if (PageActive(page))
|
||||
del_page_from_active_list(zone, page);
|
||||
else
|
||||
del_page_from_inactive_list(zone, page);
|
||||
}
|
||||
spin_unlock_irq(&zone->lru_lock);
|
||||
}
|
||||
spin_unlock_irq(&zone->lru_lock);
|
||||
if (rc == 0) {
|
||||
/*
|
||||
* Maybe this page is still waiting for a cpu to drain it
|
||||
* from one of the lru lists?
|
||||
*/
|
||||
rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
|
||||
if (rc == 0 && PageLRU(page))
|
||||
goto redo;
|
||||
}
|
||||
return rc;
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -831,18 +818,20 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src,
|
|||
page = lru_to_page(src);
|
||||
prefetchw_prev_lru_page(page, src, flags);
|
||||
|
||||
switch (__isolate_lru_page(page)) {
|
||||
case 1:
|
||||
/* Succeeded to isolate page */
|
||||
list_move(&page->lru, dst);
|
||||
nr_taken++;
|
||||
break;
|
||||
case -ENOENT:
|
||||
/* Not possible to isolate */
|
||||
list_move(&page->lru, src);
|
||||
break;
|
||||
default:
|
||||
if (!TestClearPageLRU(page))
|
||||
BUG();
|
||||
list_del(&page->lru);
|
||||
if (get_page_testone(page)) {
|
||||
/*
|
||||
* It is being freed elsewhere
|
||||
*/
|
||||
__put_page(page);
|
||||
SetPageLRU(page);
|
||||
list_add(&page->lru, src);
|
||||
continue;
|
||||
} else {
|
||||
list_add(&page->lru, dst);
|
||||
nr_taken++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue