mm, thp: change pmd_trans_huge_lock() to return taken lock

With split ptlocks, it is important to know which lock
pmd_trans_huge_lock() took.  This patch adds one more parameter to the
function, through which the taken lock is returned.

In most places, migration to the new API is trivial.  The exception is
move_huge_pmd(): we need to take two locks if the pmd tables are different.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Kirill A. Shutemov 2013-11-14 14:30:54 -08:00 committed by Linus Torvalds
parent 9a86cb7bdc
commit bf929152e9
4 changed files with 46 additions and 31 deletions

View file

@ -506,9 +506,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t *pte; pte_t *pte;
spinlock_t *ptl; spinlock_t *ptl;
if (pmd_trans_huge_lock(pmd, vma) == 1) { if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk); smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
spin_unlock(&walk->mm->page_table_lock); spin_unlock(ptl);
mss->anonymous_thp += HPAGE_PMD_SIZE; mss->anonymous_thp += HPAGE_PMD_SIZE;
return 0; return 0;
} }
@ -999,13 +999,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
{ {
struct vm_area_struct *vma; struct vm_area_struct *vma;
struct pagemapread *pm = walk->private; struct pagemapread *pm = walk->private;
spinlock_t *ptl;
pte_t *pte; pte_t *pte;
int err = 0; int err = 0;
pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
/* find the first VMA at or above 'addr' */ /* find the first VMA at or above 'addr' */
vma = find_vma(walk->mm, addr); vma = find_vma(walk->mm, addr);
if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
int pmd_flags2; int pmd_flags2;
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd)) if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
@ -1023,7 +1024,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (err) if (err)
break; break;
} }
spin_unlock(&walk->mm->page_table_lock); spin_unlock(ptl);
return err; return err;
} }
@ -1325,7 +1326,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
md = walk->private; md = walk->private;
if (pmd_trans_huge_lock(pmd, md->vma) == 1) { if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
pte_t huge_pte = *(pte_t *)pmd; pte_t huge_pte = *(pte_t *)pmd;
struct page *page; struct page *page;
@ -1333,7 +1334,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
if (page) if (page)
gather_stats(page, md, pte_dirty(huge_pte), gather_stats(page, md, pte_dirty(huge_pte),
HPAGE_PMD_SIZE/PAGE_SIZE); HPAGE_PMD_SIZE/PAGE_SIZE);
spin_unlock(&walk->mm->page_table_lock); spin_unlock(ptl);
return 0; return 0;
} }

View file

@ -129,15 +129,15 @@ extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start, unsigned long start,
unsigned long end, unsigned long end,
long adjust_next); long adjust_next);
extern int __pmd_trans_huge_lock(pmd_t *pmd, extern int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
struct vm_area_struct *vma); spinlock_t **ptl);
/* mmap_sem must be held on entry */ /* mmap_sem must be held on entry */
static inline int pmd_trans_huge_lock(pmd_t *pmd, static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
struct vm_area_struct *vma) spinlock_t **ptl)
{ {
VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem)); VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
if (pmd_trans_huge(*pmd)) if (pmd_trans_huge(*pmd))
return __pmd_trans_huge_lock(pmd, vma); return __pmd_trans_huge_lock(pmd, vma, ptl);
else else
return 0; return 0;
} }
@ -215,8 +215,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
long adjust_next) long adjust_next)
{ {
} }
static inline int pmd_trans_huge_lock(pmd_t *pmd, static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
struct vm_area_struct *vma) spinlock_t **ptl)
{ {
return 0; return 0;
} }

View file

@ -1376,9 +1376,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr) pmd_t *pmd, unsigned long addr)
{ {
spinlock_t *ptl;
int ret = 0; int ret = 0;
if (__pmd_trans_huge_lock(pmd, vma) == 1) { if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
struct page *page; struct page *page;
pgtable_t pgtable; pgtable_t pgtable;
pmd_t orig_pmd; pmd_t orig_pmd;
@ -1393,7 +1394,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd); pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
if (is_huge_zero_pmd(orig_pmd)) { if (is_huge_zero_pmd(orig_pmd)) {
atomic_long_dec(&tlb->mm->nr_ptes); atomic_long_dec(&tlb->mm->nr_ptes);
spin_unlock(&tlb->mm->page_table_lock); spin_unlock(ptl);
put_huge_zero_page(); put_huge_zero_page();
} else { } else {
page = pmd_page(orig_pmd); page = pmd_page(orig_pmd);
@ -1402,7 +1403,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
VM_BUG_ON(!PageHead(page)); VM_BUG_ON(!PageHead(page));
atomic_long_dec(&tlb->mm->nr_ptes); atomic_long_dec(&tlb->mm->nr_ptes);
spin_unlock(&tlb->mm->page_table_lock); spin_unlock(ptl);
tlb_remove_page(tlb, page); tlb_remove_page(tlb, page);
} }
pte_free(tlb->mm, pgtable); pte_free(tlb->mm, pgtable);
@ -1415,14 +1416,15 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end, unsigned long addr, unsigned long end,
unsigned char *vec) unsigned char *vec)
{ {
spinlock_t *ptl;
int ret = 0; int ret = 0;
if (__pmd_trans_huge_lock(pmd, vma) == 1) { if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
/* /*
* All logical pages in the range are present * All logical pages in the range are present
* if backed by a huge page. * if backed by a huge page.
*/ */
spin_unlock(&vma->vm_mm->page_table_lock); spin_unlock(ptl);
memset(vec, 1, (end - addr) >> PAGE_SHIFT); memset(vec, 1, (end - addr) >> PAGE_SHIFT);
ret = 1; ret = 1;
} }
@ -1435,6 +1437,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long old_end, unsigned long new_addr, unsigned long old_end,
pmd_t *old_pmd, pmd_t *new_pmd) pmd_t *old_pmd, pmd_t *new_pmd)
{ {
spinlock_t *old_ptl, *new_ptl;
int ret = 0; int ret = 0;
pmd_t pmd; pmd_t pmd;
@ -1455,12 +1458,21 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
goto out; goto out;
} }
ret = __pmd_trans_huge_lock(old_pmd, vma); /*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_sem prevents deadlock.
*/
ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl);
if (ret == 1) { if (ret == 1) {
new_ptl = pmd_lockptr(mm, new_pmd);
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
VM_BUG_ON(!pmd_none(*new_pmd)); VM_BUG_ON(!pmd_none(*new_pmd));
set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
spin_unlock(&mm->page_table_lock); if (new_ptl != old_ptl)
spin_unlock(new_ptl);
spin_unlock(old_ptl);
} }
out: out:
return ret; return ret;
@ -1476,9 +1488,10 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot, int prot_numa) unsigned long addr, pgprot_t newprot, int prot_numa)
{ {
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
spinlock_t *ptl;
int ret = 0; int ret = 0;
if (__pmd_trans_huge_lock(pmd, vma) == 1) { if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
pmd_t entry; pmd_t entry;
ret = 1; ret = 1;
if (!prot_numa) { if (!prot_numa) {
@ -1507,7 +1520,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (ret == HPAGE_PMD_NR) if (ret == HPAGE_PMD_NR)
set_pmd_at(mm, addr, pmd, entry); set_pmd_at(mm, addr, pmd, entry);
spin_unlock(&vma->vm_mm->page_table_lock); spin_unlock(ptl);
} }
return ret; return ret;
@ -1520,12 +1533,13 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
* Note that if it returns 1, this routine returns without unlocking page * Note that if it returns 1, this routine returns without unlocking page
* table locks. So callers must unlock them. * table locks. So callers must unlock them.
*/ */
int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
spinlock_t **ptl)
{ {
spin_lock(&vma->vm_mm->page_table_lock); *ptl = pmd_lock(vma->vm_mm, pmd);
if (likely(pmd_trans_huge(*pmd))) { if (likely(pmd_trans_huge(*pmd))) {
if (unlikely(pmd_trans_splitting(*pmd))) { if (unlikely(pmd_trans_splitting(*pmd))) {
spin_unlock(&vma->vm_mm->page_table_lock); spin_unlock(*ptl);
wait_split_huge_page(vma->anon_vma, pmd); wait_split_huge_page(vma->anon_vma, pmd);
return -1; return -1;
} else { } else {
@ -1534,7 +1548,7 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
return 1; return 1;
} }
} }
spin_unlock(&vma->vm_mm->page_table_lock); spin_unlock(*ptl);
return 0; return 0;
} }

View file

@ -6605,10 +6605,10 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
pte_t *pte; pte_t *pte;
spinlock_t *ptl; spinlock_t *ptl;
if (pmd_trans_huge_lock(pmd, vma) == 1) { if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE) if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
mc.precharge += HPAGE_PMD_NR; mc.precharge += HPAGE_PMD_NR;
spin_unlock(&vma->vm_mm->page_table_lock); spin_unlock(ptl);
return 0; return 0;
} }
@ -6797,9 +6797,9 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
* to be unlocked in __split_huge_page_splitting(), where the main * to be unlocked in __split_huge_page_splitting(), where the main
* part of thp split is not executed yet. * part of thp split is not executed yet.
*/ */
if (pmd_trans_huge_lock(pmd, vma) == 1) { if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (mc.precharge < HPAGE_PMD_NR) { if (mc.precharge < HPAGE_PMD_NR) {
spin_unlock(&vma->vm_mm->page_table_lock); spin_unlock(ptl);
return 0; return 0;
} }
target_type = get_mctgt_type_thp(vma, addr, *pmd, &target); target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
@ -6816,7 +6816,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
} }
put_page(page); put_page(page);
} }
spin_unlock(&vma->vm_mm->page_table_lock); spin_unlock(ptl);
return 0; return 0;
} }