mm, thp: change pmd_trans_huge_lock() to return taken lock
With split ptlock it's important to know which lock pmd_trans_huge_lock() took. This patch adds one more parameter to the function to return the lock.

In most places migration to the new API is trivial. The exception is move_huge_pmd(): we need to take two locks if the pmd tables are different.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
9a86cb7bdc
commit
bf929152e9
4 changed files with 46 additions and 31 deletions
|
@ -506,9 +506,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
|
||||||
pte_t *pte;
|
pte_t *pte;
|
||||||
spinlock_t *ptl;
|
spinlock_t *ptl;
|
||||||
|
|
||||||
if (pmd_trans_huge_lock(pmd, vma) == 1) {
|
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
|
||||||
smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
|
smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
|
||||||
spin_unlock(&walk->mm->page_table_lock);
|
spin_unlock(ptl);
|
||||||
mss->anonymous_thp += HPAGE_PMD_SIZE;
|
mss->anonymous_thp += HPAGE_PMD_SIZE;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -999,13 +999,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
|
||||||
{
|
{
|
||||||
struct vm_area_struct *vma;
|
struct vm_area_struct *vma;
|
||||||
struct pagemapread *pm = walk->private;
|
struct pagemapread *pm = walk->private;
|
||||||
|
spinlock_t *ptl;
|
||||||
pte_t *pte;
|
pte_t *pte;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
|
pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
|
||||||
|
|
||||||
/* find the first VMA at or above 'addr' */
|
/* find the first VMA at or above 'addr' */
|
||||||
vma = find_vma(walk->mm, addr);
|
vma = find_vma(walk->mm, addr);
|
||||||
if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
|
if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
|
||||||
int pmd_flags2;
|
int pmd_flags2;
|
||||||
|
|
||||||
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
|
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
|
||||||
|
@ -1023,7 +1024,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
|
||||||
if (err)
|
if (err)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
spin_unlock(&walk->mm->page_table_lock);
|
spin_unlock(ptl);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1325,7 +1326,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
|
||||||
|
|
||||||
md = walk->private;
|
md = walk->private;
|
||||||
|
|
||||||
if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
|
if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
|
||||||
pte_t huge_pte = *(pte_t *)pmd;
|
pte_t huge_pte = *(pte_t *)pmd;
|
||||||
struct page *page;
|
struct page *page;
|
||||||
|
|
||||||
|
@ -1333,7 +1334,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
|
||||||
if (page)
|
if (page)
|
||||||
gather_stats(page, md, pte_dirty(huge_pte),
|
gather_stats(page, md, pte_dirty(huge_pte),
|
||||||
HPAGE_PMD_SIZE/PAGE_SIZE);
|
HPAGE_PMD_SIZE/PAGE_SIZE);
|
||||||
spin_unlock(&walk->mm->page_table_lock);
|
spin_unlock(ptl);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -129,15 +129,15 @@ extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
|
||||||
unsigned long start,
|
unsigned long start,
|
||||||
unsigned long end,
|
unsigned long end,
|
||||||
long adjust_next);
|
long adjust_next);
|
||||||
extern int __pmd_trans_huge_lock(pmd_t *pmd,
|
extern int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
|
||||||
struct vm_area_struct *vma);
|
spinlock_t **ptl);
|
||||||
/* mmap_sem must be held on entry */
|
/* mmap_sem must be held on entry */
|
||||||
static inline int pmd_trans_huge_lock(pmd_t *pmd,
|
static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
|
||||||
struct vm_area_struct *vma)
|
spinlock_t **ptl)
|
||||||
{
|
{
|
||||||
VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
|
VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
|
||||||
if (pmd_trans_huge(*pmd))
|
if (pmd_trans_huge(*pmd))
|
||||||
return __pmd_trans_huge_lock(pmd, vma);
|
return __pmd_trans_huge_lock(pmd, vma, ptl);
|
||||||
else
|
else
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -215,8 +215,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
|
||||||
long adjust_next)
|
long adjust_next)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
static inline int pmd_trans_huge_lock(pmd_t *pmd,
|
static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
|
||||||
struct vm_area_struct *vma)
|
spinlock_t **ptl)
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1376,9 +1376,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||||
pmd_t *pmd, unsigned long addr)
|
pmd_t *pmd, unsigned long addr)
|
||||||
{
|
{
|
||||||
|
spinlock_t *ptl;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
if (__pmd_trans_huge_lock(pmd, vma) == 1) {
|
if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
|
||||||
struct page *page;
|
struct page *page;
|
||||||
pgtable_t pgtable;
|
pgtable_t pgtable;
|
||||||
pmd_t orig_pmd;
|
pmd_t orig_pmd;
|
||||||
|
@ -1393,7 +1394,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||||
pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
|
pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
|
||||||
if (is_huge_zero_pmd(orig_pmd)) {
|
if (is_huge_zero_pmd(orig_pmd)) {
|
||||||
atomic_long_dec(&tlb->mm->nr_ptes);
|
atomic_long_dec(&tlb->mm->nr_ptes);
|
||||||
spin_unlock(&tlb->mm->page_table_lock);
|
spin_unlock(ptl);
|
||||||
put_huge_zero_page();
|
put_huge_zero_page();
|
||||||
} else {
|
} else {
|
||||||
page = pmd_page(orig_pmd);
|
page = pmd_page(orig_pmd);
|
||||||
|
@ -1402,7 +1403,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||||
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
|
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
|
||||||
VM_BUG_ON(!PageHead(page));
|
VM_BUG_ON(!PageHead(page));
|
||||||
atomic_long_dec(&tlb->mm->nr_ptes);
|
atomic_long_dec(&tlb->mm->nr_ptes);
|
||||||
spin_unlock(&tlb->mm->page_table_lock);
|
spin_unlock(ptl);
|
||||||
tlb_remove_page(tlb, page);
|
tlb_remove_page(tlb, page);
|
||||||
}
|
}
|
||||||
pte_free(tlb->mm, pgtable);
|
pte_free(tlb->mm, pgtable);
|
||||||
|
@ -1415,14 +1416,15 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
|
||||||
unsigned long addr, unsigned long end,
|
unsigned long addr, unsigned long end,
|
||||||
unsigned char *vec)
|
unsigned char *vec)
|
||||||
{
|
{
|
||||||
|
spinlock_t *ptl;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
if (__pmd_trans_huge_lock(pmd, vma) == 1) {
|
if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
|
||||||
/*
|
/*
|
||||||
* All logical pages in the range are present
|
* All logical pages in the range are present
|
||||||
* if backed by a huge page.
|
* if backed by a huge page.
|
||||||
*/
|
*/
|
||||||
spin_unlock(&vma->vm_mm->page_table_lock);
|
spin_unlock(ptl);
|
||||||
memset(vec, 1, (end - addr) >> PAGE_SHIFT);
|
memset(vec, 1, (end - addr) >> PAGE_SHIFT);
|
||||||
ret = 1;
|
ret = 1;
|
||||||
}
|
}
|
||||||
|
@ -1435,6 +1437,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
|
||||||
unsigned long new_addr, unsigned long old_end,
|
unsigned long new_addr, unsigned long old_end,
|
||||||
pmd_t *old_pmd, pmd_t *new_pmd)
|
pmd_t *old_pmd, pmd_t *new_pmd)
|
||||||
{
|
{
|
||||||
|
spinlock_t *old_ptl, *new_ptl;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
pmd_t pmd;
|
pmd_t pmd;
|
||||||
|
|
||||||
|
@ -1455,12 +1458,21 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = __pmd_trans_huge_lock(old_pmd, vma);
|
/*
|
||||||
|
* We don't have to worry about the ordering of src and dst
|
||||||
|
* ptlocks because exclusive mmap_sem prevents deadlock.
|
||||||
|
*/
|
||||||
|
ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl);
|
||||||
if (ret == 1) {
|
if (ret == 1) {
|
||||||
|
new_ptl = pmd_lockptr(mm, new_pmd);
|
||||||
|
if (new_ptl != old_ptl)
|
||||||
|
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
|
||||||
pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
|
pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
|
||||||
VM_BUG_ON(!pmd_none(*new_pmd));
|
VM_BUG_ON(!pmd_none(*new_pmd));
|
||||||
set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
|
set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
|
||||||
spin_unlock(&mm->page_table_lock);
|
if (new_ptl != old_ptl)
|
||||||
|
spin_unlock(new_ptl);
|
||||||
|
spin_unlock(old_ptl);
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -1476,9 +1488,10 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
|
||||||
unsigned long addr, pgprot_t newprot, int prot_numa)
|
unsigned long addr, pgprot_t newprot, int prot_numa)
|
||||||
{
|
{
|
||||||
struct mm_struct *mm = vma->vm_mm;
|
struct mm_struct *mm = vma->vm_mm;
|
||||||
|
spinlock_t *ptl;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
if (__pmd_trans_huge_lock(pmd, vma) == 1) {
|
if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
|
||||||
pmd_t entry;
|
pmd_t entry;
|
||||||
ret = 1;
|
ret = 1;
|
||||||
if (!prot_numa) {
|
if (!prot_numa) {
|
||||||
|
@ -1507,7 +1520,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
|
||||||
if (ret == HPAGE_PMD_NR)
|
if (ret == HPAGE_PMD_NR)
|
||||||
set_pmd_at(mm, addr, pmd, entry);
|
set_pmd_at(mm, addr, pmd, entry);
|
||||||
|
|
||||||
spin_unlock(&vma->vm_mm->page_table_lock);
|
spin_unlock(ptl);
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -1520,12 +1533,13 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
|
||||||
* Note that if it returns 1, this routine returns without unlocking page
|
* Note that if it returns 1, this routine returns without unlocking page
|
||||||
* table locks. So callers must unlock them.
|
* table locks. So callers must unlock them.
|
||||||
*/
|
*/
|
||||||
int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
|
int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
|
||||||
|
spinlock_t **ptl)
|
||||||
{
|
{
|
||||||
spin_lock(&vma->vm_mm->page_table_lock);
|
*ptl = pmd_lock(vma->vm_mm, pmd);
|
||||||
if (likely(pmd_trans_huge(*pmd))) {
|
if (likely(pmd_trans_huge(*pmd))) {
|
||||||
if (unlikely(pmd_trans_splitting(*pmd))) {
|
if (unlikely(pmd_trans_splitting(*pmd))) {
|
||||||
spin_unlock(&vma->vm_mm->page_table_lock);
|
spin_unlock(*ptl);
|
||||||
wait_split_huge_page(vma->anon_vma, pmd);
|
wait_split_huge_page(vma->anon_vma, pmd);
|
||||||
return -1;
|
return -1;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1534,7 +1548,7 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
spin_unlock(&vma->vm_mm->page_table_lock);
|
spin_unlock(*ptl);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6605,10 +6605,10 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
|
||||||
pte_t *pte;
|
pte_t *pte;
|
||||||
spinlock_t *ptl;
|
spinlock_t *ptl;
|
||||||
|
|
||||||
if (pmd_trans_huge_lock(pmd, vma) == 1) {
|
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
|
||||||
if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
|
if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
|
||||||
mc.precharge += HPAGE_PMD_NR;
|
mc.precharge += HPAGE_PMD_NR;
|
||||||
spin_unlock(&vma->vm_mm->page_table_lock);
|
spin_unlock(ptl);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6797,9 +6797,9 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
|
||||||
* to be unlocked in __split_huge_page_splitting(), where the main
|
* to be unlocked in __split_huge_page_splitting(), where the main
|
||||||
* part of thp split is not executed yet.
|
* part of thp split is not executed yet.
|
||||||
*/
|
*/
|
||||||
if (pmd_trans_huge_lock(pmd, vma) == 1) {
|
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
|
||||||
if (mc.precharge < HPAGE_PMD_NR) {
|
if (mc.precharge < HPAGE_PMD_NR) {
|
||||||
spin_unlock(&vma->vm_mm->page_table_lock);
|
spin_unlock(ptl);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
|
target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
|
||||||
|
@ -6816,7 +6816,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
|
||||||
}
|
}
|
||||||
put_page(page);
|
put_page(page);
|
||||||
}
|
}
|
||||||
spin_unlock(&vma->vm_mm->page_table_lock);
|
spin_unlock(ptl);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue