thp, memcg: split hugepage for memcg oom on cow
On COW, a new hugepage is allocated and charged to the memcg. If the system is oom or the charge to the memcg fails, however, the fault handler will return VM_FAULT_OOM which results in an oom kill. Instead, it's possible to fall back to splitting the hugepage so that the COW results only in an order-0 page being allocated and charged to the memcg, which has a higher likelihood of succeeding. This is expensive because the hugepage must be split in the page fault handler, but it is much better than unnecessarily oom killing a process. Signed-off-by: David Rientjes <rientjes@google.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Johannes Weiner <jweiner@redhat.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Michal Hocko <mhocko@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
bde8bd8a1d
commit
1f1d06c34f
2 changed files with 18 additions and 3 deletions
|
@ -952,6 +952,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
count_vm_event(THP_FAULT_FALLBACK);
|
||||
ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
|
||||
pmd, orig_pmd, page, haddr);
|
||||
if (ret & VM_FAULT_OOM)
|
||||
split_huge_page(page);
|
||||
put_page(page);
|
||||
goto out;
|
||||
}
|
||||
|
@ -959,6 +961,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
|
||||
if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
|
||||
put_page(new_page);
|
||||
split_huge_page(page);
|
||||
put_page(page);
|
||||
ret |= VM_FAULT_OOM;
|
||||
goto out;
|
||||
|
|
18
mm/memory.c
18
mm/memory.c
|
@ -3486,6 +3486,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
if (unlikely(is_vm_hugetlb_page(vma)))
|
||||
return hugetlb_fault(mm, vma, address, flags);
|
||||
|
||||
retry:
|
||||
pgd = pgd_offset(mm, address);
|
||||
pud = pud_alloc(mm, pgd, address);
|
||||
if (!pud)
|
||||
|
@ -3499,13 +3500,24 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
pmd, flags);
|
||||
} else {
|
||||
pmd_t orig_pmd = *pmd;
|
||||
int ret;
|
||||
|
||||
barrier();
|
||||
if (pmd_trans_huge(orig_pmd)) {
|
||||
if (flags & FAULT_FLAG_WRITE &&
|
||||
!pmd_write(orig_pmd) &&
|
||||
!pmd_trans_splitting(orig_pmd))
|
||||
return do_huge_pmd_wp_page(mm, vma, address,
|
||||
pmd, orig_pmd);
|
||||
!pmd_trans_splitting(orig_pmd)) {
|
||||
ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
|
||||
orig_pmd);
|
||||
/*
|
||||
* If COW results in an oom, the huge pmd will
|
||||
* have been split, so retry the fault on the
|
||||
* pte for a smaller charge.
|
||||
*/
|
||||
if (unlikely(ret & VM_FAULT_OOM))
|
||||
goto retry;
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue