[PATCH] htlb forget rss with pt sharing
Imprecise RSS accounting is an irritating ill effect of pt sharing. After consulting with several VM experts, I have tried various methods to solve that problem: (1) iterate through all mm_structs that share the PT and increment the count; (2) keep the RSS count in the page table structure and then sum them up at reporting time. Neither of these methods yields a satisfactory implementation. Since process RSS accounting is purely informational, I propose we don't count hugetlb pages at all. rlimit has such a field, though there is absolutely no enforcement of a limit on that resource. One other method is to account all RSS at hugetlb mmap time, regardless of whether the pages are faulted in or not. I opt for the simplicity of no accounting at all. Hugetlb pages are special: they are reserved up front in the global reservation pool and are not reclaimable. From a physical memory resource point of view, they are already consumed regardless of whether any users are using them. If the concern is that RSS can be used to control resource allocation, we can already specify a hugetlbfs size limit, and the sysadmin can enforce that at mount time. Given the two points mentioned above, I fail to see how anything is affected by this patch. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Acked-by: Hugh Dickins <hugh@veritas.com> Cc: Dave McCracken <dmccr@us.ibm.com> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: David Gibson <david@gibson.dropbear.id.au> Cc: Adam Litke <agl@us.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: "David S. Miller" <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
39dde65c99
commit
cace673d37
1 changed files with 0 additions and 8 deletions
|
@ -344,7 +344,6 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
||||||
entry = *src_pte;
|
entry = *src_pte;
|
||||||
ptepage = pte_page(entry);
|
ptepage = pte_page(entry);
|
||||||
get_page(ptepage);
|
get_page(ptepage);
|
||||||
add_mm_counter(dst, file_rss, HPAGE_SIZE / PAGE_SIZE);
|
|
||||||
set_huge_pte_at(dst, addr, dst_pte, entry);
|
set_huge_pte_at(dst, addr, dst_pte, entry);
|
||||||
}
|
}
|
||||||
spin_unlock(&src->page_table_lock);
|
spin_unlock(&src->page_table_lock);
|
||||||
|
@ -377,10 +376,6 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
|
||||||
BUG_ON(end & ~HPAGE_MASK);
|
BUG_ON(end & ~HPAGE_MASK);
|
||||||
|
|
||||||
spin_lock(&mm->page_table_lock);
|
spin_lock(&mm->page_table_lock);
|
||||||
|
|
||||||
/* Update high watermark before we lower rss */
|
|
||||||
update_hiwater_rss(mm);
|
|
||||||
|
|
||||||
for (address = start; address < end; address += HPAGE_SIZE) {
|
for (address = start; address < end; address += HPAGE_SIZE) {
|
||||||
ptep = huge_pte_offset(mm, address);
|
ptep = huge_pte_offset(mm, address);
|
||||||
if (!ptep)
|
if (!ptep)
|
||||||
|
@ -395,9 +390,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
|
||||||
|
|
||||||
page = pte_page(pte);
|
page = pte_page(pte);
|
||||||
list_add(&page->lru, &page_list);
|
list_add(&page->lru, &page_list);
|
||||||
add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_unlock(&mm->page_table_lock);
|
spin_unlock(&mm->page_table_lock);
|
||||||
flush_tlb_range(vma, start, end);
|
flush_tlb_range(vma, start, end);
|
||||||
list_for_each_entry_safe(page, tmp, &page_list, lru) {
|
list_for_each_entry_safe(page, tmp, &page_list, lru) {
|
||||||
|
@ -523,7 +516,6 @@ int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
if (!pte_none(*ptep))
|
if (!pte_none(*ptep))
|
||||||
goto backout;
|
goto backout;
|
||||||
|
|
||||||
add_mm_counter(mm, file_rss, HPAGE_SIZE / PAGE_SIZE);
|
|
||||||
new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
|
new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
|
||||||
&& (vma->vm_flags & VM_SHARED)));
|
&& (vma->vm_flags & VM_SHARED)));
|
||||||
set_huge_pte_at(mm, address, ptep, new_pte);
|
set_huge_pte_at(mm, address, ptep, new_pte);
|
||||||
|
|
Loading…
Reference in a new issue