diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9c4390f60c3e..f0e999379dd7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1827,11 +1827,110 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, } } +static void khugepaged_alloc_sleep(void) +{ + wait_event_freezable_timeout(khugepaged_wait, false, + msecs_to_jiffies(khugepaged_alloc_sleep_millisecs)); +} + +#ifdef CONFIG_NUMA +static bool khugepaged_prealloc_page(struct page **hpage, bool *wait) +{ + if (IS_ERR(*hpage)) { + if (!*wait) + return false; + + *wait = false; + khugepaged_alloc_sleep(); + } else if (*hpage) { + put_page(*hpage); + *hpage = NULL; + } + + return true; +} + +static struct page +*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long address, + int node) +{ + VM_BUG_ON(*hpage); + /* + * Allocate the page while the vma is still valid and under + * the mmap_sem read mode so there is no memory allocation + * later when we take the mmap_sem in write mode. This is more + * friendly behavior (OTOH it may actually hide bugs) to + * filesystems in userland with daemons allocating memory in + * the userland I/O paths. Allocating memory with the + * mmap_sem in read mode is good idea also to allow greater + * scalability. + */ + *hpage = alloc_hugepage_vma(khugepaged_defrag(), vma, address, + node, __GFP_OTHER_NODE); + + /* + * After allocating the hugepage, release the mmap_sem read lock in + * preparation for taking it in write mode. + */ + up_read(&mm->mmap_sem); + if (unlikely(!*hpage)) { + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); + *hpage = ERR_PTR(-ENOMEM); + return NULL; + } + + count_vm_event(THP_COLLAPSE_ALLOC); + return *hpage; +} +#else +static struct page *khugepaged_alloc_hugepage(bool *wait) +{ + struct page *hpage; + + do { + hpage = alloc_hugepage(khugepaged_defrag()); + if (!hpage) { + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); + if (!*wait) + return NULL; + + *wait = false; + khugepaged_alloc_sleep(); + } else + count_vm_event(THP_COLLAPSE_ALLOC); + } while (unlikely(!hpage) && likely(khugepaged_enabled())); + + return hpage; +} + +static bool khugepaged_prealloc_page(struct page **hpage, bool *wait) +{ + if (!*hpage) + *hpage = khugepaged_alloc_hugepage(wait); + + if (unlikely(!*hpage)) + return false; + + return true; +} + +static struct page +*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long address, + int node) +{ + up_read(&mm->mmap_sem); + VM_BUG_ON(!*hpage); + return *hpage; +} +#endif + static void collapse_huge_page(struct mm_struct *mm, - unsigned long address, - struct page **hpage, - struct vm_area_struct *vma, - int node) + unsigned long address, + struct page **hpage, + struct vm_area_struct *vma, + int node) { pgd_t *pgd; pud_t *pud; @@ -1844,38 +1943,11 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long hstart, hend; VM_BUG_ON(address & ~HPAGE_PMD_MASK); -#ifndef CONFIG_NUMA - up_read(&mm->mmap_sem); - VM_BUG_ON(!*hpage); - new_page = *hpage; -#else - VM_BUG_ON(*hpage); - /* - * Allocate the page while the vma is still valid and under - * the mmap_sem read mode so there is no memory allocation - * later when we take the mmap_sem in write mode. This is more - * friendly behavior (OTOH it may actually hide bugs) to - * filesystems in userland with daemons allocating memory in - * the userland I/O paths. Allocating memory with the - * mmap_sem in read mode is good idea also to allow greater - * scalability. - */ - new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address, - node, __GFP_OTHER_NODE); - /* - * After allocating the hugepage, release the mmap_sem read lock in - * preparation for taking it in write mode. - */ - up_read(&mm->mmap_sem); - if (unlikely(!new_page)) { - count_vm_event(THP_COLLAPSE_ALLOC_FAILED); - *hpage = ERR_PTR(-ENOMEM); + /* release the mmap_sem read lock. */ + new_page = khugepaged_alloc_page(hpage, mm, vma, address, node); + if (!new_page) return; - } - *hpage = new_page; - count_vm_event(THP_COLLAPSE_ALLOC); -#endif if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) return; @@ -2215,34 +2287,6 @@ static int khugepaged_wait_event(void) kthread_should_stop(); } -static void khugepaged_alloc_sleep(void) -{ - wait_event_freezable_timeout(khugepaged_wait, false, - msecs_to_jiffies(khugepaged_alloc_sleep_millisecs)); -} - -#ifndef CONFIG_NUMA -static struct page *khugepaged_alloc_hugepage(bool *wait) -{ - struct page *hpage; - - do { - hpage = alloc_hugepage(khugepaged_defrag()); - if (!hpage) { - count_vm_event(THP_COLLAPSE_ALLOC_FAILED); - if (!*wait) - return NULL; - - *wait = false; - khugepaged_alloc_sleep(); - } else - count_vm_event(THP_COLLAPSE_ALLOC); - } while (unlikely(!hpage) && likely(khugepaged_enabled())); - - return hpage; -} -#endif - static void khugepaged_do_scan(void) { struct page *hpage = NULL; @@ -2253,23 +2297,9 @@ static void khugepaged_do_scan(void) barrier(); /* write khugepaged_pages_to_scan to local stack */ while (progress < pages) { -#ifndef CONFIG_NUMA - if (!hpage) - hpage = khugepaged_alloc_hugepage(&wait); - - if (unlikely(!hpage)) + if (!khugepaged_prealloc_page(&hpage, &wait)) break; -#else - if (IS_ERR(hpage)) { - if (!wait) - break; - wait = false; - khugepaged_alloc_sleep(); - } else if (hpage) { - put_page(hpage); - hpage = NULL; - } -#endif + cond_resched(); if (unlikely(kthread_should_stop() || freezing(current)))