From 0947fbfea4997d1fac8b60862aa77be89983c443 Mon Sep 17 00:00:00 2001
From: Vinayak Menon
Date: Tue, 11 Dec 2018 18:03:55 +0530
Subject: [PATCH] mm: allow vmas with vm_ops to be speculatively handled

Right now only anonymous pages are speculatively faulted, which leaves
out a large percentage of faults that still have to take mmap_sem.
These were left out because there can be fault handlers, mainly in the
fs layer, which may use the vma in unknown ways.

This patch covers filemap_fault and shmem_fault, and enables
speculative fault handling for all vmas with vm_ops. None of the other
common fault handlers is seen to use or modify the vma.

Change-Id: I4f87909002f7ac64945f048ba0169a4cac7a15ab
Signed-off-by: Vinayak Menon
---
 include/linux/vm_event_item.h |  3 ++-
 mm/filemap.c                  |  8 ++++----
 mm/memory.c                   | 24 +++++++++---------------
 mm/shmem.c                    |  4 ++--
 mm/vmstat.c                   |  3 ++-
 5 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index ca85428994d8..cee63ad71e53 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -111,7 +111,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PGPGOUTCLEAN, PSWPIN, PSWPOUT,
 		SWAP_RA_HIT,
 #endif
 #ifdef CONFIG_SPECULATIVE_PAGE_FAULT
-		SPECULATIVE_PGFAULT,
+		SPECULATIVE_PGFAULT_ANON,
+		SPECULATIVE_PGFAULT_FILE,
 #endif
 		NR_VM_EVENT_ITEMS
 };
diff --git a/mm/filemap.c b/mm/filemap.c
index 653a9fd519c6..bbcbb44beeea 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2477,12 +2477,12 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 	pgoff_t offset = vmf->pgoff;
 
 	/* If we don't want any read-ahead, don't bother */
-	if (vmf->vma->vm_flags & VM_RAND_READ)
+	if (vmf->vma_flags & VM_RAND_READ)
 		return fpin;
 	if (!ra->ra_pages)
 		return fpin;
 
-	if (vmf->vma->vm_flags & VM_SEQ_READ) {
+	if (vmf->vma_flags & VM_SEQ_READ) {
 		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 		page_cache_sync_readahead(mapping, ra, file, offset,
 					  ra->ra_pages);
@@ -2526,7 +2526,7 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
 	pgoff_t offset = vmf->pgoff;
 
 	/* If we don't want any read-ahead, don't bother */
-	if (vmf->vma->vm_flags & VM_RAND_READ)
+	if (vmf->vma_flags & VM_RAND_READ)
 		return fpin;
 	if (ra->mmap_miss > 0)
 		ra->mmap_miss--;
@@ -2549,7 +2549,7 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
  *
- * vma->vm_mm->mmap_sem must be held on entry.
+ * vma->vm_mm->mmap_sem must be held on entry (except FAULT_FLAG_SPECULATIVE).
  *
  * If our return value has VM_FAULT_RETRY set, it's because
  * lock_page_or_retry() returned 0.
diff --git a/mm/memory.c b/mm/memory.c
index 2edb33ac1507..ab7c597f0917 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4215,6 +4215,7 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
 static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 {
 	pte_t entry;
+	int ret = 0;
 
 	if (unlikely(pmd_none(*vmf->pmd))) {
 		/*
@@ -4267,8 +4268,6 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 	if (!vmf->pte) {
 		if (vma_is_anonymous(vmf->vma))
 			return do_anonymous_page(vmf);
-		else if (vmf->flags & FAULT_FLAG_SPECULATIVE)
-			return VM_FAULT_RETRY;
 		else
 			return do_fault(vmf);
 	}
@@ -4302,10 +4301,12 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 		 */
 		if (vmf->flags & FAULT_FLAG_WRITE)
 			flush_tlb_fix_spurious_fault(vmf->vma, vmf->address);
+		if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+			ret = VM_FAULT_RETRY;
 	}
 unlock:
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
-	return 0;
+	return ret;
 }
 
 /*
@@ -4449,22 +4450,12 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address,
 		return VM_FAULT_RETRY;
 	}
 
-	/*
-	 * Can't call vm_ops service has we don't know what they would do
-	 * with the VMA.
-	 * This include huge page from hugetlbfs.
-	 */
-	if (vmf.vma->vm_ops) {
-		trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
-		return VM_FAULT_RETRY;
-	}
-
 	/*
 	 * __anon_vma_prepare() requires the mmap_sem to be held
 	 * because vm_next and vm_prev must be safe. This can't be guaranteed
 	 * in the speculative path.
 	 */
-	if (unlikely(!vmf.vma->anon_vma)) {
+	if (unlikely(vma_is_anonymous(vmf.vma) && !vmf.vma->anon_vma)) {
 		trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
 		return VM_FAULT_RETRY;
 	}
@@ -4606,7 +4597,10 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address,
 	 * If there is no need to retry, don't return the vma to the caller.
 	 */
 	if (ret != VM_FAULT_RETRY) {
-		count_vm_event(SPECULATIVE_PGFAULT);
+		if (vma_is_anonymous(vmf.vma))
+			count_vm_event(SPECULATIVE_PGFAULT_ANON);
+		else
+			count_vm_event(SPECULATIVE_PGFAULT_FILE);
 		put_vma(vmf.vma);
 		*vma = NULL;
 	}
diff --git a/mm/shmem.c b/mm/shmem.c
index 2311dfe40f75..8ad4588d2c5b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2023,10 +2023,10 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
 
 	sgp = SGP_CACHE;
 
-	if ((vma->vm_flags & VM_NOHUGEPAGE) ||
+	if ((vmf->vma_flags & VM_NOHUGEPAGE) ||
 	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 		sgp = SGP_NOHUGE;
-	else if (vma->vm_flags & VM_HUGEPAGE)
+	else if (vmf->vma_flags & VM_HUGEPAGE)
 		sgp = SGP_HUGE;
 
 	err = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index bfec4ac2c043..0c8a43fba9e0 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1295,7 +1295,8 @@ const char * const vmstat_text[] = {
 	"swap_ra_hit",
 #endif
 #ifdef CONFIG_SPECULATIVE_PAGE_FAULT
-	"speculative_pgfault"
+	"speculative_pgfault_anon",
+	"speculative_pgfault_file",
 #endif
 #endif /* CONFIG_VM_EVENT_COUNTERS */
 };
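
Why the handlers above read vmf->vma_flags instead of vmf->vma->vm_flags:
the speculative path runs without mmap_sem, so a racing writer may modify
or unmap the vma while the fault is being handled. The handler therefore
works only from values snapshotted out of the vma while its sequence count
was stable, and the caller revalidates that count before committing,
retrying under mmap_sem if it changed. Below is a minimal user-space C
sketch of this snapshot-and-revalidate pattern; toy_vma, toy_fault,
TOY_VM_READ and the handler are illustrative stand-ins, not kernel API.

#include <stdatomic.h>
#include <stdio.h>

#define TOY_VM_READ 0x1UL	/* stand-in for a vm_flags bit */

/* Toy stand-ins for the kernel structures; illustrative only. */
struct toy_vma {
	atomic_uint seq;	/* bumped by writers, like the SPF vm_sequence */
	unsigned long flags;	/* like vma->vm_flags */
};

struct toy_fault {
	unsigned long vma_flags;	/* snapshot, like vmf->vma_flags */
};

/*
 * The handler uses only the snapshot: with no mmap_sem held, the live
 * vma may be changed by a writer at any instant.
 */
static int toy_fault_handler(const struct toy_fault *vmf)
{
	return (vmf->vma_flags & TOY_VM_READ) ? 0 : -1;
}

/* Snapshot under an even sequence, handle the fault, then revalidate. */
static int toy_speculative_fault(struct toy_vma *vma)
{
	unsigned int seq = atomic_load(&vma->seq);

	if (seq & 1)
		return -1;	/* writer in progress: retry under mmap_sem */

	struct toy_fault vmf = { .vma_flags = vma->flags };
	int ret = toy_fault_handler(&vmf);

	if (atomic_load(&vma->seq) != seq)
		return -1;	/* vma changed underneath us: retry */
	return ret;
}

int main(void)
{
	struct toy_vma vma = { .flags = TOY_VM_READ };

	atomic_init(&vma.seq, 0);
	printf("speculative fault result: %d\n", toy_speculative_fault(&vma));
	return 0;
}

The kernel's version of this uses the seqcount API with the proper memory
barriers and also takes a reference on the vma (see put_vma() in the diff);
the sketch elides both for brevity.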