diff --git a/include/linux/sched.h b/include/linux/sched.h index aecdc5a18773..d946195eec10 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1445,10 +1445,11 @@ struct task_struct { #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) #ifdef CONFIG_NUMA_BALANCING -extern void task_numa_fault(int node, int pages, bool migrated); +extern void task_numa_fault(int last_node, int node, int pages, bool migrated); extern void set_numabalancing_state(bool enabled); #else -static inline void task_numa_fault(int node, int pages, bool migrated) +static inline void task_numa_fault(int last_node, int node, int pages, + bool migrated) { } static inline void set_numabalancing_state(bool enabled) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8b15e9e1d1b8..89eeb89fd99a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -886,6 +886,20 @@ static unsigned int task_scan_max(struct task_struct *p) */ unsigned int sysctl_numa_balancing_settle_count __read_mostly = 3; +static inline int task_faults_idx(int nid, int priv) +{ + return 2 * nid + priv; +} + +static inline unsigned long task_faults(struct task_struct *p, int nid) +{ + if (!p->numa_faults) + return 0; + + return p->numa_faults[task_faults_idx(nid, 0)] + + p->numa_faults[task_faults_idx(nid, 1)]; +} + static unsigned long weighted_cpuload(const int cpu); @@ -928,13 +942,19 @@ static void task_numa_placement(struct task_struct *p) /* Find the node with the highest number of faults */ for_each_online_node(nid) { unsigned long faults; + int priv, i; - /* Decay existing window and copy faults since last scan */ - p->numa_faults[nid] >>= 1; - p->numa_faults[nid] += p->numa_faults_buffer[nid]; - p->numa_faults_buffer[nid] = 0; + for (priv = 0; priv < 2; priv++) { + i = task_faults_idx(nid, priv); - faults = p->numa_faults[nid]; + /* Decay existing window, copy faults since last scan */ + p->numa_faults[i] >>= 1; + p->numa_faults[i] += p->numa_faults_buffer[i]; + p->numa_faults_buffer[i] = 0; + } + + /* Find maximum private faults */ + faults = p->numa_faults[task_faults_idx(nid, 1)]; if (faults > max_faults) { max_faults = faults; max_nid = nid; @@ -970,16 +990,20 @@ static void task_numa_placement(struct task_struct *p) /* * Got a PROT_NONE fault for a page on @node. */ -void task_numa_fault(int node, int pages, bool migrated) +void task_numa_fault(int last_nid, int node, int pages, bool migrated) { struct task_struct *p = current; + int priv; if (!numabalancing_enabled) return; + /* For now, do not attempt to detect private/shared accesses */ + priv = 1; + /* Allocate buffer to track faults on a per-node basis */ if (unlikely(!p->numa_faults)) { - int size = sizeof(*p->numa_faults) * nr_node_ids; + int size = sizeof(*p->numa_faults) * 2 * nr_node_ids; /* numa_faults and numa_faults_buffer share the allocation */ p->numa_faults = kzalloc(size * 2, GFP_KERNEL|__GFP_NOWARN); @@ -987,7 +1011,7 @@ void task_numa_fault(int node, int pages, bool migrated) return; BUG_ON(p->numa_faults_buffer); - p->numa_faults_buffer = p->numa_faults + nr_node_ids; + p->numa_faults_buffer = p->numa_faults + (2 * nr_node_ids); } /* @@ -1005,7 +1029,7 @@ void task_numa_fault(int node, int pages, bool migrated) task_numa_placement(p); - p->numa_faults_buffer[node] += pages; + p->numa_faults_buffer[task_faults_idx(node, priv)] += pages; } static void reset_ptenuma_scan(struct task_struct *p) @@ -4146,7 +4170,7 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env) return false; if (dst_nid == p->numa_preferred_nid || - p->numa_faults[dst_nid] > p->numa_faults[src_nid]) + task_faults(p, dst_nid) > task_faults(p, src_nid)) return true; return false; @@ -4170,7 +4194,7 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env) p->numa_migrate_seq >= sysctl_numa_balancing_settle_count) return false; - if (p->numa_faults[dst_nid] < p->numa_faults[src_nid]) + if (task_faults(p, dst_nid) < task_faults(p, src_nid)) return true; return false; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8677dbf31c2e..914216733e0a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1282,7 +1282,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page; unsigned long haddr = addr & HPAGE_PMD_MASK; int page_nid = -1, this_nid = numa_node_id(); - int target_nid; + int target_nid, last_nid = -1; bool page_locked; bool migrated = false; @@ -1293,6 +1293,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, page = pmd_page(pmd); BUG_ON(is_huge_zero_page(page)); page_nid = page_to_nid(page); + last_nid = page_nid_last(page); count_vm_numa_event(NUMA_HINT_FAULTS); if (page_nid == this_nid) count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); @@ -1361,7 +1362,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, page_unlock_anon_vma_read(anon_vma); if (page_nid != -1) - task_numa_fault(page_nid, HPAGE_PMD_NR, migrated); + task_numa_fault(last_nid, page_nid, HPAGE_PMD_NR, migrated); return 0; } diff --git a/mm/memory.c b/mm/memory.c index ed51f15136ee..24bc9b848af6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3536,6 +3536,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page = NULL; spinlock_t *ptl; int page_nid = -1; + int last_nid; int target_nid; bool migrated = false; @@ -3566,6 +3567,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, } BUG_ON(is_zero_pfn(page_to_pfn(page))); + last_nid = page_nid_last(page); page_nid = page_to_nid(page); target_nid = numa_migrate_prep(page, vma, addr, page_nid); pte_unmap_unlock(ptep, ptl); @@ -3581,7 +3583,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, out: if (page_nid != -1) - task_numa_fault(page_nid, 1, migrated); + task_numa_fault(last_nid, page_nid, 1, migrated); return 0; } @@ -3596,6 +3598,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long offset; spinlock_t *ptl; bool numa = false; + int last_nid; spin_lock(&mm->page_table_lock); pmd = *pmdp; @@ -3643,6 +3646,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(page_mapcount(page) != 1)) continue; + last_nid = page_nid_last(page); page_nid = page_to_nid(page); target_nid = numa_migrate_prep(page, vma, addr, page_nid); pte_unmap_unlock(pte, ptl); @@ -3655,7 +3659,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, } if (page_nid != -1) - task_numa_fault(page_nid, 1, migrated); + task_numa_fault(last_nid, page_nid, 1, migrated); pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); }