From ec7ab660fedffc12841ebf800311d88828d3d93e Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Fri, 19 Jun 2020 13:25:18 +0530 Subject: [PATCH] sched/fair: Tighten prefer_spread feature This patch tightens the prefer_spread feature by doing the following. (1) While picking the busiest group in update_sd_pick_busiest(), if the current group and busiest group are classified as same, use number of runnable tasks to break the ties. Use group utilization as the next tie breaker. Otherwise we may end up selecting the group with more utilization but with just 1 task. (2) Ignore average load checks when the load balancing CPU is idle and prefer_spread is set. (3) Allow no-hz idle balance CPUs to pull the tasks when the sched domain is not over-utilized but prefer_spread is set. (4) There are cases in calculate_imbalance() that skip imbalance override check due to which tasks are not getting pulled. Move this check to outside of calculate_imbalance() and set the imbalance to half of the group utilization. (5) When the weighted CPU load is 0, find_busiest_queue() can't find the busiest rq. Fix this as well. Change-Id: I93d1a62cbd4be34af993ae664a398aa868d29a0c Signed-off-by: Pavankumar Kondeti --- kernel/sched/fair.c | 51 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 40f2d69fe029..98c7427d761b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9686,10 +9686,19 @@ static bool update_sd_pick_busiest(struct lb_env *env, if (sgs->group_type < busiest->group_type) return false; - if (env->prefer_spread && env->idle != CPU_NOT_IDLE && - (sgs->sum_nr_running > busiest->sum_nr_running) && - (sgs->group_util > busiest->group_util)) - return true; + /* + * This sg and busiest are classified as same. When prefer_spread + * is true, we want to maximize the chance of pulling tasks, so + * prefer to pick sg with more runnable tasks and break the ties + * with utilization. 
+ */ + if (env->prefer_spread) { + if (sgs->sum_nr_running < busiest->sum_nr_running) + return false; + if (sgs->sum_nr_running > busiest->sum_nr_running) + return true; + return sgs->group_util > busiest->group_util; + } if (sgs->avg_load <= busiest->avg_load) return false; @@ -9725,10 +9734,6 @@ static bool update_sd_pick_busiest(struct lb_env *env, asym_packing: - if (env->prefer_spread && - (sgs->sum_nr_running < busiest->sum_nr_running)) - return false; - /* This is the busiest node in its class. */ if (!(env->sd->flags & SD_ASYM_PACKING)) return true; @@ -10199,15 +10204,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s return fix_small_imbalance(env, sds); } - - /* - * If we couldn't find any imbalance, then boost the imbalance - * with the group util. - */ - if (env->prefer_spread && !env->imbalance && - env->idle != CPU_NOT_IDLE && - busiest->sum_nr_running > busiest->group_weight) - env->imbalance = busiest->group_util; } /******* find_busiest_group() helpers end here *********************/ @@ -10243,7 +10239,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) int cpu_local, cpu_busiest; unsigned long capacity_local, capacity_busiest; - if (env->idle != CPU_NEWLY_IDLE) + if (env->idle != CPU_NEWLY_IDLE && !env->prefer_spread) goto out_balanced; if (!sds.local || !sds.busiest) @@ -10292,9 +10288,13 @@ static struct sched_group *find_busiest_group(struct lb_env *env) /* * When dst_cpu is idle, prevent SMP nice and/or asymmetric group * capacities from resulting in underutilization due to avg_load. + * + * When prefer_spread is enabled, force the balance even when + * busiest group has some capacity but loaded with more than 1 + * task. 
*/ if (env->idle != CPU_NOT_IDLE && group_has_capacity(env, local) && - busiest->group_no_capacity) + (busiest->group_no_capacity || env->prefer_spread)) goto force_balance; /* Misfit tasks should be dealt with regardless of the avg load */ @@ -10340,6 +10340,14 @@ static struct sched_group *find_busiest_group(struct lb_env *env) /* Looks like there is an imbalance. Compute it */ env->src_grp_type = busiest->group_type; calculate_imbalance(env, &sds); + + /* + * If we couldn't find any imbalance, then boost the imbalance + * based on the group util. + */ + if (!env->imbalance && env->prefer_spread) + env->imbalance = (busiest->group_util >> 1); + trace_sched_load_balance_stats(sds.busiest->cpumask[0], busiest->group_type, busiest->avg_load, busiest->load_per_task, sds.local->cpumask[0], @@ -10449,7 +10457,7 @@ static struct rq *find_busiest_queue(struct lb_env *env, * to: wl_i * capacity_j > wl_j * capacity_i; where j is * our previous maximum. */ - if (wl * busiest_capacity > busiest_load * capacity) { + if (wl * busiest_capacity >= busiest_load * capacity) { busiest_load = wl; busiest_capacity = capacity; busiest = rq; @@ -10595,7 +10603,8 @@ static int load_balance(int this_cpu, struct rq *this_rq, .loop = 0, }; - env.prefer_spread = (prefer_spread_on_idle(this_cpu) && + env.prefer_spread = (idle != CPU_NOT_IDLE && + prefer_spread_on_idle(this_cpu) && !((sd->flags & SD_ASYM_CPUCAPACITY) && !is_asym_cap_cpu(this_cpu)));