sched/fair: Tighten prefer_spread feature

This patch tightens the prefer_spread feature by doing the
following.

(1) While picking the busiest group in update_sd_pick_busiest(),
if the current group and the busiest group are classified as the
same type, use the number of runnable tasks to break the tie, and
group utilization as the next tie breaker. Otherwise we may end up
selecting the group with more utilization but with just 1 task.

(2) Ignore average load checks when the load balancing CPU is
idle and prefer_spread is set.

(3) Allow nohz idle balance CPUs to pull tasks when the sched
domain is not over-utilized but prefer_spread is set.

(4) There are cases in calculate_imbalance() that skip the imbalance
override check, due to which tasks are not getting pulled. Move this
check outside of calculate_imbalance() and set the imbalance to
half of the busiest group's utilization.

(5) When the weighted CPU load is 0, find_busiest_queue() cannot
find the busiest rq. Fix this as well.

Change-Id: I93d1a62cbd4be34af993ae664a398aa868d29a0c
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
This commit is contained in:
Pavankumar Kondeti 2020-06-19 13:25:18 +05:30
parent 37e541f76d
commit ec7ab660fe

View file

@ -9686,10 +9686,19 @@ static bool update_sd_pick_busiest(struct lb_env *env,
if (sgs->group_type < busiest->group_type) if (sgs->group_type < busiest->group_type)
return false; return false;
if (env->prefer_spread && env->idle != CPU_NOT_IDLE && /*
(sgs->sum_nr_running > busiest->sum_nr_running) && * This sg and busiest are classified as same. when prefer_spread
(sgs->group_util > busiest->group_util)) * is true, we want to maximize the chance of pulling taks, so
return true; * prefer to pick sg with more runnable tasks and break the ties
* with utilization.
*/
if (env->prefer_spread) {
if (sgs->sum_nr_running < busiest->sum_nr_running)
return false;
if (sgs->sum_nr_running > busiest->sum_nr_running)
return true;
return sgs->group_util > busiest->group_util;
}
if (sgs->avg_load <= busiest->avg_load) if (sgs->avg_load <= busiest->avg_load)
return false; return false;
@ -9725,10 +9734,6 @@ static bool update_sd_pick_busiest(struct lb_env *env,
asym_packing: asym_packing:
if (env->prefer_spread &&
(sgs->sum_nr_running < busiest->sum_nr_running))
return false;
/* This is the busiest node in its class. */ /* This is the busiest node in its class. */
if (!(env->sd->flags & SD_ASYM_PACKING)) if (!(env->sd->flags & SD_ASYM_PACKING))
return true; return true;
@ -10199,15 +10204,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
return fix_small_imbalance(env, sds); return fix_small_imbalance(env, sds);
} }
/*
* If we couldn't find any imbalance, then boost the imbalance
* with the group util.
*/
if (env->prefer_spread && !env->imbalance &&
env->idle != CPU_NOT_IDLE &&
busiest->sum_nr_running > busiest->group_weight)
env->imbalance = busiest->group_util;
} }
/******* find_busiest_group() helpers end here *********************/ /******* find_busiest_group() helpers end here *********************/
@ -10243,7 +10239,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
int cpu_local, cpu_busiest; int cpu_local, cpu_busiest;
unsigned long capacity_local, capacity_busiest; unsigned long capacity_local, capacity_busiest;
if (env->idle != CPU_NEWLY_IDLE) if (env->idle != CPU_NEWLY_IDLE && !env->prefer_spread)
goto out_balanced; goto out_balanced;
if (!sds.local || !sds.busiest) if (!sds.local || !sds.busiest)
@ -10292,9 +10288,13 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
/* /*
* When dst_cpu is idle, prevent SMP nice and/or asymmetric group * When dst_cpu is idle, prevent SMP nice and/or asymmetric group
* capacities from resulting in underutilization due to avg_load. * capacities from resulting in underutilization due to avg_load.
*
* When prefer_spread is enabled, force the balance even when
* busiest group has some capacity but loaded with more than 1
* task.
*/ */
if (env->idle != CPU_NOT_IDLE && group_has_capacity(env, local) && if (env->idle != CPU_NOT_IDLE && group_has_capacity(env, local) &&
busiest->group_no_capacity) (busiest->group_no_capacity || env->prefer_spread))
goto force_balance; goto force_balance;
/* Misfit tasks should be dealt with regardless of the avg load */ /* Misfit tasks should be dealt with regardless of the avg load */
@ -10340,6 +10340,14 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
/* Looks like there is an imbalance. Compute it */ /* Looks like there is an imbalance. Compute it */
env->src_grp_type = busiest->group_type; env->src_grp_type = busiest->group_type;
calculate_imbalance(env, &sds); calculate_imbalance(env, &sds);
/*
* If we couldn't find any imbalance, then boost the imbalance
* based on the group util.
*/
if (!env->imbalance && env->prefer_spread)
env->imbalance = (busiest->group_util >> 1);
trace_sched_load_balance_stats(sds.busiest->cpumask[0], trace_sched_load_balance_stats(sds.busiest->cpumask[0],
busiest->group_type, busiest->avg_load, busiest->group_type, busiest->avg_load,
busiest->load_per_task, sds.local->cpumask[0], busiest->load_per_task, sds.local->cpumask[0],
@ -10449,7 +10457,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
* to: wl_i * capacity_j > wl_j * capacity_i; where j is * to: wl_i * capacity_j > wl_j * capacity_i; where j is
* our previous maximum. * our previous maximum.
*/ */
if (wl * busiest_capacity > busiest_load * capacity) { if (wl * busiest_capacity >= busiest_load * capacity) {
busiest_load = wl; busiest_load = wl;
busiest_capacity = capacity; busiest_capacity = capacity;
busiest = rq; busiest = rq;
@ -10595,7 +10603,8 @@ static int load_balance(int this_cpu, struct rq *this_rq,
.loop = 0, .loop = 0,
}; };
env.prefer_spread = (prefer_spread_on_idle(this_cpu) && env.prefer_spread = (idle != CPU_NOT_IDLE &&
prefer_spread_on_idle(this_cpu) &&
!((sd->flags & SD_ASYM_CPUCAPACITY) && !((sd->flags & SD_ASYM_CPUCAPACITY) &&
!is_asym_cap_cpu(this_cpu))); !is_asym_cap_cpu(this_cpu)));