diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 40f2d69fe029..98c7427d761b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9686,10 +9686,19 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 	if (sgs->group_type < busiest->group_type)
 		return false;
 
-	if (env->prefer_spread && env->idle != CPU_NOT_IDLE &&
-	    (sgs->sum_nr_running > busiest->sum_nr_running) &&
-	    (sgs->group_util > busiest->group_util))
-		return true;
+	/*
+	 * This sg and busiest are classified the same. When prefer_spread
+	 * is true, we want to maximize the chance of pulling tasks, so
+	 * prefer to pick the sg with more runnable tasks and break ties
+	 * with utilization.
+	 */
+	if (env->prefer_spread) {
+		if (sgs->sum_nr_running < busiest->sum_nr_running)
+			return false;
+		if (sgs->sum_nr_running > busiest->sum_nr_running)
+			return true;
+		return sgs->group_util > busiest->group_util;
+	}
 
 	if (sgs->avg_load <= busiest->avg_load)
 		return false;
@@ -9725,10 +9734,6 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 
 asym_packing:
-	if (env->prefer_spread &&
-	    (sgs->sum_nr_running < busiest->sum_nr_running))
-		return false;
-
 	/* This is the busiest node in its class. */
 	if (!(env->sd->flags & SD_ASYM_PACKING))
 		return true;
 
@@ -10199,15 +10204,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 		return fix_small_imbalance(env, sds);
 	}
-
-	/*
-	 * If we couldn't find any imbalance, then boost the imbalance
-	 * with the group util.
-	 */
-	if (env->prefer_spread && !env->imbalance &&
-	    env->idle != CPU_NOT_IDLE &&
-	    busiest->sum_nr_running > busiest->group_weight)
-		env->imbalance = busiest->group_util;
 }
 
 /******* find_busiest_group() helpers end here *********************/
 
@@ -10243,7 +10239,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 		int cpu_local, cpu_busiest;
 		unsigned long capacity_local, capacity_busiest;
 
-		if (env->idle != CPU_NEWLY_IDLE)
+		if (env->idle != CPU_NEWLY_IDLE && !env->prefer_spread)
 			goto out_balanced;
 
 		if (!sds.local || !sds.busiest)
@@ -10292,9 +10288,13 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 	/*
 	 * When dst_cpu is idle, prevent SMP nice and/or asymmetric group
 	 * capacities from resulting in underutilization due to avg_load.
+	 *
+	 * When prefer_spread is enabled, force the balance even when the
+	 * busiest group has some capacity but is loaded with more than 1
+	 * task.
 	 */
 	if (env->idle != CPU_NOT_IDLE && group_has_capacity(env, local) &&
-	    busiest->group_no_capacity)
+	    (busiest->group_no_capacity || env->prefer_spread))
 		goto force_balance;
 
 	/* Misfit tasks should be dealt with regardless of the avg load */
@@ -10340,6 +10340,14 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 	/* Looks like there is an imbalance. Compute it */
 	env->src_grp_type = busiest->group_type;
 	calculate_imbalance(env, &sds);
+
+	/*
+	 * If we couldn't find any imbalance, then boost the imbalance
+	 * based on the group util.
+	 */
+	if (!env->imbalance && env->prefer_spread)
+		env->imbalance = (busiest->group_util >> 1);
+
 	trace_sched_load_balance_stats(sds.busiest->cpumask[0],
 			busiest->group_type, busiest->avg_load,
 			busiest->load_per_task, sds.local->cpumask[0],
@@ -10449,7 +10457,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 		 * to: wl_i * capacity_j > wl_j * capacity_i; where j is
 		 * our previous maximum.
 		 */
-		if (wl * busiest_capacity > busiest_load * capacity) {
+		if (wl * busiest_capacity >= busiest_load * capacity) {
 			busiest_load = wl;
 			busiest_capacity = capacity;
 			busiest = rq;
@@ -10595,7 +10603,8 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		.loop			= 0,
 	};
 
-	env.prefer_spread = (prefer_spread_on_idle(this_cpu) &&
+	env.prefer_spread = (idle != CPU_NOT_IDLE &&
+			     prefer_spread_on_idle(this_cpu) &&
 			     !((sd->flags & SD_ASYM_CPUCAPACITY) &&
 			     !is_asym_cap_cpu(this_cpu)));
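
As a quick illustration of the new selection order in update_sd_pick_busiest() when prefer_spread is set, below is a minimal userspace sketch (not part of the patch; the struct and helper names are hypothetical stand-ins for the kernel's sg_lb_stats fields): groups are compared first by sum_nr_running, and group_util only breaks an exact tie.

/*
 * Illustrative sketch only: models the prefer_spread tie-break from the
 * hunk above with simplified, hypothetical types.
 */
#include <stdbool.h>
#include <stdio.h>

struct group_stats {
	unsigned int sum_nr_running;	/* runnable tasks in the group */
	unsigned long group_util;	/* aggregated utilization */
};

/* Prefer more runnable tasks; use utilization only on an exact tie. */
static bool pick_as_busiest(const struct group_stats *sgs,
			    const struct group_stats *busiest)
{
	if (sgs->sum_nr_running < busiest->sum_nr_running)
		return false;
	if (sgs->sum_nr_running > busiest->sum_nr_running)
		return true;
	return sgs->group_util > busiest->group_util;
}

int main(void)
{
	struct group_stats busiest = { .sum_nr_running = 3, .group_util = 400 };
	struct group_stats a = { .sum_nr_running = 4, .group_util = 300 };
	struct group_stats b = { .sum_nr_running = 3, .group_util = 500 };

	/* More runnable tasks wins even with lower utilization. */
	printf("a picked: %d\n", pick_as_busiest(&a, &busiest));
	/* Equal task count: utilization breaks the tie. */
	printf("b picked: %d\n", pick_as_busiest(&b, &busiest));
	return 0;
}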