Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "Three fixes for scheduler crashes, each triggers in relatively rare,
  hardware environment dependent situations"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Rework sched_fair time accounting
  math64: Add mul_u64_u32_shr()
  sched: Remove PREEMPT_NEED_RESCHED from generic code
  sched: Initialize power_orig for overlapping groups
commit dd0508093b

8 changed files with 127 additions and 108 deletions
arch/x86/Kconfig

@@ -26,6 +26,7 @@ config X86
 	select HAVE_AOUT if X86_32
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_SUPPORTS_INT128 if X86_64
 	select ARCH_WANTS_PROT_NUMA_PROT_NONE
 	select HAVE_IDE
 	select HAVE_OPROFILE
arch/x86/include/asm/preempt.h

@@ -7,6 +7,12 @@
 
 DECLARE_PER_CPU(int, __preempt_count);
 
+/*
+ * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such
+ * that a decrement hitting 0 means we can and should reschedule.
+ */
+#define PREEMPT_ENABLED	(0 + PREEMPT_NEED_RESCHED)
+
 /*
  * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
  * that think a non-zero value indicates we cannot preempt.
@@ -74,6 +80,11 @@ static __always_inline void __preempt_count_sub(int val)
 	__this_cpu_add_4(__preempt_count, -val);
 }
 
+/*
+ * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule
+ * a decrement which hits zero means we have no preempt_count and should
+ * reschedule.
+ */
 static __always_inline bool __preempt_count_dec_and_test(void)
 {
 	GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
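For context, not part of the diff: the comments added above rely on keeping PREEMPT_NEED_RESCHED inverted inside the per-cpu preempt count, so preempt_enable() can check "preempt count dropped to zero" and "reschedule needed" with the single decrement-and-test seen in GEN_UNARY_RMWcc("decl", ...). A stand-alone user-space sketch of that trick; the demo_ names and the bit value are made up:

/* demo_preempt.c -- illustration only, not kernel code */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_NEED_RESCHED_BIT	0x80000000u	/* kept *set* while NO reschedule is needed */

/* preemptible (count 0) and no reschedule pending */
static unsigned int demo_preempt_count = DEMO_NEED_RESCHED_BIT;

static void demo_preempt_disable(void)   { demo_preempt_count += 1; }
static void demo_set_need_resched(void)  { demo_preempt_count &= ~DEMO_NEED_RESCHED_BIT; }

/* one decrement plus one test replaces "count == 0 && need_resched()" */
static bool demo_preempt_enable_should_resched(void)
{
	return --demo_preempt_count == 0;
}

int main(void)
{
	demo_preempt_disable();
	demo_set_need_resched();
	printf("reschedule at preempt_enable: %s\n",
	       demo_preempt_enable_should_resched() ? "yes" : "no");	/* prints "yes" */
	return 0;
}

Because the bit stays set whenever no reschedule is pending, the counter can only hit zero once the last preempt_disable() has been undone and the bit has been cleared. The asm-generic variant below drops this trick: as the added comment there explains, load-store architectures cannot do the per-cpu atomic update, so the folded bit could get lost.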
include/asm-generic/preempt.h

@@ -3,13 +3,11 @@
 
 #include <linux/thread_info.h>
 
-/*
- * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
- * that think a non-zero value indicates we cannot preempt.
- */
+#define PREEMPT_ENABLED	(0)
+
 static __always_inline int preempt_count(void)
 {
-	return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED;
+	return current_thread_info()->preempt_count;
 }
 
 static __always_inline int *preempt_count_ptr(void)
@@ -17,11 +15,6 @@ static __always_inline int *preempt_count_ptr(void)
 	return &current_thread_info()->preempt_count;
 }
 
-/*
- * We now loose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the
- * alternative is loosing a reschedule. Better schedule too often -- also this
- * should be a very rare operation.
- */
 static __always_inline void preempt_count_set(int pc)
 {
 	*preempt_count_ptr() = pc;
@@ -41,28 +34,17 @@ static __always_inline void preempt_count_set(int pc)
 	task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
 } while (0)
 
-/*
- * We fold the NEED_RESCHED bit into the preempt count such that
- * preempt_enable() can decrement and test for needing to reschedule with a
- * single instruction.
- *
- * We invert the actual bit, so that when the decrement hits 0 we know we both
- * need to resched (the bit is cleared) and can resched (no preempt count).
- */
-
 static __always_inline void set_preempt_need_resched(void)
 {
-	*preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED;
 }
 
 static __always_inline void clear_preempt_need_resched(void)
 {
-	*preempt_count_ptr() |= PREEMPT_NEED_RESCHED;
 }
 
 static __always_inline bool test_preempt_need_resched(void)
 {
-	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
+	return false;
 }
 
 /*
@@ -81,7 +63,12 @@ static __always_inline void __preempt_count_sub(int val)
 
 static __always_inline bool __preempt_count_dec_and_test(void)
 {
-	return !--*preempt_count_ptr();
+	/*
+	 * Because of load-store architectures cannot do per-cpu atomic
+	 * operations; we cannot use PREEMPT_NEED_RESCHED because it might get
+	 * lost.
+	 */
+	return !--*preempt_count_ptr() && tif_need_resched();
 }
 
 /*
@@ -89,7 +76,7 @@ static __always_inline bool __preempt_count_dec_and_test(void)
  */
static __always_inline bool should_resched(void)
 {
-	return unlikely(!*preempt_count_ptr());
+	return unlikely(!preempt_count() && tif_need_resched());
 }
 
 #ifdef CONFIG_PREEMPT
include/linux/math64.h

@@ -133,4 +133,34 @@ __iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
 	return ret;
 }
 
+#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
+
+#ifndef mul_u64_u32_shr
+static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
+{
+	return (u64)(((unsigned __int128)a * mul) >> shift);
+}
+#endif /* mul_u64_u32_shr */
+
+#else
+
+#ifndef mul_u64_u32_shr
+static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
+{
+	u32 ah, al;
+	u64 ret;
+
+	al = a;
+	ah = a >> 32;
+
+	ret = ((u64)al * mul) >> shift;
+	if (ah)
+		ret += ((u64)ah * mul) << (32 - shift);
+
+	return ret;
+}
+#endif /* mul_u64_u32_shr */
+
+#endif
+
 #endif	/* _LINUX_MATH64_H */
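For context, not part of the diff: the fallback added above splits the 64-bit operand a == (ah << 32) + al into 32-bit halves so each partial product fits in 64 bits, using (a * mul) >> shift == ((ah * mul) << (32 - shift)) + ((al * mul) >> shift) for shift <= 32. A small user-space check of that identity against a full 128-bit product; it assumes a compiler providing __int128, and the demo_ prefix is made up:

/* demo_mul_u64_u32_shr.c -- illustration only, not kernel code */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* same split as the non-__int128 fallback above, with renamed identifiers */
static uint64_t demo_mul_u64_u32_shr(uint64_t a, uint32_t mul, unsigned int shift)
{
	uint32_t ah = a >> 32, al = (uint32_t)a;
	uint64_t ret = ((uint64_t)al * mul) >> shift;

	if (ah)
		ret += ((uint64_t)ah * mul) << (32 - shift);
	return ret;
}

int main(void)
{
	uint64_t a = 0x123456789ULL;
	uint32_t mul = 0xdeadbeef;
	unsigned int shift = 22;	/* __calc_delta() never shifts by more than 32 */

	/* reference result via a full 128-bit product (GCC/clang __int128) */
	uint64_t wide = (uint64_t)(((unsigned __int128)a * mul) >> shift);

	assert(demo_mul_u64_u32_shr(a, mul, shift) == wide);
	printf("(a * mul) >> shift = %llu\n", (unsigned long long)wide);
	return 0;
}

The #if added above picks the __int128 version when the architecture selects ARCH_SUPPORTS_INT128 (as the x86 Kconfig hunk does for X86_64) and uses the split form otherwise.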
include/linux/sched.h

@@ -440,8 +440,6 @@ struct task_cputime {
 		.sum_exec_runtime = 0,				\
 	}
 
-#define PREEMPT_ENABLED	(PREEMPT_NEED_RESCHED)
-
 #ifdef CONFIG_PREEMPT_COUNT
 #define PREEMPT_DISABLED	(1 + PREEMPT_ENABLED)
 #else
@@ -932,7 +930,8 @@ struct pipe_inode_info;
 struct uts_namespace;
 
 struct load_weight {
-	unsigned long weight, inv_weight;
+	unsigned long weight;
+	u32 inv_weight;
 };
 
 struct sched_avg {
init/Kconfig

@@ -809,6 +809,12 @@ config GENERIC_SCHED_CLOCK
 config ARCH_SUPPORTS_NUMA_BALANCING
 	bool
 
+#
+# For architectures that know their GCC __int128 support is sound
+#
+config ARCH_SUPPORTS_INT128
+	bool
+
 # For architectures that (ab)use NUMA to represent different memory regions
 # all cpu-local but of different latencies, such as SuperH.
 #
kernel/sched/core.c

@@ -5112,6 +5112,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		 * die on a /0 trap.
 		 */
 		sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span);
+		sg->sgp->power_orig = sg->sgp->power;
 
 		/*
 		 * Make sure the first group of this domain contains the
kernel/sched/fair.c

@@ -178,40 +178,17 @@ void sched_init_granularity(void)
 	update_sysctl();
 }
 
-#if BITS_PER_LONG == 32
-# define WMULT_CONST	(~0UL)
-#else
-# define WMULT_CONST	(1UL << 32)
-#endif
-
+#define WMULT_CONST	(~0U)
 #define WMULT_SHIFT	32
 
-/*
- * Shift right and round:
- */
-#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
-
-/*
- * delta *= weight / lw
- */
-static unsigned long
-calc_delta_mine(unsigned long delta_exec, unsigned long weight,
-		struct load_weight *lw)
+static void __update_inv_weight(struct load_weight *lw)
 {
-	u64 tmp;
+	unsigned long w;
 
-	/*
-	 * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched
-	 * entities since MIN_SHARES = 2. Treat weight as 1 if less than
-	 * 2^SCHED_LOAD_RESOLUTION.
-	 */
-	if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION)))
-		tmp = (u64)delta_exec * scale_load_down(weight);
-	else
-		tmp = (u64)delta_exec;
+	if (likely(lw->inv_weight))
+		return;
 
-	if (!lw->inv_weight) {
-		unsigned long w = scale_load_down(lw->weight);
+	w = scale_load_down(lw->weight);
 
 	if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
 		lw->inv_weight = 1;
@@ -219,18 +196,43 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 		lw->inv_weight = WMULT_CONST;
 	else
 		lw->inv_weight = WMULT_CONST / w;
 }
 
+/*
+ * delta_exec * weight / lw.weight
+ *   OR
+ * (delta_exec * (weight * lw->inv_weight)) >> WMULT_SHIFT
+ *
+ * Either weight := NICE_0_LOAD and lw \e prio_to_wmult[], in which case
+ * we're guaranteed shift stays positive because inv_weight is guaranteed to
+ * fit 32 bits, and NICE_0_LOAD gives another 10 bits; therefore shift >= 22.
+ *
+ * Or, weight =< lw.weight (because lw.weight is the runqueue weight), thus
+ * weight/lw.weight <= 1, and therefore our shift will also be positive.
+ */
+static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight *lw)
+{
+	u64 fact = scale_load_down(weight);
+	int shift = WMULT_SHIFT;
+
+	__update_inv_weight(lw);
+
+	if (unlikely(fact >> 32)) {
+		while (fact >> 32) {
+			fact >>= 1;
+			shift--;
+		}
+	}
 
-	/*
-	 * Check whether we'd overflow the 64-bit multiplication:
-	 */
-	if (unlikely(tmp > WMULT_CONST))
-		tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
-			WMULT_SHIFT/2);
-	else
-		tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
+	/* hint to use a 32x32->64 mul */
+	fact = (u64)(u32)fact * lw->inv_weight;
 
-	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
+	while (fact >> 32) {
+		fact >>= 1;
+		shift--;
+	}
+
+	return mul_u64_u32_shr(delta_exec, fact, shift);
 }
 
@@ -443,7 +445,7 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 static __always_inline
-void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec);
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec);
 
 /**************************************************************
  * Scheduling class tree data structure manipulation methods:
@@ -612,11 +614,10 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
 /*
  * delta /= w
  */
-static inline unsigned long
-calc_delta_fair(unsigned long delta, struct sched_entity *se)
+static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
 {
 	if (unlikely(se->load.weight != NICE_0_LOAD))
-		delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
+		delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
 
 	return delta;
 }
@@ -665,7 +666,7 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			update_load_add(&lw, se->load.weight);
 			load = &lw;
 		}
-		slice = calc_delta_mine(slice, se->load.weight, load);
+		slice = __calc_delta(slice, se->load.weight, load);
 	}
 	return slice;
 }
@@ -703,47 +704,32 @@ void init_task_runnable_average(struct task_struct *p)
 #endif
 
 /*
- * Update the current task's runtime statistics. Skip current tasks that
- * are not in our scheduling class.
+ * Update the current task's runtime statistics.
  */
-static inline void
-__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
-	      unsigned long delta_exec)
-{
-	unsigned long delta_exec_weighted;
-
-	schedstat_set(curr->statistics.exec_max,
-		      max((u64)delta_exec, curr->statistics.exec_max));
-
-	curr->sum_exec_runtime += delta_exec;
-	schedstat_add(cfs_rq, exec_clock, delta_exec);
-	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
-
-	curr->vruntime += delta_exec_weighted;
-	update_min_vruntime(cfs_rq);
-}
-
 static void update_curr(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
 	u64 now = rq_clock_task(rq_of(cfs_rq));
-	unsigned long delta_exec;
+	u64 delta_exec;
 
 	if (unlikely(!curr))
 		return;
 
-	/*
-	 * Get the amount of time the current task was running
-	 * since the last time we changed load (this cannot
-	 * overflow on 32 bits):
-	 */
-	delta_exec = (unsigned long)(now - curr->exec_start);
-	if (!delta_exec)
+	delta_exec = now - curr->exec_start;
+	if (unlikely((s64)delta_exec <= 0))
 		return;
 
-	__update_curr(cfs_rq, curr, delta_exec);
 	curr->exec_start = now;
 
+	schedstat_set(curr->statistics.exec_max,
+		      max(delta_exec, curr->statistics.exec_max));
+
+	curr->sum_exec_runtime += delta_exec;
+	schedstat_add(cfs_rq, exec_clock, delta_exec);
+
+	curr->vruntime += calc_delta_fair(delta_exec, curr);
+	update_min_vruntime(cfs_rq);
+
 	if (entity_is_task(curr)) {
 		struct task_struct *curtask = task_of(curr);
@@ -3015,8 +3001,7 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	}
 }
 
-static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
-				     unsigned long delta_exec)
+static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
 {
 	/* dock delta_exec before expiring quota (as it could span periods) */
 	cfs_rq->runtime_remaining -= delta_exec;
@@ -3034,7 +3019,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
 }
 
 static __always_inline
-void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec)
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
 {
 	if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled)
 		return;
@@ -3574,8 +3559,7 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
 	return rq_clock_task(rq_of(cfs_rq));
 }
 
-static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
-				   unsigned long delta_exec) {}
+static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
 static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
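For context, not part of the diff: the __calc_delta() added above computes delta_exec * weight / lw->weight without a division, multiplying by a cached inverse inv_weight ~= 2^32 / lw->weight and shifting right by WMULT_SHIFT, after normalizing fact so the final multiply fits mul_u64_u32_shr(). A simplified user-space rendering of that arithmetic; the demo values are made up and the scale_load_down()/SCHED_LOAD_RESOLUTION scaling is ignored:

/* demo_calc_delta.c -- illustration only, not kernel code */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t delta_exec = 6000000;	/* 6 ms of runtime, in ns */
	uint32_t weight = 1024;		/* nice-0 weight, unscaled */
	uint32_t rq_weight = 3072;	/* e.g. three nice-0 tasks on the runqueue */

	/* inv_weight ~= 2^32 / rq_weight; the kernel caches this in lw->inv_weight */
	uint32_t inv_weight = 0xffffffffu / rq_weight;

	uint64_t fact = (uint64_t)weight * inv_weight;
	unsigned int shift = 32;	/* WMULT_SHIFT */

	/* same normalization idea as __calc_delta(): keep fact within 32 bits */
	while (fact >> 32) {
		fact >>= 1;
		shift--;
	}

	uint64_t approx = (uint64_t)(((unsigned __int128)delta_exec * fact) >> shift);
	uint64_t exact = delta_exec * weight / rq_weight;

	printf("fixed-point: %llu ns, exact: %llu ns\n",
	       (unsigned long long)approx, (unsigned long long)exact);
	return 0;
}

The fixed-point result stays within a small rounding error of the exact division, which is why struct load_weight can carry inv_weight as a u32 (see the include/linux/sched.h hunk above).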