Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched

* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
  [PATCH] sched: debug feature - make the sched-domains tree runtime-tweakable
  [PATCH] sched: add above_background_load() function
  [PATCH] sched: update Documentation/sched-stats.txt
  [PATCH] sched: mark sysrq_sched_debug_show() static
  [PATCH] sched: make cpu_clock() not use the rq clock
  [PATCH] sched: remove unused rq->load_balance_class
  [PATCH] sched: arch preempt notifier mechanism
  [PATCH] sched: increase SCHED_LOAD_SCALE_FUZZ

commit 257f49251c
6 changed files with 360 additions and 101 deletions
Documentation/sched-stats.txt
@@ -1,10 +1,11 @@
-Version 10 of schedstats includes support for sched_domains, which
-hit the mainline kernel in 2.6.7. Some counters make more sense to be
-per-runqueue; other to be per-domain. Note that domains (and their associated
-information) will only be pertinent and available on machines utilizing
-CONFIG_SMP.
-
-In version 10 of schedstat, there is at least one level of domain
+Version 14 of schedstats includes support for sched_domains, which hit the
+mainline kernel in 2.6.20 although it is identical to the stats from version
+12 which was in the kernel from 2.6.13-2.6.19 (version 13 never saw a kernel
+release). Some counters make more sense to be per-runqueue; other to be
+per-domain. Note that domains (and their associated information) will only
+be pertinent and available on machines utilizing CONFIG_SMP.
+
+In version 14 of schedstat, there is at least one level of domain
 statistics for each cpu listed, and there may well be more than one
 domain. Domains have no particular names in this implementation, but
 the highest numbered one typically arbitrates balancing across all the
@@ -27,7 +28,7 @@ to write their own scripts, the fields are described here.
 
 CPU statistics
 --------------
-cpu<N> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
+cpu<N> 1 2 3 4 5 6 7 8 9 10 11 12
 
 NOTE: In the sched_yield() statistics, the active queue is considered empty
 if it has only one process in it, since obviously the process calling
@@ -39,48 +40,20 @@ First four fields are sched_yield() statistics:
 3) # of times just the expired queue was empty
 4) # of times sched_yield() was called
 
-Next four are schedule() statistics:
-5) # of times the active queue had at least one other process on it
-6) # of times we switched to the expired queue and reused it
-7) # of times schedule() was called
-8) # of times schedule() left the processor idle
+Next three are schedule() statistics:
+5) # of times we switched to the expired queue and reused it
+6) # of times schedule() was called
+7) # of times schedule() left the processor idle
 
-Next four are active_load_balance() statistics:
-9) # of times active_load_balance() was called
-10) # of times active_load_balance() caused this cpu to gain a task
-11) # of times active_load_balance() caused this cpu to lose a task
-12) # of times active_load_balance() tried to move a task and failed
-
-Next three are try_to_wake_up() statistics:
-13) # of times try_to_wake_up() was called
-14) # of times try_to_wake_up() successfully moved the awakening task
-15) # of times try_to_wake_up() attempted to move the awakening task
-
-Next two are wake_up_new_task() statistics:
-16) # of times wake_up_new_task() was called
-17) # of times wake_up_new_task() successfully moved the new task
-
-Next one is a sched_migrate_task() statistic:
-18) # of times sched_migrate_task() was called
-
-Next one is a sched_balance_exec() statistic:
-19) # of times sched_balance_exec() was called
+Next two are try_to_wake_up() statistics:
+8) # of times try_to_wake_up() was called
+9) # of times try_to_wake_up() was called to wake up the local cpu
 
 Next three are statistics describing scheduling latency:
-20) sum of all time spent running by tasks on this processor (in ms)
-21) sum of all time spent waiting to run by tasks on this processor (in ms)
-22) # of tasks (not necessarily unique) given to the processor
-
-The last six are statistics dealing with pull_task():
-23) # of times pull_task() moved a task to this cpu when newly idle
-24) # of times pull_task() stole a task from this cpu when another cpu
-    was newly idle
-25) # of times pull_task() moved a task to this cpu when idle
-26) # of times pull_task() stole a task from this cpu when another cpu
-    was idle
-27) # of times pull_task() moved a task to this cpu when busy
-28) # of times pull_task() stole a task from this cpu when another cpu
-    was busy
+10) sum of all time spent running by tasks on this processor (in jiffies)
+11) sum of all time spent waiting to run by tasks on this processor (in
+    jiffies)
+12) # of timeslices run on this cpu
 
 
 Domain statistics
@@ -89,65 +62,95 @@ One of these is produced per domain for each cpu described. (Note that if
 CONFIG_SMP is not defined, *no* domains are utilized and these lines
 will not appear in the output.)
 
-domain<N> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
 
 The first field is a bit mask indicating what cpus this domain operates over.
 
-The next fifteen are a variety of load_balance() statistics:
+The next 24 are a variety of load_balance() statistics in grouped into types
+of idleness (idle, busy, and newly idle):
 
-1) # of times in this domain load_balance() was called when the cpu
-   was idle
-2) # of times in this domain load_balance() was called when the cpu
-   was busy
-3) # of times in this domain load_balance() was called when the cpu
-   was just becoming idle
-4) # of times in this domain load_balance() tried to move one or more
-   tasks and failed, when the cpu was idle
-5) # of times in this domain load_balance() tried to move one or more
-   tasks and failed, when the cpu was busy
-6) # of times in this domain load_balance() tried to move one or more
-   tasks and failed, when the cpu was just becoming idle
-7) sum of imbalances discovered (if any) with each call to
-   load_balance() in this domain when the cpu was idle
-8) sum of imbalances discovered (if any) with each call to
-   load_balance() in this domain when the cpu was busy
-9) sum of imbalances discovered (if any) with each call to
-   load_balance() in this domain when the cpu was just becoming idle
-10) # of times in this domain load_balance() was called but did not find
-    a busier queue while the cpu was idle
-11) # of times in this domain load_balance() was called but did not find
-    a busier queue while the cpu was busy
-12) # of times in this domain load_balance() was called but did not find
-    a busier queue while the cpu was just becoming idle
-13) # of times in this domain a busier queue was found while the cpu was
-    idle but no busier group was found
-14) # of times in this domain a busier queue was found while the cpu was
-    busy but no busier group was found
-15) # of times in this domain a busier queue was found while the cpu was
-    just becoming idle but no busier group was found
-
-Next two are sched_balance_exec() statistics:
-17) # of times in this domain sched_balance_exec() successfully pushed
-    a task to a new cpu
-18) # of times in this domain sched_balance_exec() tried but failed to
-    push a task to a new cpu
-
-Next two are try_to_wake_up() statistics:
-19) # of times in this domain try_to_wake_up() tried to move a task based
-    on affinity and cache warmth
-20) # of times in this domain try_to_wake_up() tried to move a task based
-    on load balancing
+1) # of times in this domain load_balance() was called when the
+   cpu was idle
+2) # of times in this domain load_balance() checked but found
+   the load did not require balancing when the cpu was idle
+3) # of times in this domain load_balance() tried to move one or
+   more tasks and failed, when the cpu was idle
+4) sum of imbalances discovered (if any) with each call to
+   load_balance() in this domain when the cpu was idle
+5) # of times in this domain pull_task() was called when the cpu
+   was idle
+6) # of times in this domain pull_task() was called even though
+   the target task was cache-hot when idle
+7) # of times in this domain load_balance() was called but did
+   not find a busier queue while the cpu was idle
+8) # of times in this domain a busier queue was found while the
+   cpu was idle but no busier group was found
+
+9) # of times in this domain load_balance() was called when the
+   cpu was busy
+10) # of times in this domain load_balance() checked but found the
+    load did not require balancing when busy
+11) # of times in this domain load_balance() tried to move one or
+    more tasks and failed, when the cpu was busy
+12) sum of imbalances discovered (if any) with each call to
+    load_balance() in this domain when the cpu was busy
+13) # of times in this domain pull_task() was called when busy
+14) # of times in this domain pull_task() was called even though the
+    target task was cache-hot when busy
+15) # of times in this domain load_balance() was called but did not
+    find a busier queue while the cpu was busy
+16) # of times in this domain a busier queue was found while the cpu
+    was busy but no busier group was found
+
+17) # of times in this domain load_balance() was called when the
+    cpu was just becoming idle
+18) # of times in this domain load_balance() checked but found the
+    load did not require balancing when the cpu was just becoming idle
+19) # of times in this domain load_balance() tried to move one or more
+    tasks and failed, when the cpu was just becoming idle
+20) sum of imbalances discovered (if any) with each call to
+    load_balance() in this domain when the cpu was just becoming idle
+21) # of times in this domain pull_task() was called when newly idle
+22) # of times in this domain pull_task() was called even though the
+    target task was cache-hot when just becoming idle
+23) # of times in this domain load_balance() was called but did not
+    find a busier queue while the cpu was just becoming idle
+24) # of times in this domain a busier queue was found while the cpu
+    was just becoming idle but no busier group was found
+
+Next three are active_load_balance() statistics:
+25) # of times active_load_balance() was called
+26) # of times active_load_balance() tried to move a task and failed
+27) # of times active_load_balance() successfully moved a task
+
+Next three are sched_balance_exec() statistics:
+28) sbe_cnt is not used
+29) sbe_balanced is not used
+30) sbe_pushed is not used
+
+Next three are sched_balance_fork() statistics:
+31) sbf_cnt is not used
+32) sbf_balanced is not used
+33) sbf_pushed is not used
+
+Next three are try_to_wake_up() statistics:
+34) # of times in this domain try_to_wake_up() awoke a task that
+    last ran on a different cpu in this domain
+35) # of times in this domain try_to_wake_up() moved a task to the
+    waking cpu because it was cache-cold on its own cpu anyway
+36) # of times in this domain try_to_wake_up() started passive balancing
 
 /proc/<pid>/schedstat
 ----------------
 schedstats also adds a new /proc/<pid/schedstat file to include some of
 the same information on a per-process level. There are three fields in
-this file correlating to fields 20, 21, and 22 in the CPU fields, but
-they only apply for that process.
+this file correlating for that process to:
+      1) time spent on the cpu
+      2) time spent waiting on a runqueue
+      3) # of timeslices run on this cpu
 
 A program could be easily written to make use of these extra fields to
 report on how well a particular process or set of processes is faring
 under the scheduler's policies. A simple version of such a program is
 available at
-    http://eaglet.rain.com/rick/linux/schedstat/v10/latency.c
+    http://eaglet.rain.com/rick/linux/schedstat/v12/latency.c
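The three per-process fields make a quick latency check easy to script. The
following minimal userspace sketch is in the spirit of the latency.c program
linked above; it assumes only the three-field layout described in this
document, and the program itself is illustrative, not part of the commit:

    #include <stdio.h>

    int main(int argc, char **argv)
    {
        char path[64];
        unsigned long long run, wait, slices;
        FILE *f;

        if (argc != 2) {
            fprintf(stderr, "usage: %s <pid>\n", argv[0]);
            return 1;
        }
        snprintf(path, sizeof(path), "/proc/%s/schedstat", argv[1]);
        f = fopen(path, "r");
        if (!f || fscanf(f, "%llu %llu %llu", &run, &wait, &slices) != 3) {
            perror(path);
            return 1;
        }
        fclose(f);
        printf("ran %llu, waited %llu, over %llu timeslices", run, wait, slices);
        if (slices)  /* average wait per timeslice, a rough latency signal */
            printf(" (avg wait %.2f per slice)", (double)wait / slices);
        putchar('\n');
        return 0;
    }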
include/linux/preempt.h
@@ -8,6 +8,7 @@
 
 #include <linux/thread_info.h>
 #include <linux/linkage.h>
+#include <linux/list.h>
 
 #ifdef CONFIG_DEBUG_PREEMPT
 extern void fastcall add_preempt_count(int val);
@@ -60,4 +61,47 @@ do { \
 
 #endif
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+
+struct preempt_notifier;
+
+/**
+ * preempt_ops - notifiers called when a task is preempted and rescheduled
+ * @sched_in: we're about to be rescheduled:
+ *    notifier: struct preempt_notifier for the task being scheduled
+ *    cpu:  cpu we're scheduled on
+ * @sched_out: we've just been preempted
+ *    notifier: struct preempt_notifier for the task being preempted
+ *    next: the task that's kicking us out
+ */
+struct preempt_ops {
+        void (*sched_in)(struct preempt_notifier *notifier, int cpu);
+        void (*sched_out)(struct preempt_notifier *notifier,
+                          struct task_struct *next);
+};
+
+/**
+ * preempt_notifier - key for installing preemption notifiers
+ * @link: internal use
+ * @ops: defines the notifier functions to be called
+ *
+ * Usually used in conjunction with container_of().
+ */
+struct preempt_notifier {
+        struct hlist_node link;
+        struct preempt_ops *ops;
+};
+
+void preempt_notifier_register(struct preempt_notifier *notifier);
+void preempt_notifier_unregister(struct preempt_notifier *notifier);
+
+static inline void preempt_notifier_init(struct preempt_notifier *notifier,
+                                     struct preempt_ops *ops)
+{
+        INIT_HLIST_NODE(&notifier->link);
+        notifier->ops = ops;
+}
+
+#endif
+
 #endif /* __LINUX_PREEMPT_H */
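A hypothetical client of this API (illustration only, not part of the
commit): embed the struct preempt_notifier in a private structure and
recover it with container_of() in the callbacks, as the kerneldoc above
suggests. The my_vcpu names below are invented:

    #include <linux/preempt.h>
    #include <linux/sched.h>
    #include <linux/kernel.h>

    struct my_vcpu {
        int id;
        struct preempt_notifier pn;     /* embedded notifier */
    };

    static void my_sched_in(struct preempt_notifier *notifier, int cpu)
    {
        struct my_vcpu *vcpu = container_of(notifier, struct my_vcpu, pn);

        pr_debug("vcpu %d scheduled in on cpu %d\n", vcpu->id, cpu);
    }

    static void my_sched_out(struct preempt_notifier *notifier,
                             struct task_struct *next)
    {
        struct my_vcpu *vcpu = container_of(notifier, struct my_vcpu, pn);

        pr_debug("vcpu %d preempted by %s\n", vcpu->id, next->comm);
    }

    static struct preempt_ops my_preempt_ops = {
        .sched_in  = my_sched_in,
        .sched_out = my_sched_out,
    };

    /* Must run in the task that wants notifications about itself,
     * since preempt_notifier_register() hooks into current. */
    static void my_vcpu_attach(struct my_vcpu *vcpu)
    {
        preempt_notifier_init(&vcpu->pn, &my_preempt_ops);
        preempt_notifier_register(&vcpu->pn);
    }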
include/linux/sched.h
@@ -681,7 +681,7 @@ enum cpu_idle_type {
 #define SCHED_LOAD_SHIFT        10
 #define SCHED_LOAD_SCALE        (1L << SCHED_LOAD_SHIFT)
 
-#define SCHED_LOAD_SCALE_FUZZ   (SCHED_LOAD_SCALE >> 5)
+#define SCHED_LOAD_SCALE_FUZZ   (SCHED_LOAD_SCALE >> 1)
 
 #ifdef CONFIG_SMP
 #define SD_LOAD_BALANCE         1       /* Do load balancing on this domain. */
@@ -786,6 +786,22 @@ extern int partition_sched_domains(cpumask_t *partition1,
 
 #endif  /* CONFIG_SMP */
 
+/*
+ * A runqueue laden with a single nice 0 task scores a weighted_cpuload of
+ * SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a
+ * task of nice 0 or enough lower priority tasks to bring up the
+ * weighted_cpuload
+ */
+static inline int above_background_load(void)
+{
+        unsigned long cpu;
+
+        for_each_online_cpu(cpu) {
+                if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE)
+                        return 1;
+        }
+        return 0;
+}
+
 struct io_context;                      /* See blkdev.h */
 struct cpuset;
 
@@ -935,6 +951,11 @@ struct task_struct {
         struct sched_class *sched_class;
         struct sched_entity se;
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+        /* list of struct preempt_notifier: */
+        struct hlist_head preempt_notifiers;
+#endif
+
         unsigned short ioprio;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
         unsigned int btrace_seq;
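For illustration (not in the commit): with SCHED_LOAD_SHIFT at 10,
SCHED_LOAD_SCALE is 1024, so the fuzz change above raises the value from
1024 >> 5 = 32 to 1024 >> 1 = 512. A hypothetical caller might use
above_background_load() to defer optional work; the helper below is
invented for the sketch and assumes <linux/sched.h> and <linux/errno.h>:

    static int my_try_background_scan(void)
    {
        if (above_background_load())
            return -EBUSY;  /* some cpu carries >= a full nice-0 load */
        /* ... run the optional, cache-hungry scan here ... */
        return 0;
    }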
kernel/Kconfig.preempt
@@ -63,3 +63,6 @@ config PREEMPT_BKL
           Say Y here if you are building a kernel for a desktop system.
           Say N if you are unsure.
 
+config PREEMPT_NOTIFIERS
+        bool
+
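PREEMPT_NOTIFIERS is a promptless bool, so nothing can enable it from
menuconfig; it is meant to be selected by whatever option needs the hooks.
A hypothetical user (the symbol below is invented for illustration):

    config MY_HYPERVISOR
            tristate "My hypervisor module"
            select PREEMPT_NOTIFIERS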
kernel/sched.c (204 lines changed)
@@ -53,6 +53,7 @@
 #include <linux/percpu.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
+#include <linux/sysctl.h>
 #include <linux/syscalls.h>
 #include <linux/times.h>
 #include <linux/tsacct_kern.h>
@@ -263,8 +264,6 @@ struct rq {
         unsigned int clock_warps, clock_overflows;
         unsigned int clock_unstable_events;
 
-        struct sched_class *load_balance_class;
-
         atomic_t nr_iowait;
 
 #ifdef CONFIG_SMP
@@ -385,13 +384,12 @@ static inline unsigned long long rq_clock(struct rq *rq)
  */
 unsigned long long cpu_clock(int cpu)
 {
-        struct rq *rq = cpu_rq(cpu);
         unsigned long long now;
         unsigned long flags;
 
-        spin_lock_irqsave(&rq->lock, flags);
-        now = rq_clock(rq);
-        spin_unlock_irqrestore(&rq->lock, flags);
+        local_irq_save(flags);
+        now = rq_clock(cpu_rq(cpu));
+        local_irq_restore(flags);
 
         return now;
 }
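The cpu_clock() change above drops the runqueue lock in favor of a plain
local_irq_save(), which makes it cheaper and safe to call from contexts that
may already hold a runqueue lock. A hypothetical caller (not part of this
commit), for example a tracer timestamping events on the local cpu; assumes
<linux/sched.h>, <linux/smp.h> and <linux/preempt.h>:

    static unsigned long long my_trace_timestamp(void)
    {
        unsigned long long t;

        preempt_disable();              /* pin to one cpu for the read */
        t = cpu_clock(smp_processor_id());
        preempt_enable();
        return t;
    }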
@@ -1592,6 +1590,10 @@ static void __sched_fork(struct task_struct *p)
         INIT_LIST_HEAD(&p->run_list);
         p->se.on_rq = 0;
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+        INIT_HLIST_HEAD(&p->preempt_notifiers);
+#endif
+
         /*
          * We mark the process as running here, but have not actually
          * inserted it onto the runqueue yet. This guarantees that
@@ -1673,6 +1675,63 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
         task_rq_unlock(rq, &flags);
 }
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+
+/**
+ * preempt_notifier_register - tell me when current is being being preempted
+ * and rescheduled
+ */
+void preempt_notifier_register(struct preempt_notifier *notifier)
+{
+        hlist_add_head(&notifier->link, &current->preempt_notifiers);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_register);
+
+/**
+ * preempt_notifier_unregister - no longer interested in preemption notifications
+ *
+ * This is safe to call from within a preemption notifier.
+ */
+void preempt_notifier_unregister(struct preempt_notifier *notifier)
+{
+        hlist_del(&notifier->link);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
+
+static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+        struct preempt_notifier *notifier;
+        struct hlist_node *node;
+
+        hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+                notifier->ops->sched_in(notifier, raw_smp_processor_id());
+}
+
+static void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+                                 struct task_struct *next)
+{
+        struct preempt_notifier *notifier;
+        struct hlist_node *node;
+
+        hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+                notifier->ops->sched_out(notifier, next);
+}
+
+#else
+
+static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+}
+
+static void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+                                 struct task_struct *next)
+{
+}
+
+#endif
+
 /**
  * prepare_task_switch - prepare to switch tasks
  * @rq: the runqueue preparing to switch
@@ -1685,8 +1744,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
  * prepare_task_switch sets up locking and calls architecture specific
  * hooks.
  */
-static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
+static inline void
+prepare_task_switch(struct rq *rq, struct task_struct *prev,
+                    struct task_struct *next)
 {
+        fire_sched_out_preempt_notifiers(prev, next);
         prepare_lock_switch(rq, next);
         prepare_arch_switch(next);
 }
@@ -1728,6 +1790,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
         prev_state = prev->state;
         finish_arch_switch(prev);
         finish_lock_switch(rq, prev);
+        fire_sched_in_preempt_notifiers(current);
         if (mm)
                 mmdrop(mm);
         if (unlikely(prev_state == TASK_DEAD)) {
@@ -1768,7 +1831,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 {
         struct mm_struct *mm, *oldmm;
 
-        prepare_task_switch(rq, next);
+        prepare_task_switch(rq, prev, next);
         mm = next->mm;
         oldmm = prev->active_mm;
         /*
@@ -5140,10 +5203,129 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
                 if (!next)
                         break;
                 migrate_dead(dead_cpu, next);
         }
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
+
+static struct ctl_table sd_ctl_dir[] = {
+        {CTL_UNNUMBERED, "sched_domain", NULL, 0, 0755, NULL, },
+        {0,},
+};
+
+static struct ctl_table sd_ctl_root[] = {
+        {CTL_UNNUMBERED, "kernel", NULL, 0, 0755, sd_ctl_dir, },
+        {0,},
+};
+
+static struct ctl_table *sd_alloc_ctl_entry(int n)
+{
+        struct ctl_table *entry =
+                kmalloc(n * sizeof(struct ctl_table), GFP_KERNEL);
+
+        BUG_ON(!entry);
+        memset(entry, 0, n * sizeof(struct ctl_table));
+
+        return entry;
+}
+
+static void
+set_table_entry(struct ctl_table *entry, int ctl_name,
+                const char *procname, void *data, int maxlen,
+                mode_t mode, proc_handler *proc_handler)
+{
+        entry->ctl_name = ctl_name;
+        entry->procname = procname;
+        entry->data = data;
+        entry->maxlen = maxlen;
+        entry->mode = mode;
+        entry->proc_handler = proc_handler;
+}
+
+static struct ctl_table *
+sd_alloc_ctl_domain_table(struct sched_domain *sd)
+{
+        struct ctl_table *table = sd_alloc_ctl_entry(14);
+
+        set_table_entry(&table[0], 1, "min_interval", &sd->min_interval,
+                sizeof(long), 0644, proc_doulongvec_minmax);
+        set_table_entry(&table[1], 2, "max_interval", &sd->max_interval,
+                sizeof(long), 0644, proc_doulongvec_minmax);
+        set_table_entry(&table[2], 3, "busy_idx", &sd->busy_idx,
+                sizeof(int), 0644, proc_dointvec_minmax);
+        set_table_entry(&table[3], 4, "idle_idx", &sd->idle_idx,
+                sizeof(int), 0644, proc_dointvec_minmax);
+        set_table_entry(&table[4], 5, "newidle_idx", &sd->newidle_idx,
+                sizeof(int), 0644, proc_dointvec_minmax);
+        set_table_entry(&table[5], 6, "wake_idx", &sd->wake_idx,
+                sizeof(int), 0644, proc_dointvec_minmax);
+        set_table_entry(&table[6], 7, "forkexec_idx", &sd->forkexec_idx,
+                sizeof(int), 0644, proc_dointvec_minmax);
+        set_table_entry(&table[7], 8, "busy_factor", &sd->busy_factor,
+                sizeof(int), 0644, proc_dointvec_minmax);
+        set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct,
+                sizeof(int), 0644, proc_dointvec_minmax);
+        set_table_entry(&table[9], 10, "cache_hot_time", &sd->cache_hot_time,
+                sizeof(long long), 0644, proc_doulongvec_minmax);
+        set_table_entry(&table[10], 11, "cache_nice_tries",
+                &sd->cache_nice_tries,
+                sizeof(int), 0644, proc_dointvec_minmax);
+        set_table_entry(&table[12], 13, "flags", &sd->flags,
+                sizeof(int), 0644, proc_dointvec_minmax);
+
+        return table;
+}
+
+static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
+{
+        struct ctl_table *entry, *table;
+        struct sched_domain *sd;
+        int domain_num = 0, i;
+        char buf[32];
+
+        for_each_domain(cpu, sd)
+                domain_num++;
+        entry = table = sd_alloc_ctl_entry(domain_num + 1);
+
+        i = 0;
+        for_each_domain(cpu, sd) {
+                snprintf(buf, 32, "domain%d", i);
+                entry->ctl_name = i + 1;
+                entry->procname = kstrdup(buf, GFP_KERNEL);
+                entry->mode = 0755;
+                entry->child = sd_alloc_ctl_domain_table(sd);
+                entry++;
+                i++;
+        }
+        return table;
+}
+
+static struct ctl_table_header *sd_sysctl_header;
+static void init_sched_domain_sysctl(void)
+{
+        int i, cpu_num = num_online_cpus();
+        struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
+        char buf[32];
+
+        sd_ctl_dir[0].child = entry;
+
+        for (i = 0; i < cpu_num; i++, entry++) {
+                snprintf(buf, 32, "cpu%d", i);
+                entry->ctl_name = i + 1;
+                entry->procname = kstrdup(buf, GFP_KERNEL);
+                entry->mode = 0755;
+                entry->child = sd_alloc_ctl_cpu_table(i);
+        }
+        sd_sysctl_header = register_sysctl_table(sd_ctl_root);
+}
+#else
+static void init_sched_domain_sysctl(void)
+{
+}
+#endif
+
 /*
  * migration_call - callback that gets triggered when a CPU is added.
  * Here we can start up the necessary migration thread for the new CPU.
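This sysctl tree is the "runtime-tweakable" feature from the patch title: it
exposes per-cpu, per-domain knobs under /proc/sys/kernel/sched_domain/. A
userspace sketch of tweaking one knob; the path is derived from the directory
names built in the code above and the value is purely illustrative:

    #include <stdio.h>

    int main(void)
    {
        const char *knob =
            "/proc/sys/kernel/sched_domain/cpu0/domain0/min_interval";
        FILE *f = fopen(knob, "w");

        if (!f) {
            perror(knob);
            return 1;
        }
        fprintf(f, "%lu\n", 1UL);   /* shorten the min balance interval */
        fclose(f);
        return 0;
    }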
@@ -6249,6 +6431,8 @@ void __init sched_init_smp(void)
         /* XXX: Theoretical race here - CPU may be hotplugged now */
         hotcpu_notifier(update_sched_domains, 0);
 
+        init_sched_domain_sysctl();
+
         /* Move init over to a non-isolated CPU */
         if (set_cpus_allowed(current, non_isolated_cpus) < 0)
                 BUG();
@@ -6335,6 +6519,10 @@ void __init sched_init(void)
 
         set_load_weight(&init_task);
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+        INIT_HLIST_HEAD(&init_task.preempt_notifiers);
+#endif
+
 #ifdef CONFIG_SMP
         nr_cpu_ids = highest_cpu + 1;
         open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
kernel/sched_debug.c
@@ -186,7 +186,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
         return 0;
 }
 
-void sysrq_sched_debug_show(void)
+static void sysrq_sched_debug_show(void)
 {
         sched_debug_show(NULL, NULL);
 }