2007-02-16 02:28:03 -07:00
|
|
|
/*
|
|
|
|
* linux/kernel/time/tick-sched.c
|
|
|
|
*
|
|
|
|
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
|
|
|
|
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
|
|
|
|
* Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
|
|
|
|
*
|
|
|
|
* No idle tick implementation for low and high resolution timers
|
|
|
|
*
|
|
|
|
* Started by: Thomas Gleixner and Ingo Molnar
|
|
|
|
*
|
2008-01-30 05:30:00 -07:00
|
|
|
* Distribute under GPLv2.
|
2007-02-16 02:28:03 -07:00
|
|
|
*/
|
|
|
|
#include <linux/cpu.h>
|
|
|
|
#include <linux/err.h>
|
|
|
|
#include <linux/hrtimer.h>
|
|
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <linux/kernel_stat.h>
|
|
|
|
#include <linux/percpu.h>
|
2017-02-08 10:51:31 -07:00
|
|
|
#include <linux/nmi.h>
|
2007-02-16 02:28:03 -07:00
|
|
|
#include <linux/profile.h>
|
2017-02-08 10:51:30 -07:00
|
|
|
#include <linux/sched/signal.h>
|
2017-02-01 08:36:40 -07:00
|
|
|
#include <linux/sched/clock.h>
|
2017-02-08 10:51:35 -07:00
|
|
|
#include <linux/sched/stat.h>
|
2017-02-08 10:51:35 -07:00
|
|
|
#include <linux/sched/nohz.h>
|
2008-08-04 12:59:11 -06:00
|
|
|
#include <linux/module.h>
|
2012-11-07 13:03:07 -07:00
|
|
|
#include <linux/irq_work.h>
|
2013-04-20 07:43:57 -06:00
|
|
|
#include <linux/posix-timers.h>
|
2017-05-18 00:26:09 -06:00
|
|
|
#include <linux/timer.h>
|
2013-07-09 16:55:25 -06:00
|
|
|
#include <linux/context_tracking.h>
|
2017-08-29 07:07:54 -06:00
|
|
|
#include <linux/mm.h>
|
2016-08-23 17:07:11 -06:00
|
|
|
#include <linux/rq_stats.h>
|
2007-02-16 02:28:03 -07:00
|
|
|
|
2007-02-24 23:10:13 -07:00
|
|
|
#include <asm/irq_regs.h>
|
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
#include "tick-internal.h"
|
|
|
|
|
2013-04-20 09:35:50 -06:00
|
|
|
#include <trace/events/timer.h>
|
|
|
|
|
2016-08-23 17:07:11 -06:00
|
|
|
struct rq_data rq_info;
|
|
|
|
struct workqueue_struct *rq_wq;
|
|
|
|
spinlock_t rq_lock;
|
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
/*
|
2016-07-01 04:42:35 -06:00
|
|
|
* Per-CPU nohz control structure
|
2007-02-16 02:28:03 -07:00
|
|
|
*/
|
2015-03-25 06:07:37 -06:00
|
|
|
static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
|
2007-02-16 02:28:03 -07:00
|
|
|
|
[PATCH] Add debugging feature /proc/timer_list
add /proc/timer_list, which prints all currently pending (high-res) timers,
all clock-event sources and their parameters in a human-readable form.
Sample output:
Timer List Version: v0.1
HRTIMER_MAX_CLOCK_BASES: 2
now at 4246046273872 nsecs
cpu: 0
clock 0:
.index: 0
.resolution: 1 nsecs
.get_time: ktime_get_real
.offset: 1273998312645738432 nsecs
active timers:
clock 1:
.index: 1
.resolution: 1 nsecs
.get_time: ktime_get
.offset: 0 nsecs
active timers:
#0: <f5a90ec8>, hrtimer_sched_tick, hrtimer_stop_sched_tick, swapper/0
# expires at 4246432689566 nsecs [in 386415694 nsecs]
#1: <f5a90ec8>, hrtimer_wakeup, do_nanosleep, pcscd/2050
# expires at 4247018194689 nsecs [in 971920817 nsecs]
#2: <f5a90ec8>, hrtimer_wakeup, do_nanosleep, irqbalance/1909
# expires at 4247351358392 nsecs [in 1305084520 nsecs]
#3: <f5a90ec8>, hrtimer_wakeup, do_nanosleep, crond/2157
# expires at 4249097614968 nsecs [in 3051341096 nsecs]
#4: <f5a90ec8>, it_real_fn, do_setitimer, syslogd/1888
# expires at 4251329900926 nsecs [in 5283627054 nsecs]
.expires_next : 4246432689566 nsecs
.hres_active : 1
.check_clocks : 0
.nr_events : 31306
.idle_tick : 4246020791890 nsecs
.tick_stopped : 1
.idle_jiffies : 986504
.idle_calls : 40700
.idle_sleeps : 36014
.idle_entrytime : 4246019418883 nsecs
.idle_sleeptime : 4178181972709 nsecs
cpu: 1
clock 0:
.index: 0
.resolution: 1 nsecs
.get_time: ktime_get_real
.offset: 1273998312645738432 nsecs
active timers:
clock 1:
.index: 1
.resolution: 1 nsecs
.get_time: ktime_get
.offset: 0 nsecs
active timers:
#0: <f5a90ec8>, hrtimer_sched_tick, hrtimer_restart_sched_tick, swapper/0
# expires at 4246050084568 nsecs [in 3810696 nsecs]
#1: <f5a90ec8>, hrtimer_wakeup, do_nanosleep, atd/2227
# expires at 4261010635003 nsecs [in 14964361131 nsecs]
#2: <f5a90ec8>, hrtimer_wakeup, do_nanosleep, smartd/2332
# expires at 5469485798970 nsecs [in 1223439525098 nsecs]
.expires_next : 4246050084568 nsecs
.hres_active : 1
.check_clocks : 0
.nr_events : 24043
.idle_tick : 4246046084568 nsecs
.tick_stopped : 0
.idle_jiffies : 986510
.idle_calls : 26360
.idle_sleeps : 22551
.idle_entrytime : 4246043874339 nsecs
.idle_sleeptime : 4170763761184 nsecs
tick_broadcast_mask: 00000003
event_broadcast_mask: 00000001
CPU#0's local event device:
Clock Event Device: lapic
capabilities: 0000000e
max_delta_ns: 807385544
min_delta_ns: 1443
mult: 44624025
shift: 32
set_next_event: lapic_next_event
set_mode: lapic_timer_setup
event_handler: hrtimer_interrupt
.installed: 1
.expires: 4246432689566 nsecs
CPU#1's local event device:
Clock Event Device: lapic
capabilities: 0000000e
max_delta_ns: 807385544
min_delta_ns: 1443
mult: 44624025
shift: 32
set_next_event: lapic_next_event
set_mode: lapic_timer_setup
event_handler: hrtimer_interrupt
.installed: 1
.expires: 4246050084568 nsecs
Clock Event Device: hpet
capabilities: 00000007
max_delta_ns: 2147483647
min_delta_ns: 3352
mult: 61496110
shift: 32
set_next_event: hpet_next_event
set_mode: hpet_set_mode
event_handler: handle_nextevt_broadcast
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-02-16 02:28:15 -07:00
|
|
|
struct tick_sched *tick_get_tick_sched(int cpu)
|
|
|
|
{
|
|
|
|
return &per_cpu(tick_cpu_sched, cpu);
|
|
|
|
}
|
|
|
|
|
2016-01-25 08:41:49 -07:00
|
|
|
#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
|
|
|
|
/*
|
|
|
|
* The time, when the last jiffy update happened. Protected by jiffies_lock.
|
|
|
|
*/
|
|
|
|
static ktime_t last_jiffies_update;
|
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
/*
|
|
|
|
* Must be called with interrupts disabled !
|
|
|
|
*/
|
|
|
|
static void tick_do_update_jiffies64(ktime_t now)
|
|
|
|
{
|
|
|
|
unsigned long ticks = 0;
|
|
|
|
ktime_t delta;
|
|
|
|
|
2008-05-12 07:43:53 -06:00
|
|
|
/*
|
2012-02-28 17:50:11 -07:00
|
|
|
* Do a quick check without holding jiffies_lock:
|
2019-12-04 21:56:19 -07:00
|
|
|
* The READ_ONCE() pairs with two updates done later in this function.
|
2008-05-12 07:43:53 -06:00
|
|
|
*/
|
2019-12-04 21:56:19 -07:00
|
|
|
delta = ktime_sub(now, READ_ONCE(last_jiffies_update));
|
2016-12-25 03:38:40 -07:00
|
|
|
if (delta < tick_period)
|
2008-05-12 07:43:53 -06:00
|
|
|
return;
|
|
|
|
|
2016-06-28 22:51:50 -06:00
|
|
|
/* Reevaluate with jiffies_lock held */
|
2012-02-28 17:50:11 -07:00
|
|
|
write_seqlock(&jiffies_lock);
|
2007-02-16 02:28:03 -07:00
|
|
|
|
|
|
|
delta = ktime_sub(now, last_jiffies_update);
|
2016-12-25 03:38:40 -07:00
|
|
|
if (delta >= tick_period) {
|
2007-02-16 02:28:03 -07:00
|
|
|
|
|
|
|
delta = ktime_sub(delta, tick_period);
|
2019-12-04 21:56:19 -07:00
|
|
|
/* Pairs with the lockless read in this function. */
|
|
|
|
WRITE_ONCE(last_jiffies_update,
|
|
|
|
ktime_add(last_jiffies_update, tick_period));
|
2007-02-16 02:28:03 -07:00
|
|
|
|
|
|
|
/* Slow path for long timeouts */
|
2016-12-25 03:38:40 -07:00
|
|
|
if (unlikely(delta >= tick_period)) {
|
2007-02-16 02:28:03 -07:00
|
|
|
s64 incr = ktime_to_ns(tick_period);
|
|
|
|
|
|
|
|
ticks = ktime_divns(delta, incr);
|
|
|
|
|
2019-12-04 21:56:19 -07:00
|
|
|
/* Pairs with the lockless read in this function. */
|
|
|
|
WRITE_ONCE(last_jiffies_update,
|
|
|
|
ktime_add_ns(last_jiffies_update,
|
|
|
|
incr * ticks));
|
2007-02-16 02:28:03 -07:00
|
|
|
}
|
|
|
|
do_timer(++ticks);
|
2008-09-22 10:56:01 -06:00
|
|
|
|
|
|
|
/* Keep the tick_next_period variable up to date */
|
|
|
|
tick_next_period = ktime_add(last_jiffies_update, tick_period);
|
2014-04-14 23:24:40 -06:00
|
|
|
} else {
|
|
|
|
write_sequnlock(&jiffies_lock);
|
|
|
|
return;
|
2007-02-16 02:28:03 -07:00
|
|
|
}
|
2012-02-28 17:50:11 -07:00
|
|
|
write_sequnlock(&jiffies_lock);
|
2013-12-12 14:10:55 -07:00
|
|
|
update_wall_time();
|
2007-02-16 02:28:03 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize and return retrieve the jiffies update.
|
|
|
|
*/
|
|
|
|
static ktime_t tick_init_jiffy_update(void)
|
|
|
|
{
|
|
|
|
ktime_t period;
|
|
|
|
|
2012-02-28 17:50:11 -07:00
|
|
|
write_seqlock(&jiffies_lock);
|
2007-02-16 02:28:03 -07:00
|
|
|
/* Did we start the jiffies update yet ? */
|
2016-12-25 03:38:40 -07:00
|
|
|
if (last_jiffies_update == 0)
|
2007-02-16 02:28:03 -07:00
|
|
|
last_jiffies_update = tick_next_period;
|
|
|
|
period = last_jiffies_update;
|
2012-02-28 17:50:11 -07:00
|
|
|
write_sequnlock(&jiffies_lock);
|
2007-02-16 02:28:03 -07:00
|
|
|
return period;
|
|
|
|
}
|
|
|
|
|
2018-04-06 06:59:13 -06:00
|
|
|
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
|
2012-10-14 18:03:27 -06:00
|
|
|
{
|
|
|
|
int cpu = smp_processor_id();
|
|
|
|
|
2011-08-10 15:21:01 -06:00
|
|
|
#ifdef CONFIG_NO_HZ_COMMON
|
2012-10-14 18:03:27 -06:00
|
|
|
/*
|
|
|
|
* Check if the do_timer duty was dropped. We don't care about
|
2016-07-01 04:42:35 -06:00
|
|
|
* concurrency: This happens only when the CPU in charge went
|
|
|
|
* into a long sleep. If two CPUs happen to assign themselves to
|
2012-10-14 18:03:27 -06:00
|
|
|
* this duty, then the jiffies update is still serialized by
|
2012-11-21 12:31:52 -07:00
|
|
|
* jiffies_lock.
|
2012-10-14 18:03:27 -06:00
|
|
|
*/
|
2012-12-18 10:24:35 -07:00
|
|
|
if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
|
2013-04-12 08:45:34 -06:00
|
|
|
&& !tick_nohz_full_cpu(cpu))
|
2012-10-14 18:03:27 -06:00
|
|
|
tick_do_timer_cpu = cpu;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Check, if the jiffies need an update */
|
|
|
|
if (tick_do_timer_cpu == cpu)
|
|
|
|
tick_do_update_jiffies64(now);
|
2018-04-06 06:59:13 -06:00
|
|
|
|
|
|
|
if (ts->inidle)
|
|
|
|
ts->got_idle_tick = 1;
|
2012-10-14 18:03:27 -06:00
|
|
|
}
|
|
|
|
|
2012-10-14 18:43:03 -06:00
|
|
|
static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
|
|
|
|
{
|
2011-08-10 15:21:01 -06:00
|
|
|
#ifdef CONFIG_NO_HZ_COMMON
|
2012-10-14 18:43:03 -06:00
|
|
|
/*
|
|
|
|
* When we are idle and the tick is stopped, we have to touch
|
|
|
|
* the watchdog as we might not schedule for a really long
|
|
|
|
* time. This happens on complete idle SMP systems while
|
|
|
|
* waiting on the login prompt. We also increment the "start of
|
|
|
|
* idle" jiffy stamp so the idle accounting adjustment we do
|
|
|
|
* when we go busy again does not account too much ticks.
|
|
|
|
*/
|
|
|
|
if (ts->tick_stopped) {
|
2015-12-08 09:28:04 -07:00
|
|
|
touch_softlockup_watchdog_sched();
|
2012-10-14 18:43:03 -06:00
|
|
|
if (is_idle_task(current))
|
|
|
|
ts->idle_jiffies++;
|
nohz: Fix collision between tick and other hrtimers, again
This restores commit:
24b91e360ef5: ("nohz: Fix collision between tick and other hrtimers")
... which got reverted by commit:
558e8e27e73f: ('Revert "nohz: Fix collision between tick and other hrtimers"')
... due to a regression where CPUs spuriously stopped ticking.
The bug happened when a tick fired too early past its expected expiration:
on IRQ exit the tick was scheduled again to the same deadline but skipped
reprogramming because ts->next_tick still kept in cache the deadline.
This has been fixed now with resetting ts->next_tick from the tick
itself. Extra care has also been taken to prevent from obsolete values
throughout CPU hotplug operations.
When the tick is stopped and an interrupt occurs afterward, we check on
that interrupt exit if the next tick needs to be rescheduled. If it
doesn't need any update, we don't want to do anything.
In order to check if the tick needs an update, we compare it against the
clockevent device deadline. Now that's a problem because the clockevent
device is at a lower level than the tick itself if it is implemented
on top of hrtimer.
Every hrtimer share this clockevent device. So comparing the next tick
deadline against the clockevent device deadline is wrong because the
device may be programmed for another hrtimer whose deadline collides
with the tick. As a result we may end up not reprogramming the tick
accidentally.
In a worst case scenario under full dynticks mode, the tick stops firing
as it is supposed to every 1hz, leaving /proc/stat stalled:
Task in a full dynticks CPU
----------------------------
* hrtimer A is queued 2 seconds ahead
* the tick is stopped, scheduled 1 second ahead
* tick fires 1 second later
* on tick exit, nohz schedules the tick 1 second ahead but sees
the clockevent device is already programmed to that deadline,
fooled by hrtimer A, the tick isn't rescheduled.
* hrtimer A is cancelled before its deadline
* tick never fires again until an interrupt happens...
In order to fix this, store the next tick deadline to the tick_sched
local structure and reuse that value later to check whether we need to
reprogram the clock after an interrupt.
On the other hand, ts->sleep_length still wants to know about the next
clock event and not just the tick, so we want to improve the related
comment to avoid confusion.
Reported-and-tested-by: Tim Wright <tim@binbash.co.uk>
Reported-and-tested-by: Pavel Machek <pavel@ucw.cz>
Reported-by: James Hartsock <hartsjc@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1492783255-5051-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-04-21 08:00:54 -06:00
|
|
|
/*
|
|
|
|
* In case the current tick fired too early past its expected
|
|
|
|
* expiration, make sure we don't bypass the next clock reprogramming
|
|
|
|
* to the same deadline.
|
|
|
|
*/
|
|
|
|
ts->next_tick = 0;
|
2012-10-14 18:43:03 -06:00
|
|
|
}
|
2012-10-15 08:17:16 -06:00
|
|
|
#endif
|
2012-10-14 18:43:03 -06:00
|
|
|
update_process_times(user_mode(regs));
|
|
|
|
profile_tick(CPU_PROFILING);
|
|
|
|
}
|
2016-01-25 08:41:49 -07:00
|
|
|
#endif
|
2012-10-14 18:43:03 -06:00
|
|
|
|
2013-04-12 08:45:34 -06:00
|
|
|
#ifdef CONFIG_NO_HZ_FULL
|
2013-07-24 15:52:27 -06:00
|
|
|
cpumask_var_t tick_nohz_full_mask;
|
2013-07-24 15:31:00 -06:00
|
|
|
bool tick_nohz_full_running;
|
2016-03-24 08:38:00 -06:00
|
|
|
static atomic_t tick_dep_mask;
|
2012-12-18 09:32:19 -07:00
|
|
|
|
2016-03-24 08:38:00 -06:00
|
|
|
static bool check_tick_dependency(atomic_t *dep)
|
2015-06-07 07:54:30 -06:00
|
|
|
{
|
2016-03-24 08:38:00 -06:00
|
|
|
int val = atomic_read(dep);
|
|
|
|
|
|
|
|
if (val & TICK_DEP_MASK_POSIX_TIMER) {
|
2015-12-10 19:27:25 -07:00
|
|
|
trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
|
2016-03-24 08:38:00 -06:00
|
|
|
return true;
|
2015-06-07 07:54:30 -06:00
|
|
|
}
|
|
|
|
|
2016-03-24 08:38:00 -06:00
|
|
|
if (val & TICK_DEP_MASK_PERF_EVENTS) {
|
2015-12-10 19:27:25 -07:00
|
|
|
trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
|
2016-03-24 08:38:00 -06:00
|
|
|
return true;
|
2015-06-07 07:54:30 -06:00
|
|
|
}
|
|
|
|
|
2016-03-24 08:38:00 -06:00
|
|
|
if (val & TICK_DEP_MASK_SCHED) {
|
2015-12-10 19:27:25 -07:00
|
|
|
trace_tick_stop(0, TICK_DEP_MASK_SCHED);
|
2016-03-24 08:38:00 -06:00
|
|
|
return true;
|
2015-06-07 07:54:30 -06:00
|
|
|
}
|
|
|
|
|
2016-03-24 08:38:00 -06:00
|
|
|
if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
|
2015-12-10 19:27:25 -07:00
|
|
|
trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
|
2016-03-24 08:38:00 -06:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
2015-06-07 07:54:30 -06:00
|
|
|
}
|
|
|
|
|
2016-09-07 04:51:13 -06:00
|
|
|
static bool can_stop_full_tick(int cpu, struct tick_sched *ts)
|
2013-04-20 07:43:57 -06:00
|
|
|
{
|
2017-11-06 08:01:20 -07:00
|
|
|
lockdep_assert_irqs_disabled();
|
2013-04-20 07:43:57 -06:00
|
|
|
|
2016-09-07 04:51:13 -06:00
|
|
|
if (unlikely(!cpu_online(cpu)))
|
|
|
|
return false;
|
|
|
|
|
2016-03-24 08:38:00 -06:00
|
|
|
if (check_tick_dependency(&tick_dep_mask))
|
2015-06-07 07:54:30 -06:00
|
|
|
return false;
|
|
|
|
|
2016-03-24 08:38:00 -06:00
|
|
|
if (check_tick_dependency(&ts->tick_dep_mask))
|
2015-06-07 07:54:30 -06:00
|
|
|
return false;
|
|
|
|
|
2016-03-24 08:38:00 -06:00
|
|
|
if (check_tick_dependency(¤t->tick_dep_mask))
|
2015-06-07 07:54:30 -06:00
|
|
|
return false;
|
|
|
|
|
2016-03-24 08:38:00 -06:00
|
|
|
if (check_tick_dependency(¤t->signal->tick_dep_mask))
|
2015-06-07 07:54:30 -06:00
|
|
|
return false;
|
|
|
|
|
2013-04-20 07:43:57 -06:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-06-07 07:54:30 -06:00
|
|
|
static void nohz_full_kick_func(struct irq_work *work)
|
2013-04-17 16:15:40 -06:00
|
|
|
{
|
2015-05-27 11:22:08 -06:00
|
|
|
/* Empty, the tick restart happens on tick_nohz_irq_exit() */
|
2013-04-17 16:15:40 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
|
2015-06-07 07:54:30 -06:00
|
|
|
.func = nohz_full_kick_func,
|
2013-04-17 16:15:40 -06:00
|
|
|
};
|
|
|
|
|
2014-08-13 10:50:16 -06:00
|
|
|
/*
|
|
|
|
* Kick this CPU if it's full dynticks in order to force it to
|
|
|
|
* re-evaluate its dependency on the tick and restart it if necessary.
|
|
|
|
* This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
|
|
|
|
* is NMI safe.
|
|
|
|
*/
|
2015-07-16 09:42:29 -06:00
|
|
|
static void tick_nohz_full_kick(void)
|
2014-08-13 10:50:16 -06:00
|
|
|
{
|
|
|
|
if (!tick_nohz_full_cpu(smp_processor_id()))
|
|
|
|
return;
|
|
|
|
|
2014-10-27 09:49:45 -06:00
|
|
|
irq_work_queue(this_cpu_ptr(&nohz_full_kick_work));
|
2014-08-13 10:50:16 -06:00
|
|
|
}
|
|
|
|
|
2013-04-17 16:15:40 -06:00
|
|
|
/*
|
2014-06-04 08:17:33 -06:00
|
|
|
* Kick the CPU if it's full dynticks in order to force it to
|
2013-04-17 16:15:40 -06:00
|
|
|
* re-evaluate its dependency on the tick and restart it if necessary.
|
|
|
|
*/
|
2014-06-04 08:17:33 -06:00
|
|
|
void tick_nohz_full_kick_cpu(int cpu)
|
2013-04-17 16:15:40 -06:00
|
|
|
{
|
2014-06-04 08:17:33 -06:00
|
|
|
if (!tick_nohz_full_cpu(cpu))
|
|
|
|
return;
|
|
|
|
|
|
|
|
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
|
2013-04-17 16:15:40 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Kick all full dynticks CPUs in order to force these to re-evaluate
|
|
|
|
* their dependency on the tick and restart it if necessary.
|
|
|
|
*/
|
2015-07-17 14:25:49 -06:00
|
|
|
static void tick_nohz_full_kick_all(void)
|
2013-04-17 16:15:40 -06:00
|
|
|
{
|
2015-12-07 08:55:23 -07:00
|
|
|
int cpu;
|
|
|
|
|
2013-07-24 15:31:00 -06:00
|
|
|
if (!tick_nohz_full_running)
|
2013-04-17 16:15:40 -06:00
|
|
|
return;
|
|
|
|
|
|
|
|
preempt_disable();
|
2015-12-07 08:55:23 -07:00
|
|
|
for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
|
|
|
|
tick_nohz_full_kick_cpu(cpu);
|
2013-04-17 16:15:40 -06:00
|
|
|
preempt_enable();
|
|
|
|
}
|
|
|
|
|
2016-03-24 08:38:00 -06:00
|
|
|
static void tick_nohz_dep_set_all(atomic_t *dep,
|
2015-06-07 07:54:30 -06:00
|
|
|
enum tick_dep_bits bit)
|
|
|
|
{
|
2016-03-24 08:38:00 -06:00
|
|
|
int prev;
|
2015-06-07 07:54:30 -06:00
|
|
|
|
2016-04-21 12:35:25 -06:00
|
|
|
prev = atomic_fetch_or(BIT(bit), dep);
|
2015-06-07 07:54:30 -06:00
|
|
|
if (!prev)
|
|
|
|
tick_nohz_full_kick_all();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set a global tick dependency. Used by perf events that rely on freq and
|
|
|
|
* by unstable clock.
|
|
|
|
*/
|
|
|
|
void tick_nohz_dep_set(enum tick_dep_bits bit)
|
|
|
|
{
|
|
|
|
tick_nohz_dep_set_all(&tick_dep_mask, bit);
|
|
|
|
}
|
|
|
|
|
|
|
|
void tick_nohz_dep_clear(enum tick_dep_bits bit)
|
|
|
|
{
|
2016-03-24 08:38:00 -06:00
|
|
|
atomic_andnot(BIT(bit), &tick_dep_mask);
|
2015-06-07 07:54:30 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set per-CPU tick dependency. Used by scheduler and perf events in order to
|
|
|
|
* manage events throttling.
|
|
|
|
*/
|
|
|
|
void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
|
|
|
|
{
|
2016-03-24 08:38:00 -06:00
|
|
|
int prev;
|
2015-06-07 07:54:30 -06:00
|
|
|
struct tick_sched *ts;
|
|
|
|
|
|
|
|
ts = per_cpu_ptr(&tick_cpu_sched, cpu);
|
|
|
|
|
2016-04-21 12:35:25 -06:00
|
|
|
prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask);
|
2015-06-07 07:54:30 -06:00
|
|
|
if (!prev) {
|
|
|
|
preempt_disable();
|
|
|
|
/* Perf needs local kick that is NMI safe */
|
|
|
|
if (cpu == smp_processor_id()) {
|
|
|
|
tick_nohz_full_kick();
|
|
|
|
} else {
|
|
|
|
/* Remote irq work not NMI-safe */
|
|
|
|
if (!WARN_ON_ONCE(in_nmi()))
|
|
|
|
tick_nohz_full_kick_cpu(cpu);
|
|
|
|
}
|
|
|
|
preempt_enable();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
|
|
|
|
{
|
|
|
|
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
|
|
|
|
|
2016-03-24 08:38:00 -06:00
|
|
|
atomic_andnot(BIT(bit), &ts->tick_dep_mask);
|
2015-06-07 07:54:30 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set a per-task tick dependency. Posix CPU timers need this in order to elapse
|
|
|
|
* per task timers.
|
|
|
|
*/
|
|
|
|
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We could optimize this with just kicking the target running the task
|
|
|
|
* if that noise matters for nohz full users.
|
|
|
|
*/
|
|
|
|
tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
|
|
|
|
}
|
|
|
|
|
|
|
|
void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
|
|
|
|
{
|
2016-03-24 08:38:00 -06:00
|
|
|
atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
|
2015-06-07 07:54:30 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
|
|
|
|
* per process timers.
|
|
|
|
*/
|
|
|
|
void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
|
|
|
|
{
|
|
|
|
tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
|
|
|
|
}
|
|
|
|
|
|
|
|
void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
|
|
|
|
{
|
2016-03-24 08:38:00 -06:00
|
|
|
atomic_andnot(BIT(bit), &sig->tick_dep_mask);
|
2015-06-07 07:54:30 -06:00
|
|
|
}
|
|
|
|
|
2013-04-20 09:11:50 -06:00
|
|
|
/*
|
|
|
|
* Re-evaluate the need for the tick as we switch the current task.
|
|
|
|
* It might need the tick due to per task/process properties:
|
2016-07-01 04:42:35 -06:00
|
|
|
* perf events, posix CPU timers, ...
|
2013-04-20 09:11:50 -06:00
|
|
|
*/
|
2015-06-11 10:07:12 -06:00
|
|
|
void __tick_nohz_task_switch(void)
|
2013-04-20 09:11:50 -06:00
|
|
|
{
|
|
|
|
unsigned long flags;
|
2015-06-07 07:54:30 -06:00
|
|
|
struct tick_sched *ts;
|
2013-04-20 09:11:50 -06:00
|
|
|
|
|
|
|
local_irq_save(flags);
|
|
|
|
|
2013-04-27 21:25:58 -06:00
|
|
|
if (!tick_nohz_full_cpu(smp_processor_id()))
|
|
|
|
goto out;
|
|
|
|
|
2015-06-07 07:54:30 -06:00
|
|
|
ts = this_cpu_ptr(&tick_cpu_sched);
|
2013-04-20 09:11:50 -06:00
|
|
|
|
2015-06-07 07:54:30 -06:00
|
|
|
if (ts->tick_stopped) {
|
2016-03-24 08:38:00 -06:00
|
|
|
if (atomic_read(¤t->tick_dep_mask) ||
|
|
|
|
atomic_read(¤t->signal->tick_dep_mask))
|
2015-06-07 07:54:30 -06:00
|
|
|
tick_nohz_full_kick();
|
|
|
|
}
|
2013-04-27 21:25:58 -06:00
|
|
|
out:
|
2013-04-20 09:11:50 -06:00
|
|
|
local_irq_restore(flags);
|
|
|
|
}
|
|
|
|
|
2017-10-26 20:42:36 -06:00
|
|
|
/* Get the boot-time nohz CPU list from the kernel parameters. */
|
|
|
|
void __init tick_nohz_full_setup(cpumask_var_t cpumask)
|
2012-12-18 09:32:19 -07:00
|
|
|
{
|
2013-07-24 15:31:00 -06:00
|
|
|
alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
|
2017-10-26 20:42:36 -06:00
|
|
|
cpumask_copy(tick_nohz_full_mask, cpumask);
|
2013-07-24 15:31:00 -06:00
|
|
|
tick_nohz_full_running = true;
|
2012-12-18 09:32:19 -07:00
|
|
|
}
|
|
|
|
|
2016-11-17 11:35:34 -07:00
|
|
|
static int tick_nohz_cpu_down(unsigned int cpu)
|
2012-12-18 10:24:35 -07:00
|
|
|
{
|
2016-11-17 11:35:34 -07:00
|
|
|
/*
|
|
|
|
* The boot CPU handles housekeeping duty (unbound timers,
|
|
|
|
* workqueues, timekeeping, ...) on behalf of full dynticks
|
|
|
|
* CPUs. It must remain online when nohz full is enabled.
|
|
|
|
*/
|
|
|
|
if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
|
|
|
|
return -EBUSY;
|
|
|
|
return 0;
|
2012-12-18 10:24:35 -07:00
|
|
|
}
|
|
|
|
|
2013-03-26 16:47:24 -06:00
|
|
|
void __init tick_nohz_init(void)
|
2012-12-18 09:32:19 -07:00
|
|
|
{
|
2016-11-17 11:35:34 -07:00
|
|
|
int cpu, ret;
|
2013-03-26 16:47:24 -06:00
|
|
|
|
2017-11-30 16:36:35 -07:00
|
|
|
if (!tick_nohz_full_running)
|
|
|
|
return;
|
2013-03-26 16:47:24 -06:00
|
|
|
|
2014-08-17 17:36:07 -06:00
|
|
|
/*
|
|
|
|
* Full dynticks uses irq work to drive the tick rescheduling on safe
|
|
|
|
* locking contexts. But then we need irq work to raise its own
|
|
|
|
* interrupts to avoid circular dependency on the tick
|
|
|
|
*/
|
|
|
|
if (!arch_irq_work_has_interrupt()) {
|
2016-03-22 15:28:09 -06:00
|
|
|
pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n");
|
2014-08-17 17:36:07 -06:00
|
|
|
cpumask_clear(tick_nohz_full_mask);
|
|
|
|
tick_nohz_full_running = false;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-08-17 14:02:55 -06:00
|
|
|
cpu = smp_processor_id();
|
|
|
|
|
|
|
|
if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
|
2016-03-22 15:28:09 -06:00
|
|
|
pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n",
|
|
|
|
cpu);
|
2014-08-17 14:02:55 -06:00
|
|
|
cpumask_clear_cpu(cpu, tick_nohz_full_mask);
|
|
|
|
}
|
|
|
|
|
2013-07-24 15:31:00 -06:00
|
|
|
for_each_cpu(cpu, tick_nohz_full_mask)
|
2013-07-09 16:55:25 -06:00
|
|
|
context_tracking_cpu_set(cpu);
|
|
|
|
|
2016-11-17 11:35:34 -07:00
|
|
|
ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
|
|
|
|
"kernel/nohz:predown", NULL,
|
|
|
|
tick_nohz_cpu_down);
|
|
|
|
WARN_ON(ret < 0);
|
2015-02-13 15:37:31 -07:00
|
|
|
pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
|
|
|
|
cpumask_pr_args(tick_nohz_full_mask));
|
2012-12-18 09:32:19 -07:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
/*
|
|
|
|
* NOHZ - aka dynamic tick functionality
|
|
|
|
*/
|
2011-08-10 15:21:01 -06:00
|
|
|
#ifdef CONFIG_NO_HZ_COMMON
|
2007-02-16 02:28:03 -07:00
|
|
|
/*
|
|
|
|
* NO HZ enabled ?
|
|
|
|
*/
|
2016-03-17 15:23:00 -06:00
|
|
|
bool tick_nohz_enabled __read_mostly = true;
|
2015-05-26 16:50:33 -06:00
|
|
|
unsigned long tick_nohz_active __read_mostly;
|
2007-02-16 02:28:03 -07:00
|
|
|
/*
|
|
|
|
* Enable / Disable tickless mode
|
|
|
|
*/
|
|
|
|
static int __init setup_tick_nohz(char *str)
|
|
|
|
{
|
2016-03-17 15:23:00 -06:00
|
|
|
return (kstrtobool(str, &tick_nohz_enabled) == 0);
|
2007-02-16 02:28:03 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
__setup("nohz=", setup_tick_nohz);
|
|
|
|
|
2018-02-20 21:17:24 -07:00
|
|
|
bool tick_nohz_tick_stopped(void)
|
2015-03-25 06:07:37 -06:00
|
|
|
{
|
2018-04-05 20:32:37 -06:00
|
|
|
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
|
|
|
|
|
|
|
|
return ts->tick_stopped;
|
2015-03-25 06:07:37 -06:00
|
|
|
}
|
|
|
|
|
2018-02-20 21:17:25 -07:00
|
|
|
bool tick_nohz_tick_stopped_cpu(int cpu)
|
|
|
|
{
|
|
|
|
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
|
|
|
|
|
|
|
|
return ts->tick_stopped;
|
|
|
|
}
|
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
/**
|
|
|
|
* tick_nohz_update_jiffies - update jiffies when idle was interrupted
|
|
|
|
*
|
|
|
|
* Called from interrupt entry when the CPU was idle
|
|
|
|
*
|
|
|
|
* In case the sched_tick was stopped on this CPU, we have to check if jiffies
|
|
|
|
* must be updated. Otherwise an interrupt handler could use a stale jiffy
|
2016-07-01 04:42:35 -06:00
|
|
|
* value. We do this unconditionally on any CPU, as we don't know whether the
|
|
|
|
* CPU, which has the update task assigned is in a long sleep.
|
2007-02-16 02:28:03 -07:00
|
|
|
*/
|
2009-09-29 06:25:15 -06:00
|
|
|
static void tick_nohz_update_jiffies(ktime_t now)
|
2007-02-16 02:28:03 -07:00
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
2013-08-07 14:28:01 -06:00
|
|
|
__this_cpu_write(tick_cpu_sched.idle_waketime, now);
|
2007-02-16 02:28:03 -07:00
|
|
|
|
|
|
|
local_irq_save(flags);
|
|
|
|
tick_do_update_jiffies64(now);
|
|
|
|
local_irq_restore(flags);
|
2008-05-12 07:43:53 -06:00
|
|
|
|
2015-12-08 09:28:04 -07:00
|
|
|
touch_softlockup_watchdog_sched();
|
2007-02-16 02:28:03 -07:00
|
|
|
}
|
|
|
|
|
2010-05-09 09:22:45 -06:00
|
|
|
/*
|
2016-07-01 04:42:35 -06:00
|
|
|
* Updates the per-CPU time idle statistics counters
|
2010-05-09 09:22:45 -06:00
|
|
|
*/
|
2010-05-09 09:24:03 -06:00
|
|
|
static void
|
2010-07-01 01:07:17 -06:00
|
|
|
update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
|
2008-01-30 05:30:04 -07:00
|
|
|
{
|
2009-09-29 06:25:15 -06:00
|
|
|
ktime_t delta;
|
2008-01-30 05:30:04 -07:00
|
|
|
|
2010-05-09 09:22:45 -06:00
|
|
|
if (ts->idle_active) {
|
|
|
|
delta = ktime_sub(now, ts->idle_entrytime);
|
2010-07-01 01:07:17 -06:00
|
|
|
if (nr_iowait_cpu(cpu) > 0)
|
2010-05-09 09:25:23 -06:00
|
|
|
ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
|
2011-08-24 01:37:48 -06:00
|
|
|
else
|
|
|
|
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
|
2010-05-09 09:23:23 -06:00
|
|
|
ts->idle_entrytime = now;
|
2010-05-09 09:22:45 -06:00
|
|
|
}
|
2010-05-09 09:24:03 -06:00
|
|
|
|
2010-05-09 09:24:39 -06:00
|
|
|
if (last_update_time)
|
2010-05-09 09:24:03 -06:00
|
|
|
*last_update_time = ktime_to_us(now);
|
|
|
|
|
2010-05-09 09:22:45 -06:00
|
|
|
}
|
|
|
|
|
2013-08-07 14:28:01 -06:00
|
|
|
static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
|
2010-05-09 09:22:45 -06:00
|
|
|
{
|
2013-08-07 14:28:01 -06:00
|
|
|
update_ts_time_stats(smp_processor_id(), ts, now, NULL);
|
2009-09-29 06:25:15 -06:00
|
|
|
ts->idle_active = 0;
|
2008-09-01 08:44:23 -06:00
|
|
|
|
2017-04-21 04:26:23 -06:00
|
|
|
sched_clock_idle_wakeup_event();
|
2008-01-30 05:30:04 -07:00
|
|
|
}
|
|
|
|
|
2018-04-05 10:58:27 -06:00
|
|
|
static void tick_nohz_start_idle(struct tick_sched *ts)
|
2008-01-30 05:30:04 -07:00
|
|
|
{
|
2018-04-05 10:58:27 -06:00
|
|
|
ts->idle_entrytime = ktime_get();
|
2008-01-30 05:30:04 -07:00
|
|
|
ts->idle_active = 1;
|
2008-09-01 08:44:23 -06:00
|
|
|
sched_clock_idle_sleep_event();
|
2008-01-30 05:30:04 -07:00
|
|
|
}
|
|
|
|
|
2010-05-09 09:22:08 -06:00
|
|
|
/**
|
2016-07-01 04:42:35 -06:00
|
|
|
* get_cpu_idle_time_us - get the total idle time of a CPU
|
2010-05-09 09:22:08 -06:00
|
|
|
* @cpu: CPU number to query
|
2011-08-24 01:39:30 -06:00
|
|
|
* @last_update_time: variable to store update time in. Do not update
|
|
|
|
* counters if NULL.
|
2010-05-09 09:22:08 -06:00
|
|
|
*
|
2016-06-28 22:51:50 -06:00
|
|
|
* Return the cumulative idle time (since boot) for a given
|
2011-08-24 01:37:48 -06:00
|
|
|
* CPU, in microseconds.
|
2010-05-09 09:22:08 -06:00
|
|
|
*
|
|
|
|
* This time is measured via accounting rather than sampling,
|
|
|
|
* and is as accurate as ktime_get() is.
|
|
|
|
*
|
|
|
|
* This function returns -1 if NOHZ is not enabled.
|
|
|
|
*/
|
2008-01-30 05:30:04 -07:00
|
|
|
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
|
|
|
|
{
|
|
|
|
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
|
2011-08-24 01:39:30 -06:00
|
|
|
ktime_t now, idle;
|
2008-01-30 05:30:04 -07:00
|
|
|
|
2013-11-13 13:01:57 -07:00
|
|
|
if (!tick_nohz_active)
|
2008-08-04 12:59:11 -06:00
|
|
|
return -1;
|
|
|
|
|
2011-08-24 01:39:30 -06:00
|
|
|
now = ktime_get();
|
|
|
|
if (last_update_time) {
|
|
|
|
update_ts_time_stats(cpu, ts, now, last_update_time);
|
|
|
|
idle = ts->idle_sleeptime;
|
|
|
|
} else {
|
|
|
|
if (ts->idle_active && !nr_iowait_cpu(cpu)) {
|
|
|
|
ktime_t delta = ktime_sub(now, ts->idle_entrytime);
|
|
|
|
|
|
|
|
idle = ktime_add(ts->idle_sleeptime, delta);
|
|
|
|
} else {
|
|
|
|
idle = ts->idle_sleeptime;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ktime_to_us(idle);
|
2008-08-04 12:59:11 -06:00
|
|
|
|
2008-01-30 05:30:04 -07:00
|
|
|
}
|
2008-08-04 12:59:11 -06:00
|
|
|
EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
|
2008-01-30 05:30:04 -07:00
|
|
|
|
2011-08-24 01:37:48 -06:00
|
|
|
/**
|
2016-07-01 04:42:35 -06:00
|
|
|
* get_cpu_iowait_time_us - get the total iowait time of a CPU
|
2010-05-09 09:25:23 -06:00
|
|
|
* @cpu: CPU number to query
|
2011-08-24 01:39:30 -06:00
|
|
|
* @last_update_time: variable to store update time in. Do not update
|
|
|
|
* counters if NULL.
|
2010-05-09 09:25:23 -06:00
|
|
|
*
|
2016-06-28 22:51:50 -06:00
|
|
|
* Return the cumulative iowait time (since boot) for a given
|
2010-05-09 09:25:23 -06:00
|
|
|
* CPU, in microseconds.
|
|
|
|
*
|
|
|
|
* This time is measured via accounting rather than sampling,
|
|
|
|
* and is as accurate as ktime_get() is.
|
|
|
|
*
|
|
|
|
* This function returns -1 if NOHZ is not enabled.
|
|
|
|
*/
|
|
|
|
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
|
|
|
|
{
|
|
|
|
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
|
2011-08-24 01:39:30 -06:00
|
|
|
ktime_t now, iowait;
|
2010-05-09 09:25:23 -06:00
|
|
|
|
2013-11-13 13:01:57 -07:00
|
|
|
if (!tick_nohz_active)
|
2010-05-09 09:25:23 -06:00
|
|
|
return -1;
|
|
|
|
|
2011-08-24 01:39:30 -06:00
|
|
|
now = ktime_get();
|
|
|
|
if (last_update_time) {
|
|
|
|
update_ts_time_stats(cpu, ts, now, last_update_time);
|
|
|
|
iowait = ts->iowait_sleeptime;
|
|
|
|
} else {
|
|
|
|
if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
|
|
|
|
ktime_t delta = ktime_sub(now, ts->idle_entrytime);
|
2010-05-09 09:25:23 -06:00
|
|
|
|
2011-08-24 01:39:30 -06:00
|
|
|
iowait = ktime_add(ts->iowait_sleeptime, delta);
|
|
|
|
} else {
|
|
|
|
iowait = ts->iowait_sleeptime;
|
|
|
|
}
|
|
|
|
}
|
2010-05-09 09:25:23 -06:00
|
|
|
|
2011-08-24 01:39:30 -06:00
|
|
|
return ktime_to_us(iowait);
|
2010-05-09 09:25:23 -06:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
|
|
|
|
|
2015-04-14 15:08:54 -06:00
|
|
|
static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
|
|
|
|
{
|
|
|
|
hrtimer_cancel(&ts->sched_timer);
|
|
|
|
hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
|
|
|
|
|
|
|
|
/* Forward the time to expire in the future */
|
|
|
|
hrtimer_forward(&ts->sched_timer, now, tick_period);
|
|
|
|
|
|
|
|
if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
|
|
|
|
hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
|
|
|
|
else
|
|
|
|
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
|
nohz: Fix collision between tick and other hrtimers, again
This restores commit:
24b91e360ef5: ("nohz: Fix collision between tick and other hrtimers")
... which got reverted by commit:
558e8e27e73f: ('Revert "nohz: Fix collision between tick and other hrtimers"')
... due to a regression where CPUs spuriously stopped ticking.
The bug happened when a tick fired too early past its expected expiration:
on IRQ exit the tick was scheduled again to the same deadline but skipped
reprogramming because ts->next_tick still kept in cache the deadline.
This has been fixed now with resetting ts->next_tick from the tick
itself. Extra care has also been taken to prevent from obsolete values
throughout CPU hotplug operations.
When the tick is stopped and an interrupt occurs afterward, we check on
that interrupt exit if the next tick needs to be rescheduled. If it
doesn't need any update, we don't want to do anything.
In order to check if the tick needs an update, we compare it against the
clockevent device deadline. Now that's a problem because the clockevent
device is at a lower level than the tick itself if it is implemented
on top of hrtimer.
Every hrtimer share this clockevent device. So comparing the next tick
deadline against the clockevent device deadline is wrong because the
device may be programmed for another hrtimer whose deadline collides
with the tick. As a result we may end up not reprogramming the tick
accidentally.
In a worst case scenario under full dynticks mode, the tick stops firing
as it is supposed to every 1hz, leaving /proc/stat stalled:
Task in a full dynticks CPU
----------------------------
* hrtimer A is queued 2 seconds ahead
* the tick is stopped, scheduled 1 second ahead
* tick fires 1 second later
* on tick exit, nohz schedules the tick 1 second ahead but sees
the clockevent device is already programmed to that deadline,
fooled by hrtimer A, the tick isn't rescheduled.
* hrtimer A is cancelled before its deadline
* tick never fires again until an interrupt happens...
In order to fix this, store the next tick deadline to the tick_sched
local structure and reuse that value later to check whether we need to
reprogram the clock after an interrupt.
On the other hand, ts->sleep_length still wants to know about the next
clock event and not just the tick, so we want to improve the related
comment to avoid confusion.
Reported-and-tested-by: Tim Wright <tim@binbash.co.uk>
Reported-and-tested-by: Pavel Machek <pavel@ucw.cz>
Reported-by: James Hartsock <hartsjc@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1492783255-5051-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-04-21 08:00:54 -06:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Reset to make sure next tick stop doesn't get fooled by past
|
|
|
|
* cached clock deadline.
|
|
|
|
*/
|
|
|
|
ts->next_tick = 0;
|
2015-04-14 15:08:54 -06:00
|
|
|
}
|
|
|
|
|
2017-12-22 07:51:13 -07:00
|
|
|
static inline bool local_timer_softirq_pending(void)
|
|
|
|
{
|
2018-07-31 10:13:58 -06:00
|
|
|
return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
|
2017-12-22 07:51:13 -07:00
|
|
|
}
|
|
|
|
|
2018-04-05 11:07:57 -06:00
|
|
|
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
|
2007-02-16 02:28:03 -07:00
|
|
|
{
|
2015-04-14 15:08:58 -06:00
|
|
|
u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
|
|
|
|
unsigned long seq, basejiff;
|
2013-12-16 16:16:37 -07:00
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
/* Read jiffies and the time when jiffies were updated last */
|
|
|
|
do {
|
2012-02-28 17:50:11 -07:00
|
|
|
seq = read_seqbegin(&jiffies_lock);
|
2016-12-25 03:38:40 -07:00
|
|
|
basemono = last_jiffies_update;
|
2015-04-14 15:08:58 -06:00
|
|
|
basejiff = jiffies;
|
2012-02-28 17:50:11 -07:00
|
|
|
} while (read_seqretry(&jiffies_lock, seq));
|
2015-04-14 15:08:58 -06:00
|
|
|
ts->last_jiffies = basejiff;
|
2018-04-05 11:07:57 -06:00
|
|
|
ts->timer_expires_base = basemono;
|
2007-02-16 02:28:03 -07:00
|
|
|
|
2017-12-22 07:51:13 -07:00
|
|
|
/*
|
|
|
|
* Keep the periodic tick, when RCU, architecture or irq_work
|
|
|
|
* requests it.
|
|
|
|
* Aside of that check whether the local timer softirq is
|
|
|
|
* pending. If so its a bad idea to call get_next_timer_interrupt()
|
|
|
|
* because there is an already expired timer, so it will request
|
|
|
|
* immeditate expiry, which rearms the hardware timer with a
|
|
|
|
* minimal delta which brings us back to this place
|
|
|
|
* immediately. Lather, rinse and repeat...
|
|
|
|
*/
|
|
|
|
if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
|
|
|
|
irq_work_needs_cpu() || local_timer_softirq_pending()) {
|
2015-04-14 15:08:58 -06:00
|
|
|
next_tick = basemono + TICK_NSEC;
|
2009-09-29 06:25:16 -06:00
|
|
|
} else {
|
2015-04-14 15:08:58 -06:00
|
|
|
/*
|
|
|
|
* Get the next pending timer. If high resolution
|
|
|
|
* timers are enabled this only takes the timer wheel
|
|
|
|
* timers into account. If high resolution timers are
|
|
|
|
* disabled this also looks at the next expiring
|
|
|
|
* hrtimer.
|
|
|
|
*/
|
|
|
|
next_tmr = get_next_timer_interrupt(basejiff, basemono);
|
|
|
|
ts->next_timer = next_tmr;
|
|
|
|
/* Take the next rcu event into account */
|
|
|
|
next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
|
2009-09-29 06:25:16 -06:00
|
|
|
}
|
2013-04-26 02:05:59 -06:00
|
|
|
|
2015-04-14 15:08:58 -06:00
|
|
|
/*
|
|
|
|
* If the tick is due in the next period, keep it ticking or
|
2015-11-19 09:21:06 -07:00
|
|
|
* force prod the timer.
|
2015-04-14 15:08:58 -06:00
|
|
|
*/
|
|
|
|
delta = next_tick - basemono;
|
|
|
|
if (delta <= (u64)TICK_NSEC) {
|
2016-07-04 03:50:36 -06:00
|
|
|
/*
|
|
|
|
* Tell the timer code that the base is not idle, i.e. undo
|
|
|
|
* the effect of get_next_timer_interrupt():
|
|
|
|
*/
|
|
|
|
timer_clear_idle();
|
2015-11-19 09:21:06 -07:00
|
|
|
/*
|
|
|
|
* We've not stopped the tick yet, and there's a timer in the
|
|
|
|
* next period, so no point in stopping it either, bail.
|
|
|
|
*/
|
2017-06-01 08:47:09 -06:00
|
|
|
if (!ts->tick_stopped) {
|
2018-04-05 11:07:57 -06:00
|
|
|
ts->timer_expires = 0;
|
2015-04-14 15:08:56 -06:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-04-05 11:07:57 -06:00
|
|
|
/*
|
|
|
|
* If this CPU is the one which had the do_timer() duty last, we limit
|
|
|
|
* the sleep time to the timekeeping max_deferment value.
|
|
|
|
* Otherwise we can sleep as long as we want.
|
|
|
|
*/
|
|
|
|
delta = timekeeping_max_deferment();
|
|
|
|
if (cpu != tick_do_timer_cpu &&
|
|
|
|
(tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
|
|
|
|
delta = KTIME_MAX;
|
|
|
|
|
|
|
|
/* Calculate the next expiry time */
|
|
|
|
if (delta < (KTIME_MAX - basemono))
|
|
|
|
expires = basemono + delta;
|
|
|
|
else
|
|
|
|
expires = KTIME_MAX;
|
|
|
|
|
|
|
|
ts->timer_expires = min_t(u64, expires, next_tick);
|
|
|
|
|
|
|
|
out:
|
|
|
|
return ts->timer_expires;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
|
|
|
|
{
|
|
|
|
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
|
|
|
|
u64 basemono = ts->timer_expires_base;
|
|
|
|
u64 expires = ts->timer_expires;
|
|
|
|
ktime_t tick = expires;
|
|
|
|
|
|
|
|
/* Make sure we won't be trying to stop it twice in a row. */
|
|
|
|
ts->timer_expires_base = 0;
|
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
/*
|
2016-07-01 04:42:35 -06:00
|
|
|
* If this CPU is the one which updates jiffies, then give up
|
|
|
|
* the assignment and let it be taken by the CPU which runs
|
|
|
|
* the tick timer next, which might be this CPU as well. If we
|
2015-04-14 15:08:56 -06:00
|
|
|
* don't drop this here the jiffies might be stale and
|
|
|
|
* do_timer() never invoked. Keep track of the fact that it
|
2018-04-05 11:07:57 -06:00
|
|
|
* was the one which had the do_timer() duty last.
|
2007-02-16 02:28:03 -07:00
|
|
|
*/
|
2015-04-14 15:08:56 -06:00
|
|
|
if (cpu == tick_do_timer_cpu) {
|
|
|
|
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
|
|
|
|
ts->do_timer_last = 1;
|
|
|
|
} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
|
|
|
|
ts->do_timer_last = 0;
|
|
|
|
}
|
2009-11-12 14:12:06 -07:00
|
|
|
|
2015-04-14 15:08:56 -06:00
|
|
|
/* Skip reprogram of event if its not changed */
|
nohz: Fix collision between tick and other hrtimers, again
This restores commit:
24b91e360ef5: ("nohz: Fix collision between tick and other hrtimers")
... which got reverted by commit:
558e8e27e73f: ('Revert "nohz: Fix collision between tick and other hrtimers"')
... due to a regression where CPUs spuriously stopped ticking.
The bug happened when a tick fired too early past its expected expiration:
on IRQ exit the tick was scheduled again to the same deadline but skipped
reprogramming because ts->next_tick still kept in cache the deadline.
This has been fixed now with resetting ts->next_tick from the tick
itself. Extra care has also been taken to prevent from obsolete values
throughout CPU hotplug operations.
When the tick is stopped and an interrupt occurs afterward, we check on
that interrupt exit if the next tick needs to be rescheduled. If it
doesn't need any update, we don't want to do anything.
In order to check if the tick needs an update, we compare it against the
clockevent device deadline. Now that's a problem because the clockevent
device is at a lower level than the tick itself if it is implemented
on top of hrtimer.
Every hrtimer share this clockevent device. So comparing the next tick
deadline against the clockevent device deadline is wrong because the
device may be programmed for another hrtimer whose deadline collides
with the tick. As a result we may end up not reprogramming the tick
accidentally.
In a worst case scenario under full dynticks mode, the tick stops firing
as it is supposed to every 1hz, leaving /proc/stat stalled:
Task in a full dynticks CPU
----------------------------
* hrtimer A is queued 2 seconds ahead
* the tick is stopped, scheduled 1 second ahead
* tick fires 1 second later
* on tick exit, nohz schedules the tick 1 second ahead but sees
the clockevent device is already programmed to that deadline,
fooled by hrtimer A, the tick isn't rescheduled.
* hrtimer A is cancelled before its deadline
* tick never fires again until an interrupt happens...
In order to fix this, store the next tick deadline to the tick_sched
local structure and reuse that value later to check whether we need to
reprogram the clock after an interrupt.
On the other hand, ts->sleep_length still wants to know about the next
clock event and not just the tick, so we want to improve the related
comment to avoid confusion.
Reported-and-tested-by: Tim Wright <tim@binbash.co.uk>
Reported-and-tested-by: Pavel Machek <pavel@ucw.cz>
Reported-by: James Hartsock <hartsjc@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1492783255-5051-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-04-21 08:00:54 -06:00
|
|
|
if (ts->tick_stopped && (expires == ts->next_tick)) {
|
|
|
|
/* Sanity check: make sure clockevent is actually programmed */
|
2017-06-12 22:04:14 -06:00
|
|
|
if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
|
2018-04-05 11:07:57 -06:00
|
|
|
return;
|
nohz: Fix collision between tick and other hrtimers, again
This restores commit:
24b91e360ef5: ("nohz: Fix collision between tick and other hrtimers")
... which got reverted by commit:
558e8e27e73f: ('Revert "nohz: Fix collision between tick and other hrtimers"')
... due to a regression where CPUs spuriously stopped ticking.
The bug happened when a tick fired too early past its expected expiration:
on IRQ exit the tick was scheduled again to the same deadline but skipped
reprogramming because ts->next_tick still kept in cache the deadline.
This has been fixed now with resetting ts->next_tick from the tick
itself. Extra care has also been taken to prevent from obsolete values
throughout CPU hotplug operations.
When the tick is stopped and an interrupt occurs afterward, we check on
that interrupt exit if the next tick needs to be rescheduled. If it
doesn't need any update, we don't want to do anything.
In order to check if the tick needs an update, we compare it against the
clockevent device deadline. Now that's a problem because the clockevent
device is at a lower level than the tick itself if it is implemented
on top of hrtimer.
Every hrtimer share this clockevent device. So comparing the next tick
deadline against the clockevent device deadline is wrong because the
device may be programmed for another hrtimer whose deadline collides
with the tick. As a result we may end up not reprogramming the tick
accidentally.
In a worst case scenario under full dynticks mode, the tick stops firing
as it is supposed to every 1hz, leaving /proc/stat stalled:
Task in a full dynticks CPU
----------------------------
* hrtimer A is queued 2 seconds ahead
* the tick is stopped, scheduled 1 second ahead
* tick fires 1 second later
* on tick exit, nohz schedules the tick 1 second ahead but sees
the clockevent device is already programmed to that deadline,
fooled by hrtimer A, the tick isn't rescheduled.
* hrtimer A is cancelled before its deadline
* tick never fires again until an interrupt happens...
In order to fix this, store the next tick deadline to the tick_sched
local structure and reuse that value later to check whether we need to
reprogram the clock after an interrupt.
On the other hand, ts->sleep_length still wants to know about the next
clock event and not just the tick, so we want to improve the related
comment to avoid confusion.
Reported-and-tested-by: Tim Wright <tim@binbash.co.uk>
Reported-and-tested-by: Pavel Machek <pavel@ucw.cz>
Reported-by: James Hartsock <hartsjc@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1492783255-5051-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-04-21 08:00:54 -06:00
|
|
|
|
|
|
|
WARN_ON_ONCE(1);
|
|
|
|
printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
|
|
|
|
basemono, ts->next_tick, dev->next_event,
|
|
|
|
hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer));
|
2017-05-11 08:36:19 -06:00
|
|
|
}
|
2011-07-31 17:25:38 -06:00
|
|
|
|
2015-04-14 15:08:56 -06:00
|
|
|
/*
|
|
|
|
* nohz_stop_sched_tick can be called several times before
|
|
|
|
* the nohz_restart_sched_tick is called. This happens when
|
|
|
|
* interrupts arrive which do not cause a reschedule. In the
|
|
|
|
* first call we save the current tick time, so we can restart
|
|
|
|
* the scheduler tick in nohz_restart_sched_tick.
|
|
|
|
*/
|
|
|
|
if (!ts->tick_stopped) {
|
2017-06-18 20:12:00 -06:00
|
|
|
calc_load_nohz_start();
|
2016-04-13 07:56:51 -06:00
|
|
|
cpu_load_update_nohz_start();
|
2017-08-29 07:07:54 -06:00
|
|
|
quiet_vmstat();
|
2007-05-08 01:30:03 -06:00
|
|
|
|
2015-04-14 15:08:56 -06:00
|
|
|
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
|
|
|
|
ts->tick_stopped = 1;
|
2015-12-10 19:27:25 -07:00
|
|
|
trace_tick_stop(1, TICK_DEP_MASK_NONE);
|
2015-04-14 15:08:56 -06:00
|
|
|
}
|
2007-05-29 15:47:39 -06:00
|
|
|
|
nohz: Fix collision between tick and other hrtimers, again
This restores commit:
24b91e360ef5: ("nohz: Fix collision between tick and other hrtimers")
... which got reverted by commit:
558e8e27e73f: ('Revert "nohz: Fix collision between tick and other hrtimers"')
... due to a regression where CPUs spuriously stopped ticking.
The bug happened when a tick fired too early past its expected expiration:
on IRQ exit the tick was scheduled again to the same deadline but skipped
reprogramming because ts->next_tick still kept in cache the deadline.
This has been fixed now with resetting ts->next_tick from the tick
itself. Extra care has also been taken to prevent from obsolete values
throughout CPU hotplug operations.
When the tick is stopped and an interrupt occurs afterward, we check on
that interrupt exit if the next tick needs to be rescheduled. If it
doesn't need any update, we don't want to do anything.
In order to check if the tick needs an update, we compare it against the
clockevent device deadline. Now that's a problem because the clockevent
device is at a lower level than the tick itself if it is implemented
on top of hrtimer.
Every hrtimer share this clockevent device. So comparing the next tick
deadline against the clockevent device deadline is wrong because the
device may be programmed for another hrtimer whose deadline collides
with the tick. As a result we may end up not reprogramming the tick
accidentally.
In a worst case scenario under full dynticks mode, the tick stops firing
as it is supposed to every 1hz, leaving /proc/stat stalled:
Task in a full dynticks CPU
----------------------------
* hrtimer A is queued 2 seconds ahead
* the tick is stopped, scheduled 1 second ahead
* tick fires 1 second later
* on tick exit, nohz schedules the tick 1 second ahead but sees
the clockevent device is already programmed to that deadline,
fooled by hrtimer A, the tick isn't rescheduled.
* hrtimer A is cancelled before its deadline
* tick never fires again until an interrupt happens...
In order to fix this, store the next tick deadline to the tick_sched
local structure and reuse that value later to check whether we need to
reprogram the clock after an interrupt.
On the other hand, ts->sleep_length still wants to know about the next
clock event and not just the tick, so we want to improve the related
comment to avoid confusion.
Reported-and-tested-by: Tim Wright <tim@binbash.co.uk>
Reported-and-tested-by: Pavel Machek <pavel@ucw.cz>
Reported-by: James Hartsock <hartsjc@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1492783255-5051-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-04-21 08:00:54 -06:00
|
|
|
ts->next_tick = tick;
|
|
|
|
|
2015-04-14 15:08:56 -06:00
|
|
|
/*
|
2015-04-14 15:08:58 -06:00
|
|
|
* If the expiration time == KTIME_MAX, then we simply stop
|
|
|
|
* the tick timer.
|
2015-04-14 15:08:56 -06:00
|
|
|
*/
|
2015-04-14 15:08:58 -06:00
|
|
|
if (unlikely(expires == KTIME_MAX)) {
|
2015-04-14 15:08:56 -06:00
|
|
|
if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
|
|
|
|
hrtimer_cancel(&ts->sched_timer);
|
2018-04-05 11:07:57 -06:00
|
|
|
return;
|
2007-02-16 02:28:03 -07:00
|
|
|
}
|
2015-04-14 15:08:54 -06:00
|
|
|
|
2018-04-24 13:22:18 -06:00
|
|
|
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
|
|
|
|
hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
|
|
|
|
} else {
|
|
|
|
hrtimer_set_expires(&ts->sched_timer, tick);
|
2015-04-14 15:08:58 -06:00
|
|
|
tick_program_event(tick, 1);
|
2018-04-24 13:22:18 -06:00
|
|
|
}
|
2011-10-07 10:22:06 -06:00
|
|
|
}
|
|
|
|
|
2018-04-05 11:07:57 -06:00
|
|
|
static void tick_nohz_retain_tick(struct tick_sched *ts)
|
|
|
|
{
|
|
|
|
ts->timer_expires_base = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_NO_HZ_FULL
|
|
|
|
static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
|
|
|
|
{
|
|
|
|
if (tick_nohz_next_event(ts, cpu))
|
|
|
|
tick_nohz_stop_tick(ts, cpu);
|
|
|
|
else
|
|
|
|
tick_nohz_retain_tick(ts);
|
|
|
|
}
|
|
|
|
#endif /* CONFIG_NO_HZ_FULL */
|
|
|
|
|
2016-04-13 07:56:51 -06:00
|
|
|
static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
|
2015-05-29 06:42:15 -06:00
|
|
|
{
|
|
|
|
/* Update jiffies first */
|
|
|
|
tick_do_update_jiffies64(now);
|
2016-04-13 07:56:51 -06:00
|
|
|
cpu_load_update_nohz_stop();
|
2015-05-29 06:42:15 -06:00
|
|
|
|
2016-07-04 03:50:36 -06:00
|
|
|
/*
|
|
|
|
* Clear the timer idle flag, so we avoid IPIs on remote queueing and
|
|
|
|
* the clock forward checks in the enqueue path:
|
|
|
|
*/
|
|
|
|
timer_clear_idle();
|
|
|
|
|
2017-06-18 20:12:00 -06:00
|
|
|
calc_load_nohz_stop();
|
2015-12-08 09:28:04 -07:00
|
|
|
touch_softlockup_watchdog_sched();
|
2015-05-29 06:42:15 -06:00
|
|
|
/*
|
|
|
|
* Cancel the scheduled timer and restore the tick
|
|
|
|
*/
|
|
|
|
ts->tick_stopped = 0;
|
|
|
|
ts->idle_exittime = now;
|
|
|
|
|
|
|
|
tick_nohz_restart(ts, now);
|
|
|
|
}
|
2015-05-27 11:22:08 -06:00
|
|
|
|
|
|
|
static void tick_nohz_full_update_tick(struct tick_sched *ts)
|
2013-04-20 08:40:31 -06:00
|
|
|
{
|
|
|
|
#ifdef CONFIG_NO_HZ_FULL
|
2013-11-27 23:27:11 -07:00
|
|
|
int cpu = smp_processor_id();
|
2013-04-20 08:40:31 -06:00
|
|
|
|
2015-05-27 07:42:42 -06:00
|
|
|
if (!tick_nohz_full_cpu(cpu))
|
2013-11-27 23:27:11 -07:00
|
|
|
return;
|
2013-04-20 08:40:31 -06:00
|
|
|
|
2013-11-27 23:27:11 -07:00
|
|
|
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
|
|
|
|
return;
|
2013-04-20 08:40:31 -06:00
|
|
|
|
2016-09-07 04:51:13 -06:00
|
|
|
if (can_stop_full_tick(cpu, ts))
|
2018-04-05 11:07:57 -06:00
|
|
|
tick_nohz_stop_sched_tick(ts, cpu);
|
2015-05-27 11:22:08 -06:00
|
|
|
else if (ts->tick_stopped)
|
2016-04-13 07:56:51 -06:00
|
|
|
tick_nohz_restart_sched_tick(ts, ktime_get());
|
2013-04-20 08:40:31 -06:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2011-07-31 16:06:10 -06:00
|
|
|
static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
|
|
|
|
{
|
|
|
|
/*
|
2016-07-01 04:42:35 -06:00
|
|
|
* If this CPU is offline and it is the one which updates
|
2011-07-31 16:06:10 -06:00
|
|
|
* jiffies, then give up the assignment and let it be taken by
|
2016-07-01 04:42:35 -06:00
|
|
|
* the CPU which runs the tick timer next. If we don't drop
|
2011-07-31 16:06:10 -06:00
|
|
|
* this here the jiffies might be stale and do_timer() never
|
|
|
|
* invoked.
|
|
|
|
*/
|
|
|
|
if (unlikely(!cpu_online(cpu))) {
|
|
|
|
if (cpu == tick_do_timer_cpu)
|
|
|
|
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
|
nohz: Fix collision between tick and other hrtimers, again
This restores commit:
24b91e360ef5: ("nohz: Fix collision between tick and other hrtimers")
... which got reverted by commit:
558e8e27e73f: ('Revert "nohz: Fix collision between tick and other hrtimers"')
... due to a regression where CPUs spuriously stopped ticking.
The bug happened when a tick fired too early past its expected expiration:
on IRQ exit the tick was scheduled again to the same deadline but skipped
reprogramming because ts->next_tick still kept in cache the deadline.
This has been fixed now with resetting ts->next_tick from the tick
itself. Extra care has also been taken to prevent from obsolete values
throughout CPU hotplug operations.
When the tick is stopped and an interrupt occurs afterward, we check on
that interrupt exit if the next tick needs to be rescheduled. If it
doesn't need any update, we don't want to do anything.
In order to check if the tick needs an update, we compare it against the
clockevent device deadline. Now that's a problem because the clockevent
device is at a lower level than the tick itself if it is implemented
on top of hrtimer.
Every hrtimer share this clockevent device. So comparing the next tick
deadline against the clockevent device deadline is wrong because the
device may be programmed for another hrtimer whose deadline collides
with the tick. As a result we may end up not reprogramming the tick
accidentally.
In a worst case scenario under full dynticks mode, the tick stops firing
as it is supposed to every 1hz, leaving /proc/stat stalled:
Task in a full dynticks CPU
----------------------------
* hrtimer A is queued 2 seconds ahead
* the tick is stopped, scheduled 1 second ahead
* tick fires 1 second later
* on tick exit, nohz schedules the tick 1 second ahead but sees
the clockevent device is already programmed to that deadline,
fooled by hrtimer A, the tick isn't rescheduled.
* hrtimer A is cancelled before its deadline
* tick never fires again until an interrupt happens...
In order to fix this, store the next tick deadline to the tick_sched
local structure and reuse that value later to check whether we need to
reprogram the clock after an interrupt.
On the other hand, ts->sleep_length still wants to know about the next
clock event and not just the tick, so we want to improve the related
comment to avoid confusion.
Reported-and-tested-by: Tim Wright <tim@binbash.co.uk>
Reported-and-tested-by: Pavel Machek <pavel@ucw.cz>
Reported-by: James Hartsock <hartsjc@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1492783255-5051-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-04-21 08:00:54 -06:00
|
|
|
/*
|
|
|
|
* Make sure the CPU doesn't get fooled by obsolete tick
|
|
|
|
* deadline if it comes back online later.
|
|
|
|
*/
|
|
|
|
ts->next_tick = 0;
|
2013-05-13 13:40:27 -06:00
|
|
|
return false;
|
2011-07-31 16:06:10 -06:00
|
|
|
}
|
|
|
|
|
2018-04-05 11:07:57 -06:00
|
|
|
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
|
2011-07-31 16:06:10 -06:00
|
|
|
return false;
|
|
|
|
|
|
|
|
if (need_resched())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
|
|
|
|
static int ratelimit;
|
|
|
|
|
2012-08-23 09:34:07 -06:00
|
|
|
if (ratelimit < 10 &&
|
|
|
|
(local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
|
2013-02-08 10:37:30 -07:00
|
|
|
pr_warn("NOHZ: local_softirq_pending %02x\n",
|
|
|
|
(unsigned int) local_softirq_pending());
|
2011-07-31 16:06:10 -06:00
|
|
|
ratelimit++;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-07-24 15:52:27 -06:00
|
|
|
if (tick_nohz_full_enabled()) {
|
2012-12-18 10:24:35 -07:00
|
|
|
/*
|
|
|
|
* Keep the tick alive to guarantee timekeeping progression
|
|
|
|
* if there are full dynticks CPUs around
|
|
|
|
*/
|
|
|
|
if (tick_do_timer_cpu == cpu)
|
|
|
|
return false;
|
|
|
|
/*
|
|
|
|
* Boot safety: make sure the timekeeping duty has been
|
|
|
|
* assigned before entering dyntick-idle mode,
|
|
|
|
*/
|
|
|
|
if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2011-07-31 16:06:10 -06:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-04-05 10:58:27 -06:00
|
|
|
static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
|
2011-07-27 09:29:28 -06:00
|
|
|
{
|
2018-04-05 10:58:27 -06:00
|
|
|
ktime_t expires;
|
2011-07-31 16:06:10 -06:00
|
|
|
int cpu = smp_processor_id();
|
2011-07-27 09:29:28 -06:00
|
|
|
|
2017-05-18 00:26:09 -06:00
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
if (check_pending_deferrable_timers(cpu))
|
|
|
|
raise_softirq_irqoff(TIMER_SOFTIRQ);
|
|
|
|
#endif
|
|
|
|
|
sched: idle: Select idle state before stopping the tick
In order to address the issue with short idle duration predictions
by the idle governor after the scheduler tick has been stopped,
reorder the code in cpuidle_idle_call() so that the governor idle
state selection runs before tick_nohz_idle_go_idle() and use the
"nohz" hint returned by cpuidle_select() to decide whether or not
to stop the tick.
This isn't straightforward, because menu_select() invokes
tick_nohz_get_sleep_length() to get the time to the next timer
event and the number returned by the latter comes from
__tick_nohz_idle_stop_tick(). Fortunately, however, it is possible
to compute that number without actually stopping the tick and with
the help of the existing code.
Namely, tick_nohz_get_sleep_length() can be made call
tick_nohz_next_event(), introduced earlier, to get the time to the
next non-highres timer event. If that happens, tick_nohz_next_event()
need not be called by __tick_nohz_idle_stop_tick() again.
If it turns out that the scheduler tick cannot be stopped going
forward or the next timer event is too close for the tick to be
stopped, tick_nohz_get_sleep_length() can simply return the time to
the next event currently programmed into the corresponding clock
event device.
In addition to knowing the return value of tick_nohz_next_event(),
however, tick_nohz_get_sleep_length() needs to know the time to the
next highres timer event, but with the scheduler tick timer excluded,
which can be computed with the help of hrtimer_get_next_event().
That minimum of that number and the tick_nohz_next_event() return
value is the total time to the next timer event with the assumption
that the tick will be stopped. It can be returned to the idle
governor which can use it for predicting idle duration (under the
assumption that the tick will be stopped) and deciding whether or
not it makes sense to stop the tick before putting the CPU into the
selected idle state.
With the above, the sleep_length field in struct tick_sched is not
necessary any more, so drop it.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199227
Reported-by: Doug Smythies <dsmythies@telus.net>
Reported-by: Thomas Ilsche <thomas.ilsche@tu-dresden.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
2018-04-03 15:17:11 -06:00
|
|
|
/*
|
|
|
|
* If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
|
|
|
|
* tick timer expiration time is known already.
|
|
|
|
*/
|
|
|
|
if (ts->timer_expires_base)
|
|
|
|
expires = ts->timer_expires;
|
|
|
|
else if (can_stop_idle_tick(cpu, ts))
|
|
|
|
expires = tick_nohz_next_event(ts, cpu);
|
|
|
|
else
|
|
|
|
return;
|
2018-04-05 11:07:57 -06:00
|
|
|
|
|
|
|
ts->idle_calls++;
|
2016-09-02 00:38:23 -06:00
|
|
|
|
2018-04-05 11:07:57 -06:00
|
|
|
if (expires > 0LL) {
|
2011-07-31 16:06:10 -06:00
|
|
|
int was_stopped = ts->tick_stopped;
|
|
|
|
|
2018-04-05 11:07:57 -06:00
|
|
|
tick_nohz_stop_tick(ts, cpu);
|
2011-07-31 17:25:38 -06:00
|
|
|
|
2018-04-05 11:07:57 -06:00
|
|
|
ts->idle_sleeps++;
|
|
|
|
ts->idle_expires = expires;
|
2011-07-31 16:06:10 -06:00
|
|
|
|
2017-06-18 20:12:01 -06:00
|
|
|
if (!was_stopped && ts->tick_stopped) {
|
2011-07-31 16:06:10 -06:00
|
|
|
ts->idle_jiffies = ts->last_jiffies;
|
2017-06-18 20:12:01 -06:00
|
|
|
nohz_balance_enter_idle(cpu);
|
|
|
|
}
|
2018-04-05 11:07:57 -06:00
|
|
|
} else {
|
|
|
|
tick_nohz_retain_tick(ts);
|
2011-07-31 16:06:10 -06:00
|
|
|
}
|
2011-10-07 10:22:06 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2018-04-05 10:58:27 -06:00
|
|
|
* tick_nohz_idle_stop_tick - stop the idle tick from the idle task
|
2011-10-07 10:22:06 -06:00
|
|
|
*
|
|
|
|
* When the next event is more than a tick into the future, stop the idle tick
|
2018-04-05 10:58:27 -06:00
|
|
|
*/
|
|
|
|
void tick_nohz_idle_stop_tick(void)
|
|
|
|
{
|
|
|
|
__tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
|
|
|
|
}
|
|
|
|
|
sched: idle: Select idle state before stopping the tick
In order to address the issue with short idle duration predictions
by the idle governor after the scheduler tick has been stopped,
reorder the code in cpuidle_idle_call() so that the governor idle
state selection runs before tick_nohz_idle_go_idle() and use the
"nohz" hint returned by cpuidle_select() to decide whether or not
to stop the tick.
This isn't straightforward, because menu_select() invokes
tick_nohz_get_sleep_length() to get the time to the next timer
event and the number returned by the latter comes from
__tick_nohz_idle_stop_tick(). Fortunately, however, it is possible
to compute that number without actually stopping the tick and with
the help of the existing code.
Namely, tick_nohz_get_sleep_length() can be made call
tick_nohz_next_event(), introduced earlier, to get the time to the
next non-highres timer event. If that happens, tick_nohz_next_event()
need not be called by __tick_nohz_idle_stop_tick() again.
If it turns out that the scheduler tick cannot be stopped going
forward or the next timer event is too close for the tick to be
stopped, tick_nohz_get_sleep_length() can simply return the time to
the next event currently programmed into the corresponding clock
event device.
In addition to knowing the return value of tick_nohz_next_event(),
however, tick_nohz_get_sleep_length() needs to know the time to the
next highres timer event, but with the scheduler tick timer excluded,
which can be computed with the help of hrtimer_get_next_event().
That minimum of that number and the tick_nohz_next_event() return
value is the total time to the next timer event with the assumption
that the tick will be stopped. It can be returned to the idle
governor which can use it for predicting idle duration (under the
assumption that the tick will be stopped) and deciding whether or
not it makes sense to stop the tick before putting the CPU into the
selected idle state.
With the above, the sleep_length field in struct tick_sched is not
necessary any more, so drop it.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199227
Reported-by: Doug Smythies <dsmythies@telus.net>
Reported-by: Thomas Ilsche <thomas.ilsche@tu-dresden.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
2018-04-03 15:17:11 -06:00
|
|
|
void tick_nohz_idle_retain_tick(void)
|
|
|
|
{
|
|
|
|
tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
|
|
|
|
/*
|
|
|
|
* Undo the effect of get_next_timer_interrupt() called from
|
|
|
|
* tick_nohz_next_event().
|
|
|
|
*/
|
|
|
|
timer_clear_idle();
|
|
|
|
}
|
|
|
|
|
2018-04-05 10:58:27 -06:00
|
|
|
/**
|
|
|
|
* tick_nohz_idle_enter - prepare for entering idle on the current CPU
|
2011-10-08 08:01:00 -06:00
|
|
|
*
|
2018-04-05 10:58:27 -06:00
|
|
|
* Called when we start the idle loop.
|
2011-10-07 10:22:06 -06:00
|
|
|
*/
|
2011-11-17 10:48:14 -07:00
|
|
|
void tick_nohz_idle_enter(void)
|
2011-10-07 10:22:06 -06:00
|
|
|
{
|
|
|
|
struct tick_sched *ts;
|
|
|
|
|
2017-11-06 08:01:20 -07:00
|
|
|
lockdep_assert_irqs_enabled();
|
2012-01-06 09:33:28 -07:00
|
|
|
|
2011-11-17 10:48:14 -07:00
|
|
|
local_irq_disable();
|
|
|
|
|
2014-08-17 11:30:25 -06:00
|
|
|
ts = this_cpu_ptr(&tick_cpu_sched);
|
2018-04-05 11:07:57 -06:00
|
|
|
|
|
|
|
WARN_ON_ONCE(ts->timer_expires_base);
|
|
|
|
|
2011-10-07 10:22:06 -06:00
|
|
|
ts->inidle = 1;
|
2018-04-05 10:58:27 -06:00
|
|
|
tick_nohz_start_idle(ts);
|
2011-11-17 10:48:14 -07:00
|
|
|
|
|
|
|
local_irq_enable();
|
2011-10-07 10:22:06 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* tick_nohz_irq_exit - update next tick event from interrupt exit
|
|
|
|
*
|
|
|
|
* When an interrupt fires while we are idle and it doesn't cause
|
|
|
|
* a reschedule, it may still add, modify or delete a timer, enqueue
|
|
|
|
* an RCU callback, etc...
|
|
|
|
* So we need to re-calculate and reprogram the next tick event.
|
|
|
|
*/
|
|
|
|
void tick_nohz_irq_exit(void)
|
|
|
|
{
|
2014-08-17 11:30:25 -06:00
|
|
|
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
|
2011-10-07 10:22:06 -06:00
|
|
|
|
Revert "cpuidle: Quickly notice prediction failure for repeat mode"
Revert commit 69a37bea (cpuidle: Quickly notice prediction failure for
repeat mode), because it has been identified as the source of a
significant performance regression in v3.8 and later as explained by
Jeremy Eder:
We believe we've identified a particular commit to the cpuidle code
that seems to be impacting performance of variety of workloads.
The simplest way to reproduce is using netperf TCP_RR test, so
we're using that, on a pair of Sandy Bridge based servers. We also
have data from a large database setup where performance is also
measurably/positively impacted, though that test data isn't easily
share-able.
Included below are test results from 3 test kernels:
kernel reverts
-----------------------------------------------------------
1) vanilla upstream (no reverts)
2) perfteam2 reverts e11538d1f03914eb92af5a1a378375c05ae8520c
3) test reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4
e11538d1f03914eb92af5a1a378375c05ae8520c
In summary, netperf TCP_RR numbers improve by approximately 4%
after reverting 69a37beabf1f0a6705c08e879bdd5d82ff6486c4. When
69a37beabf1f0a6705c08e879bdd5d82ff6486c4 is included, C0 residency
never seems to get above 40%. Taking that patch out gets C0 near
100% quite often, and performance increases.
The below data are histograms representing the %c0 residency @
1-second sample rates (using turbostat), while under netperf test.
- If you look at the first 4 histograms, you can see %c0 residency
almost entirely in the 30,40% bin.
- The last pair, which reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4,
shows %c0 in the 80,90,100% bins.
Below each kernel name are netperf TCP_RR trans/s numbers for the
particular kernel that can be disclosed publicly, comparing the 3
test kernels. We ran a 4th test with the vanilla kernel where
we've also set /dev/cpu_dma_latency=0 to show overall impact
boosting single-threaded TCP_RR performance over 11% above
baseline.
3.10-rc2 vanilla RX + c0 lock (/dev/cpu_dma_latency=0):
TCP_RR trans/s 54323.78
-----------------------------------------------------------
3.10-rc2 vanilla RX (no reverts)
TCP_RR trans/s 48192.47
Receiver %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 0]:
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 59]:
***********************************************************
40.0000 - 50.0000 [ 1]: *
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 0]:
Sender %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 0]:
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 11]: ***********
40.0000 - 50.0000 [ 49]:
*************************************************
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 0]:
-----------------------------------------------------------
3.10-rc2 perfteam2 RX (reverts commit
e11538d1f03914eb92af5a1a378375c05ae8520c)
TCP_RR trans/s 49698.69
Receiver %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 1]: *
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 59]:
***********************************************************
40.0000 - 50.0000 [ 0]:
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 0]:
Sender %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 0]:
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 2]: **
40.0000 - 50.0000 [ 58]:
**********************************************************
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 0]:
-----------------------------------------------------------
3.10-rc2 test RX (reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4
and e11538d1f03914eb92af5a1a378375c05ae8520c)
TCP_RR trans/s 47766.95
Receiver %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 1]: *
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 27]: ***************************
40.0000 - 50.0000 [ 2]: **
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 2]: **
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 28]: ****************************
Sender:
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 0]:
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 11]: ***********
40.0000 - 50.0000 [ 0]:
50.0000 - 60.0000 [ 1]: *
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 3]: ***
80.0000 - 90.0000 [ 7]: *******
90.0000 - 100.0000 [ 38]: **************************************
These results demonstrate gaining back the tendency of the CPU to
stay in more responsive, performant C-states (and thus yield
measurably better performance), by reverting commit
69a37beabf1f0a6705c08e879bdd5d82ff6486c4.
Requested-by: Jeremy Eder <jeder@redhat.com>
Tested-by: Len Brown <len.brown@intel.com>
Cc: 3.8+ <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-07-26 17:41:34 -06:00
|
|
|
if (ts->inidle)
|
2018-04-05 10:58:27 -06:00
|
|
|
tick_nohz_start_idle(ts);
|
Revert "cpuidle: Quickly notice prediction failure for repeat mode"
Revert commit 69a37bea (cpuidle: Quickly notice prediction failure for
repeat mode), because it has been identified as the source of a
significant performance regression in v3.8 and later as explained by
Jeremy Eder:
We believe we've identified a particular commit to the cpuidle code
that seems to be impacting performance of variety of workloads.
The simplest way to reproduce is using netperf TCP_RR test, so
we're using that, on a pair of Sandy Bridge based servers. We also
have data from a large database setup where performance is also
measurably/positively impacted, though that test data isn't easily
share-able.
Included below are test results from 3 test kernels:
kernel reverts
-----------------------------------------------------------
1) vanilla upstream (no reverts)
2) perfteam2 reverts e11538d1f03914eb92af5a1a378375c05ae8520c
3) test reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4
e11538d1f03914eb92af5a1a378375c05ae8520c
In summary, netperf TCP_RR numbers improve by approximately 4%
after reverting 69a37beabf1f0a6705c08e879bdd5d82ff6486c4. When
69a37beabf1f0a6705c08e879bdd5d82ff6486c4 is included, C0 residency
never seems to get above 40%. Taking that patch out gets C0 near
100% quite often, and performance increases.
The below data are histograms representing the %c0 residency @
1-second sample rates (using turbostat), while under netperf test.
- If you look at the first 4 histograms, you can see %c0 residency
almost entirely in the 30,40% bin.
- The last pair, which reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4,
shows %c0 in the 80,90,100% bins.
Below each kernel name are netperf TCP_RR trans/s numbers for the
particular kernel that can be disclosed publicly, comparing the 3
test kernels. We ran a 4th test with the vanilla kernel where
we've also set /dev/cpu_dma_latency=0 to show overall impact
boosting single-threaded TCP_RR performance over 11% above
baseline.
3.10-rc2 vanilla RX + c0 lock (/dev/cpu_dma_latency=0):
TCP_RR trans/s 54323.78
-----------------------------------------------------------
3.10-rc2 vanilla RX (no reverts)
TCP_RR trans/s 48192.47
Receiver %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 0]:
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 59]:
***********************************************************
40.0000 - 50.0000 [ 1]: *
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 0]:
Sender %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 0]:
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 11]: ***********
40.0000 - 50.0000 [ 49]:
*************************************************
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 0]:
-----------------------------------------------------------
3.10-rc2 perfteam2 RX (reverts commit
e11538d1f03914eb92af5a1a378375c05ae8520c)
TCP_RR trans/s 49698.69
Receiver %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 1]: *
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 59]:
***********************************************************
40.0000 - 50.0000 [ 0]:
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 0]:
Sender %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 0]:
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 2]: **
40.0000 - 50.0000 [ 58]:
**********************************************************
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 0]:
-----------------------------------------------------------
3.10-rc2 test RX (reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4
and e11538d1f03914eb92af5a1a378375c05ae8520c)
TCP_RR trans/s 47766.95
Receiver %c0
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 1]: *
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 27]: ***************************
40.0000 - 50.0000 [ 2]: **
50.0000 - 60.0000 [ 0]:
60.0000 - 70.0000 [ 2]: **
70.0000 - 80.0000 [ 0]:
80.0000 - 90.0000 [ 0]:
90.0000 - 100.0000 [ 28]: ****************************
Sender:
0.0000 - 10.0000 [ 1]: *
10.0000 - 20.0000 [ 0]:
20.0000 - 30.0000 [ 0]:
30.0000 - 40.0000 [ 11]: ***********
40.0000 - 50.0000 [ 0]:
50.0000 - 60.0000 [ 1]: *
60.0000 - 70.0000 [ 0]:
70.0000 - 80.0000 [ 3]: ***
80.0000 - 90.0000 [ 7]: *******
90.0000 - 100.0000 [ 38]: **************************************
These results demonstrate gaining back the tendency of the CPU to
stay in more responsive, performant C-states (and thus yield
measurably better performance), by reverting commit
69a37beabf1f0a6705c08e879bdd5d82ff6486c4.
Requested-by: Jeremy Eder <jeder@redhat.com>
Tested-by: Len Brown <len.brown@intel.com>
Cc: 3.8+ <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-07-26 17:41:34 -06:00
|
|
|
else
|
2015-05-27 11:22:08 -06:00
|
|
|
tick_nohz_full_update_tick(ts);
|
2007-02-16 02:28:03 -07:00
|
|
|
}
|
|
|
|
|
cpuidle: consolidate 2.6.22 cpuidle branch into one patch
commit e5a16b1f9eec0af7cfa0830304b41c1c0833cf9f
Author: Len Brown <len.brown@intel.com>
Date: Tue Oct 2 23:44:44 2007 -0400
cpuidle: shrink diff
processor_idle.c | 440 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 429 insertions(+), 11 deletions(-)
Signed-off-by: Len Brown <len.brown@intel.com>
commit dfbb9d5aedfb18848a3e0d6f6e3e4969febb209c
Author: Len Brown <len.brown@intel.com>
Date: Wed Sep 26 02:17:55 2007 -0400
cpuidle: reduce diff size
Reduces the cpuidle processor_idle.c diff vs 2.6.22 from this
processor_idle.c | 2006 ++++++++++++++++++++++++++-----------------
1 file changed, 1219 insertions(+), 787 deletions(-)
to this:
processor_idle.c | 502 +++++++++++++++++++++++++++++++++++++++----
1 file changed, 458 insertions(+), 44 deletions(-)
...for the purpose of making the cpuilde patch less invasive
and easier to review.
no functional changes. build tested only.
Signed-off-by: Len Brown <len.brown@intel.com>
commit 889172fc915f5a7fe20f35b133cbd205ce69bf6c
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Sep 13 13:40:05 2007 -0700
cpuidle: Retain old ACPI policy for !CONFIG_CPU_IDLE
Retain the old policy in processor_idle, so that when CPU_IDLE is not
configured, old C-state policy will still be used. This provides a
clean gradual migration path from old ACPI policy to new cpuidle
based policy.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 9544a8181edc7ecc33b3bfd69271571f98ed08bc
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Sep 13 13:39:17 2007 -0700
cpuidle: Configure governors by default
Quoting Len "Do not give an option to users to shoot themselves in the foot".
Remove the configurability of ladder and menu governors as they are
needed for default policy of cpuidle. That way users will not be able to
have cpuidle without any policy loosing all C-state power savings.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8975059a2c1e56cfe83d1bcf031bcf4cb39be743
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:27:07 2007 -0400
CPUIDLE: load ACPI properly when CPUIDLE is disabled
Change the registration return codes for when CPUIDLE
support is not compiled into the kernel. As a result, the ACPI
processor driver will load properly even if CPUIDLE is unavailable.
However, it may be possible to cleanup the ACPI processor driver further
and eliminate some dead code paths.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit e0322e2b58dd1b12ec669bf84693efe0dc2414a8
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:26:06 2007 -0400
CPUIDLE: remove cpuidle_get_bm_activity()
Remove cpuidle_get_bm_activity() and updates governors
accordingly.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 18a6e770d5c82ba26653e53d240caa617e09e9ab
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:58 2007 -0400
CPUIDLE: max_cstate fix
Currently max_cstate is limited to 0, resulting in no idle processor
power management on ACPI platforms. This patch restores the value to
the array size.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1fdc0887286179b40ce24bcdbde663172e205ef0
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:40 2007 -0400
CPUIDLE: handle BM detection inside the ACPI Processor driver
Update the ACPI processor driver to detect BM activity and
limit state entry depth internally, rather than exposing such
requirements to CPUIDLE. As a result, CPUIDLE can drop this
ACPI-specific interface and become more platform independent. BM
activity is now handled much more aggressively than it was in the
original implementation, so some testing coverage may be needed to
verify that this doesn't introduce any DMA buffer under-run issues.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0ef38840db666f48e3cdd2b769da676c57228dd9
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:14 2007 -0400
CPUIDLE: menu governor updates
Tweak the menu governor to more effectively handle non-timer
break events. Non-timer break events are detected by comparing the
actual sleep time to the expected sleep time. In future revisions, it
may be more reliable to use the timer data structures directly.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit bb4d74fca63fa96cf3ace644b15ae0f12b7df5a1
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:24:40 2007 -0400
CPUIDLE: fix 'current_governor' sysfs entry
Allow the "current_governor" sysfs entry to properly handle
input terminated with '\n'.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit df3c71559bb69b125f1a48971bf0d17f78bbdf47
Author: Len Brown <len.brown@intel.com>
Date: Sun Aug 12 02:00:45 2007 -0400
cpuidle: fix IA64 build (again)
Signed-off-by: Len Brown <len.brown@intel.com>
commit a02064579e3f9530fd31baae16b1fc46b5a7bca8
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:39:27 2007 -0400
cpuidle: Remove support for runtime changing of max_cstate
Remove support for runtime changeability of max_cstate. Drivers can use
use latency APIs.
max_cstate can still be used as a boot time option and dmi override.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0912a44b13adf22f5e3f607d263aed23b4910d7e
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:39:16 2007 -0400
cpuidle: Remove ACPI cstate_limit calls from ipw2100
ipw2100 already has code to use accetable_latency interfaces to limit the
C-state. Remove the calls to acpi_set_cstate_limit and acpi_get_cstate_limit
as they are redundant.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c649a76e76be6bff1fd770d0a775798813a3f6e0
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:35:39 2007 -0400
cpuidle: compile fix for pause and resume functions
Fix the compilation failure when cpuidle is not compiled in.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Adam Belay <adam.belay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 2305a5920fb8ee6ccec1c62ade05aa8351091d71
Author: Adam Belay <abelay@novell.com>
Date: Thu Jul 19 00:49:00 2007 -0400
cpuidle: re-write
Some portions have been rewritten to make the code cleaner and lighter
weight. The following is a list of changes:
1.) the state name is now included in the sysfs interface
2.) detection, hotplug, and available state modifications are handled by
CPUIDLE drivers directly
3.) the CPUIDLE idle handler is only ever installed when at least one
cpuidle_device is enabled and ready
4.) the menu governor BM code no longer overflows
5.) the sysfs attributes are now printed as unsigned integers, avoiding
negative values
6.) a variety of other small cleanups
Also, Idle drivers are no longer swappable during runtime through the
CPUIDLE sysfs inteface. On i386 and x86_64 most idle handlers (e.g.
poll, mwait, halt, etc.) don't benefit from an infrastructure that
supports multiple states, so I think using a more general case idle
handler selection mechanism would be cleaner.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit df25b6b56955714e6e24b574d88d1fd11f0c3ee5
Author: Len Brown <len.brown@intel.com>
Date: Tue Jul 24 17:08:21 2007 -0400
cpuidle: fix IA64 buid
Signed-off-by: Len Brown <len.brown@intel.com>
commit fd6ada4c14488755ff7068860078c437431fbccd
Author: Adrian Bunk <bunk@stusta.de>
Date: Mon Jul 9 11:33:13 2007 -0700
cpuidle: static
make cpuidle_replace_governor() static
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c1d4a2cebcadf2429c0c72e1d29aa2a9684c32e0
Author: Adrian Bunk <bunk@stusta.de>
Date: Tue Jul 3 00:54:40 2007 -0400
cpuidle: static
This patch makes the needlessly global struct menu_governor static.
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit dbf8780c6e8d572c2c273da97ed1cca7608fd999
Author: Andrew Morton <akpm@linux-foundation.org>
Date: Tue Jul 3 00:49:14 2007 -0400
export symbol tick_nohz_get_sleep_length
ERROR: "tick_nohz_get_sleep_length" [drivers/cpuidle/governors/menu.ko] undefined!
ERROR: "tick_nohz_get_idle_jiffies" [drivers/cpuidle/governors/menu.ko] undefined!
And please be sure to get your changes to core kernel suitably reviewed.
Cc: Adam Belay <abelay@novell.com>
Cc: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 29f0e248e7017be15f99febf9143a2cef00b2961
Author: Andrew Morton <akpm@linux-foundation.org>
Date: Tue Jul 3 00:43:04 2007 -0400
tick.h needs hrtimer.h
It uses hrtimers.
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit e40cede7d63a029e92712a3fe02faee60cc38fb4
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:40:34 2007 -0400
cpuidle: first round of documentation updates
Documentation changes based on Pavel's feedback.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 83b42be2efece386976507555c29e7773a0dfcd1
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:39:25 2007 -0400
cpuidle: add rating to the governors and pick the one with highest rating by default
Introduce a governor rating scheme to pick the right governor by default.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit d2a74b8c5e8f22def4709330d4bfc4a29209b71c
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:38:08 2007 -0400
cpuidle: make cpuidle sysfs driver governor switch off by default
Make default cpuidle sysfs to show current_governor and current_driver in
read-only mode. More elaborate available_governors and available_drivers with
writeable current_governor and current_driver interface only appear with
"cpuidle_sysfs_switch" boot parameter.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1f60a0e80bf83cf6b55c8845bbe5596ed8f6307b
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:37:00 2007 -0400
cpuidle: menu governor: change the early break condition
Change the C-state early break out algorithm in menu governor.
We only look at early breakouts that result in wakeups shorter than idle
state's target_residency. If such a breakout is frequent enough, eliminate
the particular idle state upto a timeout period.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 45a42095cf64b003b4a69be3ce7f434f97d7af51
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:35:38 2007 -0400
cpuidle: fix uninitialized variable in sysfs routine
Fix the uninitialized usage of ret.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 80dca7cdba3e6ee13eae277660873ab9584eb3be
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:34:16 2007 -0400
cpuidle: reenable /proc/acpi//power interface for the time being
Keep /proc/acpi/processor/CPU*/power around for a while as powertop depends
on it. It will be marked deprecated and removed in future. powertop can use
cpuidle interfaces instead.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 589c37c2646c5e3813a51255a5ee1159cb4c33fc
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:32:37 2007 -0400
cpuidle: menu governor and hrtimer compile fix
Compile fix for menu governor.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0ba80bd9ab3ed304cb4f19b722e4cc6740588b5e
Author: Len Brown <len.brown@intel.com>
Date: Thu May 31 22:51:43 2007 -0400
cpuidle: build fix - cpuidle vs ipw2100 module
ERROR: "acpi_set_cstate_limit" [drivers/net/wireless/ipw2100.ko] undefined!
Signed-off-by: Len Brown <len.brown@intel.com>
commit d7d8fa7f96a7f7682be7c6cc0cc53fa7a18c3b58
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:47:07 2007 -0400
cpuidle: add the 'menu' governor
Here is my first take at implementing an idle PM governor that takes
full advantage of NO_HZ. I call it the 'menu' governor because it
considers the full list of idle states before each entry.
I've kept the implementation fairly simple. It attempts to guess the
next residency time and then chooses a state that would meet at least
the break-even point between power savings and entry cost. To this end,
it selects the deepest idle state that satisfies the following
constraints:
1. If the idle time elapsed since bus master activity was detected
is below a threshold (currently 20 ms), then limit the selection
to C2-type or above.
2. Do not choose a state with a break-even residency that exceeds
the expected time remaining until the next timer interrupt.
3. Do not choose a state with a break-even residency that exceeds
the elapsed time between the last pair of break events,
excluding timer interrupts.
This governor has an advantage over "ladder" governor because it
proactively checks how much time remains until the next timer interrupt
using the tick infrastructure. Also, it handles device interrupt
activity more intelligently by not including timer interrupts in break
event calculations. Finally, it doesn't make policy decisions using the
number of state entries, which can have variable residency times (NO_HZ
makes these potentially very large), and instead only considers sleep
time deltas.
The menu governor can be selected during runtime using the cpuidle sysfs
interface like so:
"echo "menu" > /sys/devices/system/cpu/cpuidle/current_governor"
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit a4bec7e65aa3b7488b879d971651cc99a6c410fe
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:47:03 2007 -0400
cpuidle: export time until next timer interrupt using NO_HZ
Expose information about the time remaining until the next
timer interrupt expires by utilizing the dynticks infrastructure.
Also modify the main idle loop to allow dynticks to handle
non-interrupt break events (e.g. DMA). Finally, expose sleep ticks
information to external code. Thomas Gleixner is responsible for much
of the code in this patch. However, I've made some additional changes,
so I'm probably responsible if there are any bugs or oversights :)
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 2929d8996fbc77f41a5ff86bb67cdde3ca7d2d72
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:46:58 2007 -0400
cpuidle: governor API changes
This patch prepares cpuidle for the menu governor. It adds an optional
stage after idle state entry to give the governor an opportunity to
check why the state was exited. Also it makes sure the idle loop
returns after each state entry, allowing the appropriate dynticks code
to run.
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 3a7fd42f9825c3b03e364ca59baa751bb350775f
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Apr 26 00:03:59 2007 -0700
cpuidle: hang fix
Prevent hang on x86-64, when ACPI processor driver is added as a module on
a system that does not support C-states.
x86-64 expects all idle handlers to enable interrupts before returning from
idle handler. This is due to enter_idle(), exit_idle() races. Make
cpuidle_idle_call() confirm to this when there is no pm_idle_old.
Also, cpuidle look at the return values of attch_driver() and set
current_driver to NULL if attach fails on all CPUs.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 4893339a142afbd5b7c01ffadfd53d14746e858e
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:09 2007 +0800
cpuidle: add support for max_cstate limit
With CPUIDLE framework, the max_cstate (to limit max cpu c-state)
parameter is ingored. Some systems require it to ignore C2/C3
and some drivers like ipw require it too.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 43bbbbe1cb998cbd2df656f55bb3bfe30f30e7d1
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:13 2007 +0800
cpuidle: add cpuidle_fore_redetect_devices API
add cpuidle_force_redetect_devices API,
which forces all CPU redetect idle states.
Next patch will use it.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit d1edadd608f24836def5ec483d2edccfb37b1d19
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:01 2007 +0800
cpuidle: fix sysfs related issue
Fix the cpuidle sysfs issue.
a. make kobject dynamicaly allocated
b. fixed sysfs init issue to avoid suspend/resume issue
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 7169a5cc0d67b263978859672e86c13c23a5570d
Author: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed Mar 28 22:52:53 2007 -0400
cpuidle: 1-bit field must be unsigned
A 1-bit bitfield has no room for a sign bit.
drivers/cpuidle/governors/ladder.c:54:16: error: dubious bitfield without explicit `signed' or `unsigned'
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 4658620158dc2fbd9e4bcb213c5b6fb5d05ba7d4
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed Mar 28 22:52:41 2007 -0400
cpuidle: fix boot hang
Patch for cpuidle boot hang reported by Larry Finger here.
http://www.ussg.iu.edu/hypermail/linux/kernel/0703.2/2025.html
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Larry Finger <larry.finger@lwfinger.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c17e168aa6e5fe3851baaae8df2fbc1cf11443a9
Author: Len Brown <len.brown@intel.com>
Date: Wed Mar 7 04:37:53 2007 -0500
cpuidle: ladder does not depend on ACPI
build fix for CONFIG_ACPI=n
In file included from drivers/cpuidle/governors/ladder.c:21:
include/acpi/processor.h:88: error: expected specifier-qualifier-list before âacpi_integerâ
include/acpi/processor.h:106: error: expected specifier-qualifier-list before âacpi_integerâ
include/acpi/processor.h:168: error: expected specifier-qualifier-list before âacpi_handleâ
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8c91d958246bde68db0c3f0c57b535962ce861cb
Author: Adrian Bunk <bunk@stusta.de>
Date: Tue Mar 6 02:29:40 2007 -0800
cpuidle: make code static
This patch makes the following needlessly global code static:
- driver.c: __cpuidle_find_driver()
- governor.c: __cpuidle_find_governor()
- ladder.c: struct ladder_governor
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Adam Belay <abelay@novell.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0c39dc3187094c72c33ab65a64d2017b21f372d2
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed Mar 7 02:38:22 2007 -0500
cpu_idle: fix build break
This patch fixes a build breakage with !CONFIG_HOTPLUG_CPU and
CONFIG_CPU_IDLE.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8112e3b115659b07df340ef170515799c0105f82
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Mar 6 02:29:39 2007 -0800
cpuidle: build fix for !CPU_IDLE
Fix the compile issues when CPU_IDLE is not configured.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Adam Belay <abelay@novell.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1eb4431e9599cd25e0d9872f3c2c8986821839dd
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:54:57 2007 -0800
cpuidle take2: Basic documentation for cpuidle
Documentation for cpuidle infrastructure
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit ef5f15a8b79123a047285ec2e3899108661df779
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:54:03 2007 -0800
cpuidle take2: Hookup ACPI C-states driver with cpuidle
Hookup ACPI C-states onto generic cpuidle infrastructure.
drivers/acpi/procesor_idle.c is now a ACPI C-states driver that registers as
a driver in cpuidle infrastructure and the policy part is removed from
drivers/acpi/processor_idle.c. We use governor in cpuidle instead.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 987196fa82d4db52c407e8c9d5dec884ba602183
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:52:57 2007 -0800
cpuidle take2: Core cpuidle infrastructure
Announcing 'cpuidle', a new CPU power management infrastructure to manage
idle CPUs in a clean and efficient manner.
cpuidle separates out the drivers that can provide support for multiple types
of idle states and policy governors that decide on what idle state to use
at run time.
A cpuidle driver can support multiple idle states based on parameters like
varying power consumption, wakeup latency, etc (ACPI C-states for example).
A cpuidle governor can be usage model specific (laptop, server,
laptop on battery etc).
Main advantage of the infrastructure being, it allows independent development
of drivers and governors and allows for better CPU power management.
A huge thanks to Adam Belay and Shaohua Li who were part of this mini-project
since its beginning and are greatly responsible for this patchset.
This patch:
Core cpuidle infrastructure.
Introduces a new abstraction layer for cpuidle:
* which manages drivers that can support multiple idles states. Drivers
can be generic or particular to specific hardware/platform
* allows pluging in multiple policy governors that can take idle state policy
decision
* The core also has a set of sysfs interfaces with which administrato can know
about supported drivers and governors and switch them at run time.
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
2007-10-03 16:58:00 -06:00
|
|
|
/**
|
cpuidle: Return nohz hint from cpuidle_select()
Add a new pointer argument to cpuidle_select() and to the ->select
cpuidle governor callback to allow a boolean value indicating
whether or not the tick should be stopped before entering the
selected state to be returned from there.
Make the ladder governor ignore that pointer (to preserve its
current behavior) and make the menu governor return 'false" through
it if:
(1) the idle exit latency is constrained at 0, or
(2) the selected state is a polling one, or
(3) the expected idle period duration is within the tick period
range.
In addition to that, the correction factor computations in the menu
governor need to take the possibility that the tick may not be
stopped into account to avoid artificially small correction factor
values. To that end, add a mechanism to record tick wakeups, as
suggested by Peter Zijlstra, and use it to modify the menu_update()
behavior when tick wakeup occurs. Namely, if the CPU is woken up by
the tick and the return value of tick_nohz_get_sleep_length() is not
within the tick boundary, the predicted idle duration is likely too
short, so make menu_update() try to compensate for that by updating
the governor statistics as though the CPU was idle for a long time.
Since the value returned through the new argument pointer of
cpuidle_select() is not used by its caller yet, this change by
itself is not expected to alter the functionality of the code.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
2018-03-22 10:50:49 -06:00
|
|
|
* tick_nohz_idle_got_tick - Check whether or not the tick handler has run
|
|
|
|
*/
|
|
|
|
bool tick_nohz_idle_got_tick(void)
|
|
|
|
{
|
|
|
|
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
|
|
|
|
|
2018-04-05 20:32:37 -06:00
|
|
|
if (ts->got_idle_tick) {
|
|
|
|
ts->got_idle_tick = 0;
|
cpuidle: Return nohz hint from cpuidle_select()
Add a new pointer argument to cpuidle_select() and to the ->select
cpuidle governor callback to allow a boolean value indicating
whether or not the tick should be stopped before entering the
selected state to be returned from there.
Make the ladder governor ignore that pointer (to preserve its
current behavior) and make the menu governor return 'false" through
it if:
(1) the idle exit latency is constrained at 0, or
(2) the selected state is a polling one, or
(3) the expected idle period duration is within the tick period
range.
In addition to that, the correction factor computations in the menu
governor need to take the possibility that the tick may not be
stopped into account to avoid artificially small correction factor
values. To that end, add a mechanism to record tick wakeups, as
suggested by Peter Zijlstra, and use it to modify the menu_update()
behavior when tick wakeup occurs. Namely, if the CPU is woken up by
the tick and the return value of tick_nohz_get_sleep_length() is not
within the tick boundary, the predicted idle duration is likely too
short, so make menu_update() try to compensate for that by updating
the governor statistics as though the CPU was idle for a long time.
Since the value returned through the new argument pointer of
cpuidle_select() is not used by its caller yet, this change by
itself is not expected to alter the functionality of the code.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
2018-03-22 10:50:49 -06:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
cpuidle: consolidate 2.6.22 cpuidle branch into one patch
commit e5a16b1f9eec0af7cfa0830304b41c1c0833cf9f
Author: Len Brown <len.brown@intel.com>
Date: Tue Oct 2 23:44:44 2007 -0400
cpuidle: shrink diff
processor_idle.c | 440 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 429 insertions(+), 11 deletions(-)
Signed-off-by: Len Brown <len.brown@intel.com>
commit dfbb9d5aedfb18848a3e0d6f6e3e4969febb209c
Author: Len Brown <len.brown@intel.com>
Date: Wed Sep 26 02:17:55 2007 -0400
cpuidle: reduce diff size
Reduces the cpuidle processor_idle.c diff vs 2.6.22 from this
processor_idle.c | 2006 ++++++++++++++++++++++++++-----------------
1 file changed, 1219 insertions(+), 787 deletions(-)
to this:
processor_idle.c | 502 +++++++++++++++++++++++++++++++++++++++----
1 file changed, 458 insertions(+), 44 deletions(-)
...for the purpose of making the cpuilde patch less invasive
and easier to review.
no functional changes. build tested only.
Signed-off-by: Len Brown <len.brown@intel.com>
commit 889172fc915f5a7fe20f35b133cbd205ce69bf6c
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Sep 13 13:40:05 2007 -0700
cpuidle: Retain old ACPI policy for !CONFIG_CPU_IDLE
Retain the old policy in processor_idle, so that when CPU_IDLE is not
configured, old C-state policy will still be used. This provides a
clean gradual migration path from old ACPI policy to new cpuidle
based policy.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 9544a8181edc7ecc33b3bfd69271571f98ed08bc
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Sep 13 13:39:17 2007 -0700
cpuidle: Configure governors by default
Quoting Len "Do not give an option to users to shoot themselves in the foot".
Remove the configurability of ladder and menu governors as they are
needed for default policy of cpuidle. That way users will not be able to
have cpuidle without any policy loosing all C-state power savings.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8975059a2c1e56cfe83d1bcf031bcf4cb39be743
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:27:07 2007 -0400
CPUIDLE: load ACPI properly when CPUIDLE is disabled
Change the registration return codes for when CPUIDLE
support is not compiled into the kernel. As a result, the ACPI
processor driver will load properly even if CPUIDLE is unavailable.
However, it may be possible to cleanup the ACPI processor driver further
and eliminate some dead code paths.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit e0322e2b58dd1b12ec669bf84693efe0dc2414a8
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:26:06 2007 -0400
CPUIDLE: remove cpuidle_get_bm_activity()
Remove cpuidle_get_bm_activity() and updates governors
accordingly.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 18a6e770d5c82ba26653e53d240caa617e09e9ab
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:58 2007 -0400
CPUIDLE: max_cstate fix
Currently max_cstate is limited to 0, resulting in no idle processor
power management on ACPI platforms. This patch restores the value to
the array size.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1fdc0887286179b40ce24bcdbde663172e205ef0
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:40 2007 -0400
CPUIDLE: handle BM detection inside the ACPI Processor driver
Update the ACPI processor driver to detect BM activity and
limit state entry depth internally, rather than exposing such
requirements to CPUIDLE. As a result, CPUIDLE can drop this
ACPI-specific interface and become more platform independent. BM
activity is now handled much more aggressively than it was in the
original implementation, so some testing coverage may be needed to
verify that this doesn't introduce any DMA buffer under-run issues.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0ef38840db666f48e3cdd2b769da676c57228dd9
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:14 2007 -0400
CPUIDLE: menu governor updates
Tweak the menu governor to more effectively handle non-timer
break events. Non-timer break events are detected by comparing the
actual sleep time to the expected sleep time. In future revisions, it
may be more reliable to use the timer data structures directly.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit bb4d74fca63fa96cf3ace644b15ae0f12b7df5a1
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:24:40 2007 -0400
CPUIDLE: fix 'current_governor' sysfs entry
Allow the "current_governor" sysfs entry to properly handle
input terminated with '\n'.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit df3c71559bb69b125f1a48971bf0d17f78bbdf47
Author: Len Brown <len.brown@intel.com>
Date: Sun Aug 12 02:00:45 2007 -0400
cpuidle: fix IA64 build (again)
Signed-off-by: Len Brown <len.brown@intel.com>
commit a02064579e3f9530fd31baae16b1fc46b5a7bca8
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:39:27 2007 -0400
cpuidle: Remove support for runtime changing of max_cstate
Remove support for runtime changeability of max_cstate. Drivers can use
use latency APIs.
max_cstate can still be used as a boot time option and dmi override.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0912a44b13adf22f5e3f607d263aed23b4910d7e
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:39:16 2007 -0400
cpuidle: Remove ACPI cstate_limit calls from ipw2100
ipw2100 already has code to use accetable_latency interfaces to limit the
C-state. Remove the calls to acpi_set_cstate_limit and acpi_get_cstate_limit
as they are redundant.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c649a76e76be6bff1fd770d0a775798813a3f6e0
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:35:39 2007 -0400
cpuidle: compile fix for pause and resume functions
Fix the compilation failure when cpuidle is not compiled in.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Adam Belay <adam.belay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 2305a5920fb8ee6ccec1c62ade05aa8351091d71
Author: Adam Belay <abelay@novell.com>
Date: Thu Jul 19 00:49:00 2007 -0400
cpuidle: re-write
Some portions have been rewritten to make the code cleaner and lighter
weight. The following is a list of changes:
1.) the state name is now included in the sysfs interface
2.) detection, hotplug, and available state modifications are handled by
CPUIDLE drivers directly
3.) the CPUIDLE idle handler is only ever installed when at least one
cpuidle_device is enabled and ready
4.) the menu governor BM code no longer overflows
5.) the sysfs attributes are now printed as unsigned integers, avoiding
negative values
6.) a variety of other small cleanups
Also, Idle drivers are no longer swappable during runtime through the
CPUIDLE sysfs inteface. On i386 and x86_64 most idle handlers (e.g.
poll, mwait, halt, etc.) don't benefit from an infrastructure that
supports multiple states, so I think using a more general case idle
handler selection mechanism would be cleaner.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit df25b6b56955714e6e24b574d88d1fd11f0c3ee5
Author: Len Brown <len.brown@intel.com>
Date: Tue Jul 24 17:08:21 2007 -0400
cpuidle: fix IA64 buid
Signed-off-by: Len Brown <len.brown@intel.com>
commit fd6ada4c14488755ff7068860078c437431fbccd
Author: Adrian Bunk <bunk@stusta.de>
Date: Mon Jul 9 11:33:13 2007 -0700
cpuidle: static
make cpuidle_replace_governor() static
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c1d4a2cebcadf2429c0c72e1d29aa2a9684c32e0
Author: Adrian Bunk <bunk@stusta.de>
Date: Tue Jul 3 00:54:40 2007 -0400
cpuidle: static
This patch makes the needlessly global struct menu_governor static.
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit dbf8780c6e8d572c2c273da97ed1cca7608fd999
Author: Andrew Morton <akpm@linux-foundation.org>
Date: Tue Jul 3 00:49:14 2007 -0400
export symbol tick_nohz_get_sleep_length
ERROR: "tick_nohz_get_sleep_length" [drivers/cpuidle/governors/menu.ko] undefined!
ERROR: "tick_nohz_get_idle_jiffies" [drivers/cpuidle/governors/menu.ko] undefined!
And please be sure to get your changes to core kernel suitably reviewed.
Cc: Adam Belay <abelay@novell.com>
Cc: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 29f0e248e7017be15f99febf9143a2cef00b2961
Author: Andrew Morton <akpm@linux-foundation.org>
Date: Tue Jul 3 00:43:04 2007 -0400
tick.h needs hrtimer.h
It uses hrtimers.
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit e40cede7d63a029e92712a3fe02faee60cc38fb4
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:40:34 2007 -0400
cpuidle: first round of documentation updates
Documentation changes based on Pavel's feedback.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 83b42be2efece386976507555c29e7773a0dfcd1
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:39:25 2007 -0400
cpuidle: add rating to the governors and pick the one with highest rating by default
Introduce a governor rating scheme to pick the right governor by default.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit d2a74b8c5e8f22def4709330d4bfc4a29209b71c
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:38:08 2007 -0400
cpuidle: make cpuidle sysfs driver governor switch off by default
Make default cpuidle sysfs to show current_governor and current_driver in
read-only mode. More elaborate available_governors and available_drivers with
writeable current_governor and current_driver interface only appear with
"cpuidle_sysfs_switch" boot parameter.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1f60a0e80bf83cf6b55c8845bbe5596ed8f6307b
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:37:00 2007 -0400
cpuidle: menu governor: change the early break condition
Change the C-state early break out algorithm in menu governor.
We only look at early breakouts that result in wakeups shorter than idle
state's target_residency. If such a breakout is frequent enough, eliminate
the particular idle state upto a timeout period.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 45a42095cf64b003b4a69be3ce7f434f97d7af51
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:35:38 2007 -0400
cpuidle: fix uninitialized variable in sysfs routine
Fix the uninitialized usage of ret.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 80dca7cdba3e6ee13eae277660873ab9584eb3be
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:34:16 2007 -0400
cpuidle: reenable /proc/acpi//power interface for the time being
Keep /proc/acpi/processor/CPU*/power around for a while as powertop depends
on it. It will be marked deprecated and removed in future. powertop can use
cpuidle interfaces instead.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 589c37c2646c5e3813a51255a5ee1159cb4c33fc
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:32:37 2007 -0400
cpuidle: menu governor and hrtimer compile fix
Compile fix for menu governor.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0ba80bd9ab3ed304cb4f19b722e4cc6740588b5e
Author: Len Brown <len.brown@intel.com>
Date: Thu May 31 22:51:43 2007 -0400
cpuidle: build fix - cpuidle vs ipw2100 module
ERROR: "acpi_set_cstate_limit" [drivers/net/wireless/ipw2100.ko] undefined!
Signed-off-by: Len Brown <len.brown@intel.com>
commit d7d8fa7f96a7f7682be7c6cc0cc53fa7a18c3b58
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:47:07 2007 -0400
cpuidle: add the 'menu' governor
Here is my first take at implementing an idle PM governor that takes
full advantage of NO_HZ. I call it the 'menu' governor because it
considers the full list of idle states before each entry.
I've kept the implementation fairly simple. It attempts to guess the
next residency time and then chooses a state that would meet at least
the break-even point between power savings and entry cost. To this end,
it selects the deepest idle state that satisfies the following
constraints:
1. If the idle time elapsed since bus master activity was detected
is below a threshold (currently 20 ms), then limit the selection
to C2-type or above.
2. Do not choose a state with a break-even residency that exceeds
the expected time remaining until the next timer interrupt.
3. Do not choose a state with a break-even residency that exceeds
the elapsed time between the last pair of break events,
excluding timer interrupts.
This governor has an advantage over "ladder" governor because it
proactively checks how much time remains until the next timer interrupt
using the tick infrastructure. Also, it handles device interrupt
activity more intelligently by not including timer interrupts in break
event calculations. Finally, it doesn't make policy decisions using the
number of state entries, which can have variable residency times (NO_HZ
makes these potentially very large), and instead only considers sleep
time deltas.
The menu governor can be selected during runtime using the cpuidle sysfs
interface like so:
"echo "menu" > /sys/devices/system/cpu/cpuidle/current_governor"
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit a4bec7e65aa3b7488b879d971651cc99a6c410fe
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:47:03 2007 -0400
cpuidle: export time until next timer interrupt using NO_HZ
Expose information about the time remaining until the next
timer interrupt expires by utilizing the dynticks infrastructure.
Also modify the main idle loop to allow dynticks to handle
non-interrupt break events (e.g. DMA). Finally, expose sleep ticks
information to external code. Thomas Gleixner is responsible for much
of the code in this patch. However, I've made some additional changes,
so I'm probably responsible if there are any bugs or oversights :)
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 2929d8996fbc77f41a5ff86bb67cdde3ca7d2d72
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:46:58 2007 -0400
cpuidle: governor API changes
This patch prepares cpuidle for the menu governor. It adds an optional
stage after idle state entry to give the governor an opportunity to
check why the state was exited. Also it makes sure the idle loop
returns after each state entry, allowing the appropriate dynticks code
to run.
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 3a7fd42f9825c3b03e364ca59baa751bb350775f
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Apr 26 00:03:59 2007 -0700
cpuidle: hang fix
Prevent hang on x86-64, when ACPI processor driver is added as a module on
a system that does not support C-states.
x86-64 expects all idle handlers to enable interrupts before returning from
idle handler. This is due to enter_idle(), exit_idle() races. Make
cpuidle_idle_call() confirm to this when there is no pm_idle_old.
Also, cpuidle look at the return values of attch_driver() and set
current_driver to NULL if attach fails on all CPUs.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 4893339a142afbd5b7c01ffadfd53d14746e858e
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:09 2007 +0800
cpuidle: add support for max_cstate limit
With CPUIDLE framework, the max_cstate (to limit max cpu c-state)
parameter is ingored. Some systems require it to ignore C2/C3
and some drivers like ipw require it too.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 43bbbbe1cb998cbd2df656f55bb3bfe30f30e7d1
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:13 2007 +0800
cpuidle: add cpuidle_fore_redetect_devices API
add cpuidle_force_redetect_devices API,
which forces all CPU redetect idle states.
Next patch will use it.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit d1edadd608f24836def5ec483d2edccfb37b1d19
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:01 2007 +0800
cpuidle: fix sysfs related issue
Fix the cpuidle sysfs issue.
a. make kobject dynamicaly allocated
b. fixed sysfs init issue to avoid suspend/resume issue
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 7169a5cc0d67b263978859672e86c13c23a5570d
Author: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed Mar 28 22:52:53 2007 -0400
cpuidle: 1-bit field must be unsigned
A 1-bit bitfield has no room for a sign bit.
drivers/cpuidle/governors/ladder.c:54:16: error: dubious bitfield without explicit `signed' or `unsigned'
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 4658620158dc2fbd9e4bcb213c5b6fb5d05ba7d4
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed Mar 28 22:52:41 2007 -0400
cpuidle: fix boot hang
Patch for cpuidle boot hang reported by Larry Finger here.
http://www.ussg.iu.edu/hypermail/linux/kernel/0703.2/2025.html
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Larry Finger <larry.finger@lwfinger.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c17e168aa6e5fe3851baaae8df2fbc1cf11443a9
Author: Len Brown <len.brown@intel.com>
Date: Wed Mar 7 04:37:53 2007 -0500
cpuidle: ladder does not depend on ACPI
build fix for CONFIG_ACPI=n
In file included from drivers/cpuidle/governors/ladder.c:21:
include/acpi/processor.h:88: error: expected specifier-qualifier-list before âacpi_integerâ
include/acpi/processor.h:106: error: expected specifier-qualifier-list before âacpi_integerâ
include/acpi/processor.h:168: error: expected specifier-qualifier-list before âacpi_handleâ
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8c91d958246bde68db0c3f0c57b535962ce861cb
Author: Adrian Bunk <bunk@stusta.de>
Date: Tue Mar 6 02:29:40 2007 -0800
cpuidle: make code static
This patch makes the following needlessly global code static:
- driver.c: __cpuidle_find_driver()
- governor.c: __cpuidle_find_governor()
- ladder.c: struct ladder_governor
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Adam Belay <abelay@novell.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0c39dc3187094c72c33ab65a64d2017b21f372d2
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed Mar 7 02:38:22 2007 -0500
cpu_idle: fix build break
This patch fixes a build breakage with !CONFIG_HOTPLUG_CPU and
CONFIG_CPU_IDLE.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8112e3b115659b07df340ef170515799c0105f82
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Mar 6 02:29:39 2007 -0800
cpuidle: build fix for !CPU_IDLE
Fix the compile issues when CPU_IDLE is not configured.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Adam Belay <abelay@novell.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1eb4431e9599cd25e0d9872f3c2c8986821839dd
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:54:57 2007 -0800
cpuidle take2: Basic documentation for cpuidle
Documentation for cpuidle infrastructure
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit ef5f15a8b79123a047285ec2e3899108661df779
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:54:03 2007 -0800
cpuidle take2: Hookup ACPI C-states driver with cpuidle
Hookup ACPI C-states onto generic cpuidle infrastructure.
drivers/acpi/procesor_idle.c is now a ACPI C-states driver that registers as
a driver in cpuidle infrastructure and the policy part is removed from
drivers/acpi/processor_idle.c. We use governor in cpuidle instead.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 987196fa82d4db52c407e8c9d5dec884ba602183
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:52:57 2007 -0800
cpuidle take2: Core cpuidle infrastructure
Announcing 'cpuidle', a new CPU power management infrastructure to manage
idle CPUs in a clean and efficient manner.
cpuidle separates out the drivers that can provide support for multiple types
of idle states and policy governors that decide on what idle state to use
at run time.
A cpuidle driver can support multiple idle states based on parameters like
varying power consumption, wakeup latency, etc (ACPI C-states for example).
A cpuidle governor can be usage model specific (laptop, server,
laptop on battery etc).
Main advantage of the infrastructure being, it allows independent development
of drivers and governors and allows for better CPU power management.
A huge thanks to Adam Belay and Shaohua Li who were part of this mini-project
since its beginning and are greatly responsible for this patchset.
This patch:
Core cpuidle infrastructure.
Introduces a new abstraction layer for cpuidle:
* which manages drivers that can support multiple idles states. Drivers
can be generic or particular to specific hardware/platform
* allows pluging in multiple policy governors that can take idle state policy
decision
* The core also has a set of sysfs interfaces with which administrato can know
about supported drivers and governors and switch them at run time.
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
2007-10-03 16:58:00 -06:00
|
|
|
/**
|
sched: idle: Select idle state before stopping the tick
In order to address the issue with short idle duration predictions
by the idle governor after the scheduler tick has been stopped,
reorder the code in cpuidle_idle_call() so that the governor idle
state selection runs before tick_nohz_idle_go_idle() and use the
"nohz" hint returned by cpuidle_select() to decide whether or not
to stop the tick.
This isn't straightforward, because menu_select() invokes
tick_nohz_get_sleep_length() to get the time to the next timer
event and the number returned by the latter comes from
__tick_nohz_idle_stop_tick(). Fortunately, however, it is possible
to compute that number without actually stopping the tick and with
the help of the existing code.
Namely, tick_nohz_get_sleep_length() can be made call
tick_nohz_next_event(), introduced earlier, to get the time to the
next non-highres timer event. If that happens, tick_nohz_next_event()
need not be called by __tick_nohz_idle_stop_tick() again.
If it turns out that the scheduler tick cannot be stopped going
forward or the next timer event is too close for the tick to be
stopped, tick_nohz_get_sleep_length() can simply return the time to
the next event currently programmed into the corresponding clock
event device.
In addition to knowing the return value of tick_nohz_next_event(),
however, tick_nohz_get_sleep_length() needs to know the time to the
next highres timer event, but with the scheduler tick timer excluded,
which can be computed with the help of hrtimer_get_next_event().
That minimum of that number and the tick_nohz_next_event() return
value is the total time to the next timer event with the assumption
that the tick will be stopped. It can be returned to the idle
governor which can use it for predicting idle duration (under the
assumption that the tick will be stopped) and deciding whether or
not it makes sense to stop the tick before putting the CPU into the
selected idle state.
With the above, the sleep_length field in struct tick_sched is not
necessary any more, so drop it.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199227
Reported-by: Doug Smythies <dsmythies@telus.net>
Reported-by: Thomas Ilsche <thomas.ilsche@tu-dresden.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
2018-04-03 15:17:11 -06:00
|
|
|
* tick_nohz_get_sleep_length - return the expected length of the current sleep
|
2018-04-05 11:12:34 -06:00
|
|
|
* @delta_next: duration until the next event if the tick cannot be stopped
|
cpuidle: consolidate 2.6.22 cpuidle branch into one patch
commit e5a16b1f9eec0af7cfa0830304b41c1c0833cf9f
Author: Len Brown <len.brown@intel.com>
Date: Tue Oct 2 23:44:44 2007 -0400
cpuidle: shrink diff
processor_idle.c | 440 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 429 insertions(+), 11 deletions(-)
Signed-off-by: Len Brown <len.brown@intel.com>
commit dfbb9d5aedfb18848a3e0d6f6e3e4969febb209c
Author: Len Brown <len.brown@intel.com>
Date: Wed Sep 26 02:17:55 2007 -0400
cpuidle: reduce diff size
Reduces the cpuidle processor_idle.c diff vs 2.6.22 from this
processor_idle.c | 2006 ++++++++++++++++++++++++++-----------------
1 file changed, 1219 insertions(+), 787 deletions(-)
to this:
processor_idle.c | 502 +++++++++++++++++++++++++++++++++++++++----
1 file changed, 458 insertions(+), 44 deletions(-)
...for the purpose of making the cpuilde patch less invasive
and easier to review.
no functional changes. build tested only.
Signed-off-by: Len Brown <len.brown@intel.com>
commit 889172fc915f5a7fe20f35b133cbd205ce69bf6c
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Sep 13 13:40:05 2007 -0700
cpuidle: Retain old ACPI policy for !CONFIG_CPU_IDLE
Retain the old policy in processor_idle, so that when CPU_IDLE is not
configured, old C-state policy will still be used. This provides a
clean gradual migration path from old ACPI policy to new cpuidle
based policy.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 9544a8181edc7ecc33b3bfd69271571f98ed08bc
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Sep 13 13:39:17 2007 -0700
cpuidle: Configure governors by default
Quoting Len "Do not give an option to users to shoot themselves in the foot".
Remove the configurability of ladder and menu governors as they are
needed for default policy of cpuidle. That way users will not be able to
have cpuidle without any policy loosing all C-state power savings.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8975059a2c1e56cfe83d1bcf031bcf4cb39be743
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:27:07 2007 -0400
CPUIDLE: load ACPI properly when CPUIDLE is disabled
Change the registration return codes for when CPUIDLE
support is not compiled into the kernel. As a result, the ACPI
processor driver will load properly even if CPUIDLE is unavailable.
However, it may be possible to cleanup the ACPI processor driver further
and eliminate some dead code paths.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit e0322e2b58dd1b12ec669bf84693efe0dc2414a8
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:26:06 2007 -0400
CPUIDLE: remove cpuidle_get_bm_activity()
Remove cpuidle_get_bm_activity() and updates governors
accordingly.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 18a6e770d5c82ba26653e53d240caa617e09e9ab
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:58 2007 -0400
CPUIDLE: max_cstate fix
Currently max_cstate is limited to 0, resulting in no idle processor
power management on ACPI platforms. This patch restores the value to
the array size.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1fdc0887286179b40ce24bcdbde663172e205ef0
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:40 2007 -0400
CPUIDLE: handle BM detection inside the ACPI Processor driver
Update the ACPI processor driver to detect BM activity and
limit state entry depth internally, rather than exposing such
requirements to CPUIDLE. As a result, CPUIDLE can drop this
ACPI-specific interface and become more platform independent. BM
activity is now handled much more aggressively than it was in the
original implementation, so some testing coverage may be needed to
verify that this doesn't introduce any DMA buffer under-run issues.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0ef38840db666f48e3cdd2b769da676c57228dd9
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:14 2007 -0400
CPUIDLE: menu governor updates
Tweak the menu governor to more effectively handle non-timer
break events. Non-timer break events are detected by comparing the
actual sleep time to the expected sleep time. In future revisions, it
may be more reliable to use the timer data structures directly.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit bb4d74fca63fa96cf3ace644b15ae0f12b7df5a1
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:24:40 2007 -0400
CPUIDLE: fix 'current_governor' sysfs entry
Allow the "current_governor" sysfs entry to properly handle
input terminated with '\n'.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit df3c71559bb69b125f1a48971bf0d17f78bbdf47
Author: Len Brown <len.brown@intel.com>
Date: Sun Aug 12 02:00:45 2007 -0400
cpuidle: fix IA64 build (again)
Signed-off-by: Len Brown <len.brown@intel.com>
commit a02064579e3f9530fd31baae16b1fc46b5a7bca8
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:39:27 2007 -0400
cpuidle: Remove support for runtime changing of max_cstate
Remove support for runtime changeability of max_cstate. Drivers can use
use latency APIs.
max_cstate can still be used as a boot time option and dmi override.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0912a44b13adf22f5e3f607d263aed23b4910d7e
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:39:16 2007 -0400
cpuidle: Remove ACPI cstate_limit calls from ipw2100
ipw2100 already has code to use accetable_latency interfaces to limit the
C-state. Remove the calls to acpi_set_cstate_limit and acpi_get_cstate_limit
as they are redundant.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c649a76e76be6bff1fd770d0a775798813a3f6e0
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:35:39 2007 -0400
cpuidle: compile fix for pause and resume functions
Fix the compilation failure when cpuidle is not compiled in.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Adam Belay <adam.belay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 2305a5920fb8ee6ccec1c62ade05aa8351091d71
Author: Adam Belay <abelay@novell.com>
Date: Thu Jul 19 00:49:00 2007 -0400
cpuidle: re-write
Some portions have been rewritten to make the code cleaner and lighter
weight. The following is a list of changes:
1.) the state name is now included in the sysfs interface
2.) detection, hotplug, and available state modifications are handled by
CPUIDLE drivers directly
3.) the CPUIDLE idle handler is only ever installed when at least one
cpuidle_device is enabled and ready
4.) the menu governor BM code no longer overflows
5.) the sysfs attributes are now printed as unsigned integers, avoiding
negative values
6.) a variety of other small cleanups
Also, Idle drivers are no longer swappable during runtime through the
CPUIDLE sysfs inteface. On i386 and x86_64 most idle handlers (e.g.
poll, mwait, halt, etc.) don't benefit from an infrastructure that
supports multiple states, so I think using a more general case idle
handler selection mechanism would be cleaner.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit df25b6b56955714e6e24b574d88d1fd11f0c3ee5
Author: Len Brown <len.brown@intel.com>
Date: Tue Jul 24 17:08:21 2007 -0400
cpuidle: fix IA64 buid
Signed-off-by: Len Brown <len.brown@intel.com>
commit fd6ada4c14488755ff7068860078c437431fbccd
Author: Adrian Bunk <bunk@stusta.de>
Date: Mon Jul 9 11:33:13 2007 -0700
cpuidle: static
make cpuidle_replace_governor() static
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c1d4a2cebcadf2429c0c72e1d29aa2a9684c32e0
Author: Adrian Bunk <bunk@stusta.de>
Date: Tue Jul 3 00:54:40 2007 -0400
cpuidle: static
This patch makes the needlessly global struct menu_governor static.
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit dbf8780c6e8d572c2c273da97ed1cca7608fd999
Author: Andrew Morton <akpm@linux-foundation.org>
Date: Tue Jul 3 00:49:14 2007 -0400
export symbol tick_nohz_get_sleep_length
ERROR: "tick_nohz_get_sleep_length" [drivers/cpuidle/governors/menu.ko] undefined!
ERROR: "tick_nohz_get_idle_jiffies" [drivers/cpuidle/governors/menu.ko] undefined!
And please be sure to get your changes to core kernel suitably reviewed.
Cc: Adam Belay <abelay@novell.com>
Cc: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 29f0e248e7017be15f99febf9143a2cef00b2961
Author: Andrew Morton <akpm@linux-foundation.org>
Date: Tue Jul 3 00:43:04 2007 -0400
tick.h needs hrtimer.h
It uses hrtimers.
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit e40cede7d63a029e92712a3fe02faee60cc38fb4
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:40:34 2007 -0400
cpuidle: first round of documentation updates
Documentation changes based on Pavel's feedback.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 83b42be2efece386976507555c29e7773a0dfcd1
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:39:25 2007 -0400
cpuidle: add rating to the governors and pick the one with highest rating by default
Introduce a governor rating scheme to pick the right governor by default.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit d2a74b8c5e8f22def4709330d4bfc4a29209b71c
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:38:08 2007 -0400
cpuidle: make cpuidle sysfs driver governor switch off by default
Make default cpuidle sysfs to show current_governor and current_driver in
read-only mode. More elaborate available_governors and available_drivers with
writeable current_governor and current_driver interface only appear with
"cpuidle_sysfs_switch" boot parameter.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1f60a0e80bf83cf6b55c8845bbe5596ed8f6307b
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:37:00 2007 -0400
cpuidle: menu governor: change the early break condition
Change the C-state early break out algorithm in menu governor.
We only look at early breakouts that result in wakeups shorter than idle
state's target_residency. If such a breakout is frequent enough, eliminate
the particular idle state upto a timeout period.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 45a42095cf64b003b4a69be3ce7f434f97d7af51
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:35:38 2007 -0400
cpuidle: fix uninitialized variable in sysfs routine
Fix the uninitialized usage of ret.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 80dca7cdba3e6ee13eae277660873ab9584eb3be
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:34:16 2007 -0400
cpuidle: reenable /proc/acpi//power interface for the time being
Keep /proc/acpi/processor/CPU*/power around for a while as powertop depends
on it. It will be marked deprecated and removed in future. powertop can use
cpuidle interfaces instead.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 589c37c2646c5e3813a51255a5ee1159cb4c33fc
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:32:37 2007 -0400
cpuidle: menu governor and hrtimer compile fix
Compile fix for menu governor.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0ba80bd9ab3ed304cb4f19b722e4cc6740588b5e
Author: Len Brown <len.brown@intel.com>
Date: Thu May 31 22:51:43 2007 -0400
cpuidle: build fix - cpuidle vs ipw2100 module
ERROR: "acpi_set_cstate_limit" [drivers/net/wireless/ipw2100.ko] undefined!
Signed-off-by: Len Brown <len.brown@intel.com>
commit d7d8fa7f96a7f7682be7c6cc0cc53fa7a18c3b58
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:47:07 2007 -0400
cpuidle: add the 'menu' governor
Here is my first take at implementing an idle PM governor that takes
full advantage of NO_HZ. I call it the 'menu' governor because it
considers the full list of idle states before each entry.
I've kept the implementation fairly simple. It attempts to guess the
next residency time and then chooses a state that would meet at least
the break-even point between power savings and entry cost. To this end,
it selects the deepest idle state that satisfies the following
constraints:
1. If the idle time elapsed since bus master activity was detected
is below a threshold (currently 20 ms), then limit the selection
to C2-type or above.
2. Do not choose a state with a break-even residency that exceeds
the expected time remaining until the next timer interrupt.
3. Do not choose a state with a break-even residency that exceeds
the elapsed time between the last pair of break events,
excluding timer interrupts.
This governor has an advantage over "ladder" governor because it
proactively checks how much time remains until the next timer interrupt
using the tick infrastructure. Also, it handles device interrupt
activity more intelligently by not including timer interrupts in break
event calculations. Finally, it doesn't make policy decisions using the
number of state entries, which can have variable residency times (NO_HZ
makes these potentially very large), and instead only considers sleep
time deltas.
The menu governor can be selected during runtime using the cpuidle sysfs
interface like so:
"echo "menu" > /sys/devices/system/cpu/cpuidle/current_governor"
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit a4bec7e65aa3b7488b879d971651cc99a6c410fe
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:47:03 2007 -0400
cpuidle: export time until next timer interrupt using NO_HZ
Expose information about the time remaining until the next
timer interrupt expires by utilizing the dynticks infrastructure.
Also modify the main idle loop to allow dynticks to handle
non-interrupt break events (e.g. DMA). Finally, expose sleep ticks
information to external code. Thomas Gleixner is responsible for much
of the code in this patch. However, I've made some additional changes,
so I'm probably responsible if there are any bugs or oversights :)
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 2929d8996fbc77f41a5ff86bb67cdde3ca7d2d72
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:46:58 2007 -0400
cpuidle: governor API changes
This patch prepares cpuidle for the menu governor. It adds an optional
stage after idle state entry to give the governor an opportunity to
check why the state was exited. Also it makes sure the idle loop
returns after each state entry, allowing the appropriate dynticks code
to run.
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 3a7fd42f9825c3b03e364ca59baa751bb350775f
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Apr 26 00:03:59 2007 -0700
cpuidle: hang fix
Prevent hang on x86-64, when ACPI processor driver is added as a module on
a system that does not support C-states.
x86-64 expects all idle handlers to enable interrupts before returning from
idle handler. This is due to enter_idle(), exit_idle() races. Make
cpuidle_idle_call() confirm to this when there is no pm_idle_old.
Also, cpuidle look at the return values of attch_driver() and set
current_driver to NULL if attach fails on all CPUs.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 4893339a142afbd5b7c01ffadfd53d14746e858e
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:09 2007 +0800
cpuidle: add support for max_cstate limit
With CPUIDLE framework, the max_cstate (to limit max cpu c-state)
parameter is ingored. Some systems require it to ignore C2/C3
and some drivers like ipw require it too.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 43bbbbe1cb998cbd2df656f55bb3bfe30f30e7d1
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:13 2007 +0800
cpuidle: add cpuidle_fore_redetect_devices API
add cpuidle_force_redetect_devices API,
which forces all CPU redetect idle states.
Next patch will use it.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit d1edadd608f24836def5ec483d2edccfb37b1d19
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:01 2007 +0800
cpuidle: fix sysfs related issue
Fix the cpuidle sysfs issue.
a. make kobject dynamicaly allocated
b. fixed sysfs init issue to avoid suspend/resume issue
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 7169a5cc0d67b263978859672e86c13c23a5570d
Author: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed Mar 28 22:52:53 2007 -0400
cpuidle: 1-bit field must be unsigned
A 1-bit bitfield has no room for a sign bit.
drivers/cpuidle/governors/ladder.c:54:16: error: dubious bitfield without explicit `signed' or `unsigned'
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 4658620158dc2fbd9e4bcb213c5b6fb5d05ba7d4
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed Mar 28 22:52:41 2007 -0400
cpuidle: fix boot hang
Patch for cpuidle boot hang reported by Larry Finger here.
http://www.ussg.iu.edu/hypermail/linux/kernel/0703.2/2025.html
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Larry Finger <larry.finger@lwfinger.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c17e168aa6e5fe3851baaae8df2fbc1cf11443a9
Author: Len Brown <len.brown@intel.com>
Date: Wed Mar 7 04:37:53 2007 -0500
cpuidle: ladder does not depend on ACPI
build fix for CONFIG_ACPI=n
In file included from drivers/cpuidle/governors/ladder.c:21:
include/acpi/processor.h:88: error: expected specifier-qualifier-list before âacpi_integerâ
include/acpi/processor.h:106: error: expected specifier-qualifier-list before âacpi_integerâ
include/acpi/processor.h:168: error: expected specifier-qualifier-list before âacpi_handleâ
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8c91d958246bde68db0c3f0c57b535962ce861cb
Author: Adrian Bunk <bunk@stusta.de>
Date: Tue Mar 6 02:29:40 2007 -0800
cpuidle: make code static
This patch makes the following needlessly global code static:
- driver.c: __cpuidle_find_driver()
- governor.c: __cpuidle_find_governor()
- ladder.c: struct ladder_governor
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Adam Belay <abelay@novell.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0c39dc3187094c72c33ab65a64d2017b21f372d2
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed Mar 7 02:38:22 2007 -0500
cpu_idle: fix build break
This patch fixes a build breakage with !CONFIG_HOTPLUG_CPU and
CONFIG_CPU_IDLE.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8112e3b115659b07df340ef170515799c0105f82
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Mar 6 02:29:39 2007 -0800
cpuidle: build fix for !CPU_IDLE
Fix the compile issues when CPU_IDLE is not configured.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Adam Belay <abelay@novell.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1eb4431e9599cd25e0d9872f3c2c8986821839dd
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:54:57 2007 -0800
cpuidle take2: Basic documentation for cpuidle
Documentation for cpuidle infrastructure
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit ef5f15a8b79123a047285ec2e3899108661df779
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:54:03 2007 -0800
cpuidle take2: Hookup ACPI C-states driver with cpuidle
Hookup ACPI C-states onto generic cpuidle infrastructure.
drivers/acpi/procesor_idle.c is now a ACPI C-states driver that registers as
a driver in cpuidle infrastructure and the policy part is removed from
drivers/acpi/processor_idle.c. We use governor in cpuidle instead.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 987196fa82d4db52c407e8c9d5dec884ba602183
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:52:57 2007 -0800
cpuidle take2: Core cpuidle infrastructure
Announcing 'cpuidle', a new CPU power management infrastructure to manage
idle CPUs in a clean and efficient manner.
cpuidle separates out the drivers that can provide support for multiple types
of idle states and policy governors that decide on what idle state to use
at run time.
A cpuidle driver can support multiple idle states based on parameters like
varying power consumption, wakeup latency, etc (ACPI C-states for example).
A cpuidle governor can be usage model specific (laptop, server,
laptop on battery etc).
Main advantage of the infrastructure being, it allows independent development
of drivers and governors and allows for better CPU power management.
A huge thanks to Adam Belay and Shaohua Li who were part of this mini-project
since its beginning and are greatly responsible for this patchset.
This patch:
Core cpuidle infrastructure.
Introduces a new abstraction layer for cpuidle:
* which manages drivers that can support multiple idles states. Drivers
can be generic or particular to specific hardware/platform
* allows pluging in multiple policy governors that can take idle state policy
decision
* The core also has a set of sysfs interfaces with which administrato can know
about supported drivers and governors and switch them at run time.
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
2007-10-03 16:58:00 -06:00
|
|
|
*
|
|
|
|
* Called from power state control code with interrupts disabled
|
|
|
|
*/
|
2018-04-05 11:12:34 -06:00
|
|
|
ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
|
cpuidle: consolidate 2.6.22 cpuidle branch into one patch
commit e5a16b1f9eec0af7cfa0830304b41c1c0833cf9f
Author: Len Brown <len.brown@intel.com>
Date: Tue Oct 2 23:44:44 2007 -0400
cpuidle: shrink diff
processor_idle.c | 440 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 429 insertions(+), 11 deletions(-)
Signed-off-by: Len Brown <len.brown@intel.com>
commit dfbb9d5aedfb18848a3e0d6f6e3e4969febb209c
Author: Len Brown <len.brown@intel.com>
Date: Wed Sep 26 02:17:55 2007 -0400
cpuidle: reduce diff size
Reduces the cpuidle processor_idle.c diff vs 2.6.22 from this
processor_idle.c | 2006 ++++++++++++++++++++++++++-----------------
1 file changed, 1219 insertions(+), 787 deletions(-)
to this:
processor_idle.c | 502 +++++++++++++++++++++++++++++++++++++++----
1 file changed, 458 insertions(+), 44 deletions(-)
...for the purpose of making the cpuilde patch less invasive
and easier to review.
no functional changes. build tested only.
Signed-off-by: Len Brown <len.brown@intel.com>
commit 889172fc915f5a7fe20f35b133cbd205ce69bf6c
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Sep 13 13:40:05 2007 -0700
cpuidle: Retain old ACPI policy for !CONFIG_CPU_IDLE
Retain the old policy in processor_idle, so that when CPU_IDLE is not
configured, old C-state policy will still be used. This provides a
clean gradual migration path from old ACPI policy to new cpuidle
based policy.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 9544a8181edc7ecc33b3bfd69271571f98ed08bc
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Sep 13 13:39:17 2007 -0700
cpuidle: Configure governors by default
Quoting Len "Do not give an option to users to shoot themselves in the foot".
Remove the configurability of ladder and menu governors as they are
needed for default policy of cpuidle. That way users will not be able to
have cpuidle without any policy loosing all C-state power savings.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8975059a2c1e56cfe83d1bcf031bcf4cb39be743
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:27:07 2007 -0400
CPUIDLE: load ACPI properly when CPUIDLE is disabled
Change the registration return codes for when CPUIDLE
support is not compiled into the kernel. As a result, the ACPI
processor driver will load properly even if CPUIDLE is unavailable.
However, it may be possible to cleanup the ACPI processor driver further
and eliminate some dead code paths.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit e0322e2b58dd1b12ec669bf84693efe0dc2414a8
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:26:06 2007 -0400
CPUIDLE: remove cpuidle_get_bm_activity()
Remove cpuidle_get_bm_activity() and updates governors
accordingly.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 18a6e770d5c82ba26653e53d240caa617e09e9ab
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:58 2007 -0400
CPUIDLE: max_cstate fix
Currently max_cstate is limited to 0, resulting in no idle processor
power management on ACPI platforms. This patch restores the value to
the array size.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1fdc0887286179b40ce24bcdbde663172e205ef0
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:40 2007 -0400
CPUIDLE: handle BM detection inside the ACPI Processor driver
Update the ACPI processor driver to detect BM activity and
limit state entry depth internally, rather than exposing such
requirements to CPUIDLE. As a result, CPUIDLE can drop this
ACPI-specific interface and become more platform independent. BM
activity is now handled much more aggressively than it was in the
original implementation, so some testing coverage may be needed to
verify that this doesn't introduce any DMA buffer under-run issues.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0ef38840db666f48e3cdd2b769da676c57228dd9
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:14 2007 -0400
CPUIDLE: menu governor updates
Tweak the menu governor to more effectively handle non-timer
break events. Non-timer break events are detected by comparing the
actual sleep time to the expected sleep time. In future revisions, it
may be more reliable to use the timer data structures directly.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit bb4d74fca63fa96cf3ace644b15ae0f12b7df5a1
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:24:40 2007 -0400
CPUIDLE: fix 'current_governor' sysfs entry
Allow the "current_governor" sysfs entry to properly handle
input terminated with '\n'.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit df3c71559bb69b125f1a48971bf0d17f78bbdf47
Author: Len Brown <len.brown@intel.com>
Date: Sun Aug 12 02:00:45 2007 -0400
cpuidle: fix IA64 build (again)
Signed-off-by: Len Brown <len.brown@intel.com>
commit a02064579e3f9530fd31baae16b1fc46b5a7bca8
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:39:27 2007 -0400
cpuidle: Remove support for runtime changing of max_cstate
Remove support for runtime changeability of max_cstate. Drivers can use
use latency APIs.
max_cstate can still be used as a boot time option and dmi override.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0912a44b13adf22f5e3f607d263aed23b4910d7e
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:39:16 2007 -0400
cpuidle: Remove ACPI cstate_limit calls from ipw2100
ipw2100 already has code to use accetable_latency interfaces to limit the
C-state. Remove the calls to acpi_set_cstate_limit and acpi_get_cstate_limit
as they are redundant.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c649a76e76be6bff1fd770d0a775798813a3f6e0
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:35:39 2007 -0400
cpuidle: compile fix for pause and resume functions
Fix the compilation failure when cpuidle is not compiled in.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Adam Belay <adam.belay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 2305a5920fb8ee6ccec1c62ade05aa8351091d71
Author: Adam Belay <abelay@novell.com>
Date: Thu Jul 19 00:49:00 2007 -0400
cpuidle: re-write
Some portions have been rewritten to make the code cleaner and lighter
weight. The following is a list of changes:
1.) the state name is now included in the sysfs interface
2.) detection, hotplug, and available state modifications are handled by
CPUIDLE drivers directly
3.) the CPUIDLE idle handler is only ever installed when at least one
cpuidle_device is enabled and ready
4.) the menu governor BM code no longer overflows
5.) the sysfs attributes are now printed as unsigned integers, avoiding
negative values
6.) a variety of other small cleanups
Also, Idle drivers are no longer swappable during runtime through the
CPUIDLE sysfs inteface. On i386 and x86_64 most idle handlers (e.g.
poll, mwait, halt, etc.) don't benefit from an infrastructure that
supports multiple states, so I think using a more general case idle
handler selection mechanism would be cleaner.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit df25b6b56955714e6e24b574d88d1fd11f0c3ee5
Author: Len Brown <len.brown@intel.com>
Date: Tue Jul 24 17:08:21 2007 -0400
cpuidle: fix IA64 buid
Signed-off-by: Len Brown <len.brown@intel.com>
commit fd6ada4c14488755ff7068860078c437431fbccd
Author: Adrian Bunk <bunk@stusta.de>
Date: Mon Jul 9 11:33:13 2007 -0700
cpuidle: static
make cpuidle_replace_governor() static
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c1d4a2cebcadf2429c0c72e1d29aa2a9684c32e0
Author: Adrian Bunk <bunk@stusta.de>
Date: Tue Jul 3 00:54:40 2007 -0400
cpuidle: static
This patch makes the needlessly global struct menu_governor static.
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit dbf8780c6e8d572c2c273da97ed1cca7608fd999
Author: Andrew Morton <akpm@linux-foundation.org>
Date: Tue Jul 3 00:49:14 2007 -0400
export symbol tick_nohz_get_sleep_length
ERROR: "tick_nohz_get_sleep_length" [drivers/cpuidle/governors/menu.ko] undefined!
ERROR: "tick_nohz_get_idle_jiffies" [drivers/cpuidle/governors/menu.ko] undefined!
And please be sure to get your changes to core kernel suitably reviewed.
Cc: Adam Belay <abelay@novell.com>
Cc: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 29f0e248e7017be15f99febf9143a2cef00b2961
Author: Andrew Morton <akpm@linux-foundation.org>
Date: Tue Jul 3 00:43:04 2007 -0400
tick.h needs hrtimer.h
It uses hrtimers.
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit e40cede7d63a029e92712a3fe02faee60cc38fb4
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:40:34 2007 -0400
cpuidle: first round of documentation updates
Documentation changes based on Pavel's feedback.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 83b42be2efece386976507555c29e7773a0dfcd1
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:39:25 2007 -0400
cpuidle: add rating to the governors and pick the one with highest rating by default
Introduce a governor rating scheme to pick the right governor by default.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit d2a74b8c5e8f22def4709330d4bfc4a29209b71c
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:38:08 2007 -0400
cpuidle: make cpuidle sysfs driver governor switch off by default
Make default cpuidle sysfs to show current_governor and current_driver in
read-only mode. More elaborate available_governors and available_drivers with
writeable current_governor and current_driver interface only appear with
"cpuidle_sysfs_switch" boot parameter.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1f60a0e80bf83cf6b55c8845bbe5596ed8f6307b
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:37:00 2007 -0400
cpuidle: menu governor: change the early break condition
Change the C-state early break out algorithm in menu governor.
We only look at early breakouts that result in wakeups shorter than idle
state's target_residency. If such a breakout is frequent enough, eliminate
the particular idle state upto a timeout period.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 45a42095cf64b003b4a69be3ce7f434f97d7af51
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:35:38 2007 -0400
cpuidle: fix uninitialized variable in sysfs routine
Fix the uninitialized usage of ret.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 80dca7cdba3e6ee13eae277660873ab9584eb3be
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:34:16 2007 -0400
cpuidle: reenable /proc/acpi//power interface for the time being
Keep /proc/acpi/processor/CPU*/power around for a while as powertop depends
on it. It will be marked deprecated and removed in future. powertop can use
cpuidle interfaces instead.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 589c37c2646c5e3813a51255a5ee1159cb4c33fc
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:32:37 2007 -0400
cpuidle: menu governor and hrtimer compile fix
Compile fix for menu governor.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0ba80bd9ab3ed304cb4f19b722e4cc6740588b5e
Author: Len Brown <len.brown@intel.com>
Date: Thu May 31 22:51:43 2007 -0400
cpuidle: build fix - cpuidle vs ipw2100 module
ERROR: "acpi_set_cstate_limit" [drivers/net/wireless/ipw2100.ko] undefined!
Signed-off-by: Len Brown <len.brown@intel.com>
commit d7d8fa7f96a7f7682be7c6cc0cc53fa7a18c3b58
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:47:07 2007 -0400
cpuidle: add the 'menu' governor
Here is my first take at implementing an idle PM governor that takes
full advantage of NO_HZ. I call it the 'menu' governor because it
considers the full list of idle states before each entry.
I've kept the implementation fairly simple. It attempts to guess the
next residency time and then chooses a state that would meet at least
the break-even point between power savings and entry cost. To this end,
it selects the deepest idle state that satisfies the following
constraints:
1. If the idle time elapsed since bus master activity was detected
is below a threshold (currently 20 ms), then limit the selection
to C2-type or above.
2. Do not choose a state with a break-even residency that exceeds
the expected time remaining until the next timer interrupt.
3. Do not choose a state with a break-even residency that exceeds
the elapsed time between the last pair of break events,
excluding timer interrupts.
This governor has an advantage over "ladder" governor because it
proactively checks how much time remains until the next timer interrupt
using the tick infrastructure. Also, it handles device interrupt
activity more intelligently by not including timer interrupts in break
event calculations. Finally, it doesn't make policy decisions using the
number of state entries, which can have variable residency times (NO_HZ
makes these potentially very large), and instead only considers sleep
time deltas.
The menu governor can be selected during runtime using the cpuidle sysfs
interface like so:
"echo "menu" > /sys/devices/system/cpu/cpuidle/current_governor"
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit a4bec7e65aa3b7488b879d971651cc99a6c410fe
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:47:03 2007 -0400
cpuidle: export time until next timer interrupt using NO_HZ
Expose information about the time remaining until the next
timer interrupt expires by utilizing the dynticks infrastructure.
Also modify the main idle loop to allow dynticks to handle
non-interrupt break events (e.g. DMA). Finally, expose sleep ticks
information to external code. Thomas Gleixner is responsible for much
of the code in this patch. However, I've made some additional changes,
so I'm probably responsible if there are any bugs or oversights :)
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 2929d8996fbc77f41a5ff86bb67cdde3ca7d2d72
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:46:58 2007 -0400
cpuidle: governor API changes
This patch prepares cpuidle for the menu governor. It adds an optional
stage after idle state entry to give the governor an opportunity to
check why the state was exited. Also it makes sure the idle loop
returns after each state entry, allowing the appropriate dynticks code
to run.
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 3a7fd42f9825c3b03e364ca59baa751bb350775f
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Apr 26 00:03:59 2007 -0700
cpuidle: hang fix
Prevent hang on x86-64, when ACPI processor driver is added as a module on
a system that does not support C-states.
x86-64 expects all idle handlers to enable interrupts before returning from
idle handler. This is due to enter_idle(), exit_idle() races. Make
cpuidle_idle_call() confirm to this when there is no pm_idle_old.
Also, cpuidle look at the return values of attch_driver() and set
current_driver to NULL if attach fails on all CPUs.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 4893339a142afbd5b7c01ffadfd53d14746e858e
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:09 2007 +0800
cpuidle: add support for max_cstate limit
With CPUIDLE framework, the max_cstate (to limit max cpu c-state)
parameter is ingored. Some systems require it to ignore C2/C3
and some drivers like ipw require it too.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 43bbbbe1cb998cbd2df656f55bb3bfe30f30e7d1
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:13 2007 +0800
cpuidle: add cpuidle_fore_redetect_devices API
add cpuidle_force_redetect_devices API,
which forces all CPU redetect idle states.
Next patch will use it.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit d1edadd608f24836def5ec483d2edccfb37b1d19
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:01 2007 +0800
cpuidle: fix sysfs related issue
Fix the cpuidle sysfs issue.
a. make kobject dynamicaly allocated
b. fixed sysfs init issue to avoid suspend/resume issue
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 7169a5cc0d67b263978859672e86c13c23a5570d
Author: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed Mar 28 22:52:53 2007 -0400
cpuidle: 1-bit field must be unsigned
A 1-bit bitfield has no room for a sign bit.
drivers/cpuidle/governors/ladder.c:54:16: error: dubious bitfield without explicit `signed' or `unsigned'
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 4658620158dc2fbd9e4bcb213c5b6fb5d05ba7d4
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed Mar 28 22:52:41 2007 -0400
cpuidle: fix boot hang
Patch for cpuidle boot hang reported by Larry Finger here.
http://www.ussg.iu.edu/hypermail/linux/kernel/0703.2/2025.html
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Larry Finger <larry.finger@lwfinger.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c17e168aa6e5fe3851baaae8df2fbc1cf11443a9
Author: Len Brown <len.brown@intel.com>
Date: Wed Mar 7 04:37:53 2007 -0500
cpuidle: ladder does not depend on ACPI
build fix for CONFIG_ACPI=n
In file included from drivers/cpuidle/governors/ladder.c:21:
include/acpi/processor.h:88: error: expected specifier-qualifier-list before âacpi_integerâ
include/acpi/processor.h:106: error: expected specifier-qualifier-list before âacpi_integerâ
include/acpi/processor.h:168: error: expected specifier-qualifier-list before âacpi_handleâ
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8c91d958246bde68db0c3f0c57b535962ce861cb
Author: Adrian Bunk <bunk@stusta.de>
Date: Tue Mar 6 02:29:40 2007 -0800
cpuidle: make code static
This patch makes the following needlessly global code static:
- driver.c: __cpuidle_find_driver()
- governor.c: __cpuidle_find_governor()
- ladder.c: struct ladder_governor
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Adam Belay <abelay@novell.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0c39dc3187094c72c33ab65a64d2017b21f372d2
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed Mar 7 02:38:22 2007 -0500
cpu_idle: fix build break
This patch fixes a build breakage with !CONFIG_HOTPLUG_CPU and
CONFIG_CPU_IDLE.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8112e3b115659b07df340ef170515799c0105f82
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Mar 6 02:29:39 2007 -0800
cpuidle: build fix for !CPU_IDLE
Fix the compile issues when CPU_IDLE is not configured.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Adam Belay <abelay@novell.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1eb4431e9599cd25e0d9872f3c2c8986821839dd
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:54:57 2007 -0800
cpuidle take2: Basic documentation for cpuidle
Documentation for cpuidle infrastructure
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit ef5f15a8b79123a047285ec2e3899108661df779
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:54:03 2007 -0800
cpuidle take2: Hookup ACPI C-states driver with cpuidle
Hookup ACPI C-states onto generic cpuidle infrastructure.
drivers/acpi/procesor_idle.c is now a ACPI C-states driver that registers as
a driver in cpuidle infrastructure and the policy part is removed from
drivers/acpi/processor_idle.c. We use governor in cpuidle instead.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 987196fa82d4db52c407e8c9d5dec884ba602183
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:52:57 2007 -0800
cpuidle take2: Core cpuidle infrastructure
Announcing 'cpuidle', a new CPU power management infrastructure to manage
idle CPUs in a clean and efficient manner.
cpuidle separates out the drivers that can provide support for multiple types
of idle states and policy governors that decide on what idle state to use
at run time.
A cpuidle driver can support multiple idle states based on parameters like
varying power consumption, wakeup latency, etc (ACPI C-states for example).
A cpuidle governor can be usage model specific (laptop, server,
laptop on battery etc).
Main advantage of the infrastructure being, it allows independent development
of drivers and governors and allows for better CPU power management.
A huge thanks to Adam Belay and Shaohua Li who were part of this mini-project
since its beginning and are greatly responsible for this patchset.
This patch:
Core cpuidle infrastructure.
Introduces a new abstraction layer for cpuidle:
* which manages drivers that can support multiple idles states. Drivers
can be generic or particular to specific hardware/platform
* allows pluging in multiple policy governors that can take idle state policy
decision
* The core also has a set of sysfs interfaces with which administrato can know
about supported drivers and governors and switch them at run time.
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
2007-10-03 16:58:00 -06:00
|
|
|
{
|
sched: idle: Select idle state before stopping the tick
In order to address the issue with short idle duration predictions
by the idle governor after the scheduler tick has been stopped,
reorder the code in cpuidle_idle_call() so that the governor idle
state selection runs before tick_nohz_idle_go_idle() and use the
"nohz" hint returned by cpuidle_select() to decide whether or not
to stop the tick.
This isn't straightforward, because menu_select() invokes
tick_nohz_get_sleep_length() to get the time to the next timer
event and the number returned by the latter comes from
__tick_nohz_idle_stop_tick(). Fortunately, however, it is possible
to compute that number without actually stopping the tick and with
the help of the existing code.
Namely, tick_nohz_get_sleep_length() can be made call
tick_nohz_next_event(), introduced earlier, to get the time to the
next non-highres timer event. If that happens, tick_nohz_next_event()
need not be called by __tick_nohz_idle_stop_tick() again.
If it turns out that the scheduler tick cannot be stopped going
forward or the next timer event is too close for the tick to be
stopped, tick_nohz_get_sleep_length() can simply return the time to
the next event currently programmed into the corresponding clock
event device.
In addition to knowing the return value of tick_nohz_next_event(),
however, tick_nohz_get_sleep_length() needs to know the time to the
next highres timer event, but with the scheduler tick timer excluded,
which can be computed with the help of hrtimer_get_next_event().
That minimum of that number and the tick_nohz_next_event() return
value is the total time to the next timer event with the assumption
that the tick will be stopped. It can be returned to the idle
governor which can use it for predicting idle duration (under the
assumption that the tick will be stopped) and deciding whether or
not it makes sense to stop the tick before putting the CPU into the
selected idle state.
With the above, the sleep_length field in struct tick_sched is not
necessary any more, so drop it.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199227
Reported-by: Doug Smythies <dsmythies@telus.net>
Reported-by: Thomas Ilsche <thomas.ilsche@tu-dresden.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
2018-04-03 15:17:11 -06:00
|
|
|
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
|
2014-08-17 11:30:25 -06:00
|
|
|
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
|
sched: idle: Select idle state before stopping the tick
In order to address the issue with short idle duration predictions
by the idle governor after the scheduler tick has been stopped,
reorder the code in cpuidle_idle_call() so that the governor idle
state selection runs before tick_nohz_idle_go_idle() and use the
"nohz" hint returned by cpuidle_select() to decide whether or not
to stop the tick.
This isn't straightforward, because menu_select() invokes
tick_nohz_get_sleep_length() to get the time to the next timer
event and the number returned by the latter comes from
__tick_nohz_idle_stop_tick(). Fortunately, however, it is possible
to compute that number without actually stopping the tick and with
the help of the existing code.
Namely, tick_nohz_get_sleep_length() can be made call
tick_nohz_next_event(), introduced earlier, to get the time to the
next non-highres timer event. If that happens, tick_nohz_next_event()
need not be called by __tick_nohz_idle_stop_tick() again.
If it turns out that the scheduler tick cannot be stopped going
forward or the next timer event is too close for the tick to be
stopped, tick_nohz_get_sleep_length() can simply return the time to
the next event currently programmed into the corresponding clock
event device.
In addition to knowing the return value of tick_nohz_next_event(),
however, tick_nohz_get_sleep_length() needs to know the time to the
next highres timer event, but with the scheduler tick timer excluded,
which can be computed with the help of hrtimer_get_next_event().
That minimum of that number and the tick_nohz_next_event() return
value is the total time to the next timer event with the assumption
that the tick will be stopped. It can be returned to the idle
governor which can use it for predicting idle duration (under the
assumption that the tick will be stopped) and deciding whether or
not it makes sense to stop the tick before putting the CPU into the
selected idle state.
With the above, the sleep_length field in struct tick_sched is not
necessary any more, so drop it.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199227
Reported-by: Doug Smythies <dsmythies@telus.net>
Reported-by: Thomas Ilsche <thomas.ilsche@tu-dresden.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
2018-04-03 15:17:11 -06:00
|
|
|
int cpu = smp_processor_id();
|
|
|
|
/*
|
|
|
|
* The idle entry time is expected to be a sufficient approximation of
|
|
|
|
* the current time at this point.
|
|
|
|
*/
|
|
|
|
ktime_t now = ts->idle_entrytime;
|
|
|
|
ktime_t next_event;
|
|
|
|
|
|
|
|
WARN_ON_ONCE(!ts->inidle);
|
|
|
|
|
2018-04-05 11:12:34 -06:00
|
|
|
*delta_next = ktime_sub(dev->next_event, now);
|
|
|
|
|
sched: idle: Select idle state before stopping the tick
In order to address the issue with short idle duration predictions
by the idle governor after the scheduler tick has been stopped,
reorder the code in cpuidle_idle_call() so that the governor idle
state selection runs before tick_nohz_idle_go_idle() and use the
"nohz" hint returned by cpuidle_select() to decide whether or not
to stop the tick.
This isn't straightforward, because menu_select() invokes
tick_nohz_get_sleep_length() to get the time to the next timer
event and the number returned by the latter comes from
__tick_nohz_idle_stop_tick(). Fortunately, however, it is possible
to compute that number without actually stopping the tick and with
the help of the existing code.
Namely, tick_nohz_get_sleep_length() can be made call
tick_nohz_next_event(), introduced earlier, to get the time to the
next non-highres timer event. If that happens, tick_nohz_next_event()
need not be called by __tick_nohz_idle_stop_tick() again.
If it turns out that the scheduler tick cannot be stopped going
forward or the next timer event is too close for the tick to be
stopped, tick_nohz_get_sleep_length() can simply return the time to
the next event currently programmed into the corresponding clock
event device.
In addition to knowing the return value of tick_nohz_next_event(),
however, tick_nohz_get_sleep_length() needs to know the time to the
next highres timer event, but with the scheduler tick timer excluded,
which can be computed with the help of hrtimer_get_next_event().
That minimum of that number and the tick_nohz_next_event() return
value is the total time to the next timer event with the assumption
that the tick will be stopped. It can be returned to the idle
governor which can use it for predicting idle duration (under the
assumption that the tick will be stopped) and deciding whether or
not it makes sense to stop the tick before putting the CPU into the
selected idle state.
With the above, the sleep_length field in struct tick_sched is not
necessary any more, so drop it.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199227
Reported-by: Doug Smythies <dsmythies@telus.net>
Reported-by: Thomas Ilsche <thomas.ilsche@tu-dresden.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
2018-04-03 15:17:11 -06:00
|
|
|
if (!can_stop_idle_tick(cpu, ts))
|
2018-04-05 11:12:34 -06:00
|
|
|
return *delta_next;
|
sched: idle: Select idle state before stopping the tick
In order to address the issue with short idle duration predictions
by the idle governor after the scheduler tick has been stopped,
reorder the code in cpuidle_idle_call() so that the governor idle
state selection runs before tick_nohz_idle_go_idle() and use the
"nohz" hint returned by cpuidle_select() to decide whether or not
to stop the tick.
This isn't straightforward, because menu_select() invokes
tick_nohz_get_sleep_length() to get the time to the next timer
event and the number returned by the latter comes from
__tick_nohz_idle_stop_tick(). Fortunately, however, it is possible
to compute that number without actually stopping the tick and with
the help of the existing code.
Namely, tick_nohz_get_sleep_length() can be made call
tick_nohz_next_event(), introduced earlier, to get the time to the
next non-highres timer event. If that happens, tick_nohz_next_event()
need not be called by __tick_nohz_idle_stop_tick() again.
If it turns out that the scheduler tick cannot be stopped going
forward or the next timer event is too close for the tick to be
stopped, tick_nohz_get_sleep_length() can simply return the time to
the next event currently programmed into the corresponding clock
event device.
In addition to knowing the return value of tick_nohz_next_event(),
however, tick_nohz_get_sleep_length() needs to know the time to the
next highres timer event, but with the scheduler tick timer excluded,
which can be computed with the help of hrtimer_get_next_event().
That minimum of that number and the tick_nohz_next_event() return
value is the total time to the next timer event with the assumption
that the tick will be stopped. It can be returned to the idle
governor which can use it for predicting idle duration (under the
assumption that the tick will be stopped) and deciding whether or
not it makes sense to stop the tick before putting the CPU into the
selected idle state.
With the above, the sleep_length field in struct tick_sched is not
necessary any more, so drop it.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199227
Reported-by: Doug Smythies <dsmythies@telus.net>
Reported-by: Thomas Ilsche <thomas.ilsche@tu-dresden.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
2018-04-03 15:17:11 -06:00
|
|
|
|
|
|
|
next_event = tick_nohz_next_event(ts, cpu);
|
|
|
|
if (!next_event)
|
2018-04-05 11:12:34 -06:00
|
|
|
return *delta_next;
|
sched: idle: Select idle state before stopping the tick
In order to address the issue with short idle duration predictions
by the idle governor after the scheduler tick has been stopped,
reorder the code in cpuidle_idle_call() so that the governor idle
state selection runs before tick_nohz_idle_go_idle() and use the
"nohz" hint returned by cpuidle_select() to decide whether or not
to stop the tick.
This isn't straightforward, because menu_select() invokes
tick_nohz_get_sleep_length() to get the time to the next timer
event and the number returned by the latter comes from
__tick_nohz_idle_stop_tick(). Fortunately, however, it is possible
to compute that number without actually stopping the tick and with
the help of the existing code.
Namely, tick_nohz_get_sleep_length() can be made call
tick_nohz_next_event(), introduced earlier, to get the time to the
next non-highres timer event. If that happens, tick_nohz_next_event()
need not be called by __tick_nohz_idle_stop_tick() again.
If it turns out that the scheduler tick cannot be stopped going
forward or the next timer event is too close for the tick to be
stopped, tick_nohz_get_sleep_length() can simply return the time to
the next event currently programmed into the corresponding clock
event device.
In addition to knowing the return value of tick_nohz_next_event(),
however, tick_nohz_get_sleep_length() needs to know the time to the
next highres timer event, but with the scheduler tick timer excluded,
which can be computed with the help of hrtimer_get_next_event().
That minimum of that number and the tick_nohz_next_event() return
value is the total time to the next timer event with the assumption
that the tick will be stopped. It can be returned to the idle
governor which can use it for predicting idle duration (under the
assumption that the tick will be stopped) and deciding whether or
not it makes sense to stop the tick before putting the CPU into the
selected idle state.
With the above, the sleep_length field in struct tick_sched is not
necessary any more, so drop it.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199227
Reported-by: Doug Smythies <dsmythies@telus.net>
Reported-by: Thomas Ilsche <thomas.ilsche@tu-dresden.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
2018-04-03 15:17:11 -06:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If the next highres timer to expire is earlier than next_event, the
|
|
|
|
* idle governor needs to know that.
|
|
|
|
*/
|
|
|
|
next_event = min_t(u64, next_event,
|
|
|
|
hrtimer_next_event_without(&ts->sched_timer));
|
cpuidle: consolidate 2.6.22 cpuidle branch into one patch
commit e5a16b1f9eec0af7cfa0830304b41c1c0833cf9f
Author: Len Brown <len.brown@intel.com>
Date: Tue Oct 2 23:44:44 2007 -0400
cpuidle: shrink diff
processor_idle.c | 440 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 429 insertions(+), 11 deletions(-)
Signed-off-by: Len Brown <len.brown@intel.com>
commit dfbb9d5aedfb18848a3e0d6f6e3e4969febb209c
Author: Len Brown <len.brown@intel.com>
Date: Wed Sep 26 02:17:55 2007 -0400
cpuidle: reduce diff size
Reduces the cpuidle processor_idle.c diff vs 2.6.22 from this
processor_idle.c | 2006 ++++++++++++++++++++++++++-----------------
1 file changed, 1219 insertions(+), 787 deletions(-)
to this:
processor_idle.c | 502 +++++++++++++++++++++++++++++++++++++++----
1 file changed, 458 insertions(+), 44 deletions(-)
...for the purpose of making the cpuilde patch less invasive
and easier to review.
no functional changes. build tested only.
Signed-off-by: Len Brown <len.brown@intel.com>
commit 889172fc915f5a7fe20f35b133cbd205ce69bf6c
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Sep 13 13:40:05 2007 -0700
cpuidle: Retain old ACPI policy for !CONFIG_CPU_IDLE
Retain the old policy in processor_idle, so that when CPU_IDLE is not
configured, old C-state policy will still be used. This provides a
clean gradual migration path from old ACPI policy to new cpuidle
based policy.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 9544a8181edc7ecc33b3bfd69271571f98ed08bc
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Sep 13 13:39:17 2007 -0700
cpuidle: Configure governors by default
Quoting Len "Do not give an option to users to shoot themselves in the foot".
Remove the configurability of ladder and menu governors as they are
needed for default policy of cpuidle. That way users will not be able to
have cpuidle without any policy loosing all C-state power savings.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8975059a2c1e56cfe83d1bcf031bcf4cb39be743
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:27:07 2007 -0400
CPUIDLE: load ACPI properly when CPUIDLE is disabled
Change the registration return codes for when CPUIDLE
support is not compiled into the kernel. As a result, the ACPI
processor driver will load properly even if CPUIDLE is unavailable.
However, it may be possible to cleanup the ACPI processor driver further
and eliminate some dead code paths.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit e0322e2b58dd1b12ec669bf84693efe0dc2414a8
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:26:06 2007 -0400
CPUIDLE: remove cpuidle_get_bm_activity()
Remove cpuidle_get_bm_activity() and updates governors
accordingly.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 18a6e770d5c82ba26653e53d240caa617e09e9ab
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:58 2007 -0400
CPUIDLE: max_cstate fix
Currently max_cstate is limited to 0, resulting in no idle processor
power management on ACPI platforms. This patch restores the value to
the array size.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1fdc0887286179b40ce24bcdbde663172e205ef0
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:40 2007 -0400
CPUIDLE: handle BM detection inside the ACPI Processor driver
Update the ACPI processor driver to detect BM activity and
limit state entry depth internally, rather than exposing such
requirements to CPUIDLE. As a result, CPUIDLE can drop this
ACPI-specific interface and become more platform independent. BM
activity is now handled much more aggressively than it was in the
original implementation, so some testing coverage may be needed to
verify that this doesn't introduce any DMA buffer under-run issues.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0ef38840db666f48e3cdd2b769da676c57228dd9
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:14 2007 -0400
CPUIDLE: menu governor updates
Tweak the menu governor to more effectively handle non-timer
break events. Non-timer break events are detected by comparing the
actual sleep time to the expected sleep time. In future revisions, it
may be more reliable to use the timer data structures directly.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit bb4d74fca63fa96cf3ace644b15ae0f12b7df5a1
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:24:40 2007 -0400
CPUIDLE: fix 'current_governor' sysfs entry
Allow the "current_governor" sysfs entry to properly handle
input terminated with '\n'.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit df3c71559bb69b125f1a48971bf0d17f78bbdf47
Author: Len Brown <len.brown@intel.com>
Date: Sun Aug 12 02:00:45 2007 -0400
cpuidle: fix IA64 build (again)
Signed-off-by: Len Brown <len.brown@intel.com>
commit a02064579e3f9530fd31baae16b1fc46b5a7bca8
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:39:27 2007 -0400
cpuidle: Remove support for runtime changing of max_cstate
Remove support for runtime changeability of max_cstate. Drivers can use
use latency APIs.
max_cstate can still be used as a boot time option and dmi override.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0912a44b13adf22f5e3f607d263aed23b4910d7e
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:39:16 2007 -0400
cpuidle: Remove ACPI cstate_limit calls from ipw2100
ipw2100 already has code to use accetable_latency interfaces to limit the
C-state. Remove the calls to acpi_set_cstate_limit and acpi_get_cstate_limit
as they are redundant.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c649a76e76be6bff1fd770d0a775798813a3f6e0
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:35:39 2007 -0400
cpuidle: compile fix for pause and resume functions
Fix the compilation failure when cpuidle is not compiled in.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Adam Belay <adam.belay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 2305a5920fb8ee6ccec1c62ade05aa8351091d71
Author: Adam Belay <abelay@novell.com>
Date: Thu Jul 19 00:49:00 2007 -0400
cpuidle: re-write
Some portions have been rewritten to make the code cleaner and lighter
weight. The following is a list of changes:
1.) the state name is now included in the sysfs interface
2.) detection, hotplug, and available state modifications are handled by
CPUIDLE drivers directly
3.) the CPUIDLE idle handler is only ever installed when at least one
cpuidle_device is enabled and ready
4.) the menu governor BM code no longer overflows
5.) the sysfs attributes are now printed as unsigned integers, avoiding
negative values
6.) a variety of other small cleanups
Also, Idle drivers are no longer swappable during runtime through the
CPUIDLE sysfs inteface. On i386 and x86_64 most idle handlers (e.g.
poll, mwait, halt, etc.) don't benefit from an infrastructure that
supports multiple states, so I think using a more general case idle
handler selection mechanism would be cleaner.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit df25b6b56955714e6e24b574d88d1fd11f0c3ee5
Author: Len Brown <len.brown@intel.com>
Date: Tue Jul 24 17:08:21 2007 -0400
cpuidle: fix IA64 buid
Signed-off-by: Len Brown <len.brown@intel.com>
commit fd6ada4c14488755ff7068860078c437431fbccd
Author: Adrian Bunk <bunk@stusta.de>
Date: Mon Jul 9 11:33:13 2007 -0700
cpuidle: static
make cpuidle_replace_governor() static
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c1d4a2cebcadf2429c0c72e1d29aa2a9684c32e0
Author: Adrian Bunk <bunk@stusta.de>
Date: Tue Jul 3 00:54:40 2007 -0400
cpuidle: static
This patch makes the needlessly global struct menu_governor static.
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit dbf8780c6e8d572c2c273da97ed1cca7608fd999
Author: Andrew Morton <akpm@linux-foundation.org>
Date: Tue Jul 3 00:49:14 2007 -0400
export symbol tick_nohz_get_sleep_length
ERROR: "tick_nohz_get_sleep_length" [drivers/cpuidle/governors/menu.ko] undefined!
ERROR: "tick_nohz_get_idle_jiffies" [drivers/cpuidle/governors/menu.ko] undefined!
And please be sure to get your changes to core kernel suitably reviewed.
Cc: Adam Belay <abelay@novell.com>
Cc: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 29f0e248e7017be15f99febf9143a2cef00b2961
Author: Andrew Morton <akpm@linux-foundation.org>
Date: Tue Jul 3 00:43:04 2007 -0400
tick.h needs hrtimer.h
It uses hrtimers.
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit e40cede7d63a029e92712a3fe02faee60cc38fb4
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:40:34 2007 -0400
cpuidle: first round of documentation updates
Documentation changes based on Pavel's feedback.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 83b42be2efece386976507555c29e7773a0dfcd1
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:39:25 2007 -0400
cpuidle: add rating to the governors and pick the one with highest rating by default
Introduce a governor rating scheme to pick the right governor by default.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit d2a74b8c5e8f22def4709330d4bfc4a29209b71c
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:38:08 2007 -0400
cpuidle: make cpuidle sysfs driver governor switch off by default
Make default cpuidle sysfs to show current_governor and current_driver in
read-only mode. More elaborate available_governors and available_drivers with
writeable current_governor and current_driver interface only appear with
"cpuidle_sysfs_switch" boot parameter.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1f60a0e80bf83cf6b55c8845bbe5596ed8f6307b
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:37:00 2007 -0400
cpuidle: menu governor: change the early break condition
Change the C-state early break out algorithm in menu governor.
We only look at early breakouts that result in wakeups shorter than idle
state's target_residency. If such a breakout is frequent enough, eliminate
the particular idle state upto a timeout period.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 45a42095cf64b003b4a69be3ce7f434f97d7af51
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:35:38 2007 -0400
cpuidle: fix uninitialized variable in sysfs routine
Fix the uninitialized usage of ret.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 80dca7cdba3e6ee13eae277660873ab9584eb3be
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:34:16 2007 -0400
cpuidle: reenable /proc/acpi//power interface for the time being
Keep /proc/acpi/processor/CPU*/power around for a while as powertop depends
on it. It will be marked deprecated and removed in future. powertop can use
cpuidle interfaces instead.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 589c37c2646c5e3813a51255a5ee1159cb4c33fc
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:32:37 2007 -0400
cpuidle: menu governor and hrtimer compile fix
Compile fix for menu governor.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0ba80bd9ab3ed304cb4f19b722e4cc6740588b5e
Author: Len Brown <len.brown@intel.com>
Date: Thu May 31 22:51:43 2007 -0400
cpuidle: build fix - cpuidle vs ipw2100 module
ERROR: "acpi_set_cstate_limit" [drivers/net/wireless/ipw2100.ko] undefined!
Signed-off-by: Len Brown <len.brown@intel.com>
commit d7d8fa7f96a7f7682be7c6cc0cc53fa7a18c3b58
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:47:07 2007 -0400
cpuidle: add the 'menu' governor
Here is my first take at implementing an idle PM governor that takes
full advantage of NO_HZ. I call it the 'menu' governor because it
considers the full list of idle states before each entry.
I've kept the implementation fairly simple. It attempts to guess the
next residency time and then chooses a state that would meet at least
the break-even point between power savings and entry cost. To this end,
it selects the deepest idle state that satisfies the following
constraints:
1. If the idle time elapsed since bus master activity was detected
is below a threshold (currently 20 ms), then limit the selection
to C2-type or above.
2. Do not choose a state with a break-even residency that exceeds
the expected time remaining until the next timer interrupt.
3. Do not choose a state with a break-even residency that exceeds
the elapsed time between the last pair of break events,
excluding timer interrupts.
This governor has an advantage over "ladder" governor because it
proactively checks how much time remains until the next timer interrupt
using the tick infrastructure. Also, it handles device interrupt
activity more intelligently by not including timer interrupts in break
event calculations. Finally, it doesn't make policy decisions using the
number of state entries, which can have variable residency times (NO_HZ
makes these potentially very large), and instead only considers sleep
time deltas.
The menu governor can be selected during runtime using the cpuidle sysfs
interface like so:
"echo "menu" > /sys/devices/system/cpu/cpuidle/current_governor"
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit a4bec7e65aa3b7488b879d971651cc99a6c410fe
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:47:03 2007 -0400
cpuidle: export time until next timer interrupt using NO_HZ
Expose information about the time remaining until the next
timer interrupt expires by utilizing the dynticks infrastructure.
Also modify the main idle loop to allow dynticks to handle
non-interrupt break events (e.g. DMA). Finally, expose sleep ticks
information to external code. Thomas Gleixner is responsible for much
of the code in this patch. However, I've made some additional changes,
so I'm probably responsible if there are any bugs or oversights :)
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 2929d8996fbc77f41a5ff86bb67cdde3ca7d2d72
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:46:58 2007 -0400
cpuidle: governor API changes
This patch prepares cpuidle for the menu governor. It adds an optional
stage after idle state entry to give the governor an opportunity to
check why the state was exited. Also it makes sure the idle loop
returns after each state entry, allowing the appropriate dynticks code
to run.
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 3a7fd42f9825c3b03e364ca59baa751bb350775f
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Apr 26 00:03:59 2007 -0700
cpuidle: hang fix
Prevent hang on x86-64, when ACPI processor driver is added as a module on
a system that does not support C-states.
x86-64 expects all idle handlers to enable interrupts before returning from
idle handler. This is due to enter_idle(), exit_idle() races. Make
cpuidle_idle_call() confirm to this when there is no pm_idle_old.
Also, cpuidle look at the return values of attch_driver() and set
current_driver to NULL if attach fails on all CPUs.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 4893339a142afbd5b7c01ffadfd53d14746e858e
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:09 2007 +0800
cpuidle: add support for max_cstate limit
With CPUIDLE framework, the max_cstate (to limit max cpu c-state)
parameter is ingored. Some systems require it to ignore C2/C3
and some drivers like ipw require it too.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 43bbbbe1cb998cbd2df656f55bb3bfe30f30e7d1
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:13 2007 +0800
cpuidle: add cpuidle_fore_redetect_devices API
add cpuidle_force_redetect_devices API,
which forces all CPU redetect idle states.
Next patch will use it.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit d1edadd608f24836def5ec483d2edccfb37b1d19
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:01 2007 +0800
cpuidle: fix sysfs related issue
Fix the cpuidle sysfs issue.
a. make kobject dynamicaly allocated
b. fixed sysfs init issue to avoid suspend/resume issue
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 7169a5cc0d67b263978859672e86c13c23a5570d
Author: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed Mar 28 22:52:53 2007 -0400
cpuidle: 1-bit field must be unsigned
A 1-bit bitfield has no room for a sign bit.
drivers/cpuidle/governors/ladder.c:54:16: error: dubious bitfield without explicit `signed' or `unsigned'
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 4658620158dc2fbd9e4bcb213c5b6fb5d05ba7d4
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed Mar 28 22:52:41 2007 -0400
cpuidle: fix boot hang
Patch for cpuidle boot hang reported by Larry Finger here.
http://www.ussg.iu.edu/hypermail/linux/kernel/0703.2/2025.html
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Larry Finger <larry.finger@lwfinger.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c17e168aa6e5fe3851baaae8df2fbc1cf11443a9
Author: Len Brown <len.brown@intel.com>
Date: Wed Mar 7 04:37:53 2007 -0500
cpuidle: ladder does not depend on ACPI
build fix for CONFIG_ACPI=n
In file included from drivers/cpuidle/governors/ladder.c:21:
include/acpi/processor.h:88: error: expected specifier-qualifier-list before âacpi_integerâ
include/acpi/processor.h:106: error: expected specifier-qualifier-list before âacpi_integerâ
include/acpi/processor.h:168: error: expected specifier-qualifier-list before âacpi_handleâ
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8c91d958246bde68db0c3f0c57b535962ce861cb
Author: Adrian Bunk <bunk@stusta.de>
Date: Tue Mar 6 02:29:40 2007 -0800
cpuidle: make code static
This patch makes the following needlessly global code static:
- driver.c: __cpuidle_find_driver()
- governor.c: __cpuidle_find_governor()
- ladder.c: struct ladder_governor
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Adam Belay <abelay@novell.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0c39dc3187094c72c33ab65a64d2017b21f372d2
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed Mar 7 02:38:22 2007 -0500
cpu_idle: fix build break
This patch fixes a build breakage with !CONFIG_HOTPLUG_CPU and
CONFIG_CPU_IDLE.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8112e3b115659b07df340ef170515799c0105f82
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Mar 6 02:29:39 2007 -0800
cpuidle: build fix for !CPU_IDLE
Fix the compile issues when CPU_IDLE is not configured.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Adam Belay <abelay@novell.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1eb4431e9599cd25e0d9872f3c2c8986821839dd
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:54:57 2007 -0800
cpuidle take2: Basic documentation for cpuidle
Documentation for cpuidle infrastructure
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit ef5f15a8b79123a047285ec2e3899108661df779
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:54:03 2007 -0800
cpuidle take2: Hookup ACPI C-states driver with cpuidle
Hookup ACPI C-states onto generic cpuidle infrastructure.
drivers/acpi/procesor_idle.c is now a ACPI C-states driver that registers as
a driver in cpuidle infrastructure and the policy part is removed from
drivers/acpi/processor_idle.c. We use governor in cpuidle instead.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 987196fa82d4db52c407e8c9d5dec884ba602183
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:52:57 2007 -0800
cpuidle take2: Core cpuidle infrastructure
Announcing 'cpuidle', a new CPU power management infrastructure to manage
idle CPUs in a clean and efficient manner.
cpuidle separates out the drivers that can provide support for multiple types
of idle states and policy governors that decide on what idle state to use
at run time.
A cpuidle driver can support multiple idle states based on parameters like
varying power consumption, wakeup latency, etc (ACPI C-states for example).
A cpuidle governor can be usage model specific (laptop, server,
laptop on battery etc).
Main advantage of the infrastructure being, it allows independent development
of drivers and governors and allows for better CPU power management.
A huge thanks to Adam Belay and Shaohua Li who were part of this mini-project
since its beginning and are greatly responsible for this patchset.
This patch:
Core cpuidle infrastructure.
Introduces a new abstraction layer for cpuidle:
* which manages drivers that can support multiple idles states. Drivers
can be generic or particular to specific hardware/platform
* allows pluging in multiple policy governors that can take idle state policy
decision
* The core also has a set of sysfs interfaces with which administrato can know
about supported drivers and governors and switch them at run time.
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
2007-10-03 16:58:00 -06:00
|
|
|
|
sched: idle: Select idle state before stopping the tick
In order to address the issue with short idle duration predictions
by the idle governor after the scheduler tick has been stopped,
reorder the code in cpuidle_idle_call() so that the governor idle
state selection runs before tick_nohz_idle_go_idle() and use the
"nohz" hint returned by cpuidle_select() to decide whether or not
to stop the tick.
This isn't straightforward, because menu_select() invokes
tick_nohz_get_sleep_length() to get the time to the next timer
event and the number returned by the latter comes from
__tick_nohz_idle_stop_tick(). Fortunately, however, it is possible
to compute that number without actually stopping the tick and with
the help of the existing code.
Namely, tick_nohz_get_sleep_length() can be made call
tick_nohz_next_event(), introduced earlier, to get the time to the
next non-highres timer event. If that happens, tick_nohz_next_event()
need not be called by __tick_nohz_idle_stop_tick() again.
If it turns out that the scheduler tick cannot be stopped going
forward or the next timer event is too close for the tick to be
stopped, tick_nohz_get_sleep_length() can simply return the time to
the next event currently programmed into the corresponding clock
event device.
In addition to knowing the return value of tick_nohz_next_event(),
however, tick_nohz_get_sleep_length() needs to know the time to the
next highres timer event, but with the scheduler tick timer excluded,
which can be computed with the help of hrtimer_get_next_event().
That minimum of that number and the tick_nohz_next_event() return
value is the total time to the next timer event with the assumption
that the tick will be stopped. It can be returned to the idle
governor which can use it for predicting idle duration (under the
assumption that the tick will be stopped) and deciding whether or
not it makes sense to stop the tick before putting the CPU into the
selected idle state.
With the above, the sleep_length field in struct tick_sched is not
necessary any more, so drop it.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199227
Reported-by: Doug Smythies <dsmythies@telus.net>
Reported-by: Thomas Ilsche <thomas.ilsche@tu-dresden.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
2018-04-03 15:17:11 -06:00
|
|
|
return ktime_sub(next_event, now);
|
cpuidle: consolidate 2.6.22 cpuidle branch into one patch
commit e5a16b1f9eec0af7cfa0830304b41c1c0833cf9f
Author: Len Brown <len.brown@intel.com>
Date: Tue Oct 2 23:44:44 2007 -0400
cpuidle: shrink diff
processor_idle.c | 440 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 429 insertions(+), 11 deletions(-)
Signed-off-by: Len Brown <len.brown@intel.com>
commit dfbb9d5aedfb18848a3e0d6f6e3e4969febb209c
Author: Len Brown <len.brown@intel.com>
Date: Wed Sep 26 02:17:55 2007 -0400
cpuidle: reduce diff size
Reduces the cpuidle processor_idle.c diff vs 2.6.22 from this
processor_idle.c | 2006 ++++++++++++++++++++++++++-----------------
1 file changed, 1219 insertions(+), 787 deletions(-)
to this:
processor_idle.c | 502 +++++++++++++++++++++++++++++++++++++++----
1 file changed, 458 insertions(+), 44 deletions(-)
...for the purpose of making the cpuilde patch less invasive
and easier to review.
no functional changes. build tested only.
Signed-off-by: Len Brown <len.brown@intel.com>
commit 889172fc915f5a7fe20f35b133cbd205ce69bf6c
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Sep 13 13:40:05 2007 -0700
cpuidle: Retain old ACPI policy for !CONFIG_CPU_IDLE
Retain the old policy in processor_idle, so that when CPU_IDLE is not
configured, old C-state policy will still be used. This provides a
clean gradual migration path from old ACPI policy to new cpuidle
based policy.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 9544a8181edc7ecc33b3bfd69271571f98ed08bc
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Sep 13 13:39:17 2007 -0700
cpuidle: Configure governors by default
Quoting Len "Do not give an option to users to shoot themselves in the foot".
Remove the configurability of ladder and menu governors as they are
needed for default policy of cpuidle. That way users will not be able to
have cpuidle without any policy loosing all C-state power savings.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8975059a2c1e56cfe83d1bcf031bcf4cb39be743
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:27:07 2007 -0400
CPUIDLE: load ACPI properly when CPUIDLE is disabled
Change the registration return codes for when CPUIDLE
support is not compiled into the kernel. As a result, the ACPI
processor driver will load properly even if CPUIDLE is unavailable.
However, it may be possible to cleanup the ACPI processor driver further
and eliminate some dead code paths.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit e0322e2b58dd1b12ec669bf84693efe0dc2414a8
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:26:06 2007 -0400
CPUIDLE: remove cpuidle_get_bm_activity()
Remove cpuidle_get_bm_activity() and updates governors
accordingly.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 18a6e770d5c82ba26653e53d240caa617e09e9ab
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:58 2007 -0400
CPUIDLE: max_cstate fix
Currently max_cstate is limited to 0, resulting in no idle processor
power management on ACPI platforms. This patch restores the value to
the array size.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1fdc0887286179b40ce24bcdbde663172e205ef0
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:40 2007 -0400
CPUIDLE: handle BM detection inside the ACPI Processor driver
Update the ACPI processor driver to detect BM activity and
limit state entry depth internally, rather than exposing such
requirements to CPUIDLE. As a result, CPUIDLE can drop this
ACPI-specific interface and become more platform independent. BM
activity is now handled much more aggressively than it was in the
original implementation, so some testing coverage may be needed to
verify that this doesn't introduce any DMA buffer under-run issues.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0ef38840db666f48e3cdd2b769da676c57228dd9
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:14 2007 -0400
CPUIDLE: menu governor updates
Tweak the menu governor to more effectively handle non-timer
break events. Non-timer break events are detected by comparing the
actual sleep time to the expected sleep time. In future revisions, it
may be more reliable to use the timer data structures directly.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit bb4d74fca63fa96cf3ace644b15ae0f12b7df5a1
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:24:40 2007 -0400
CPUIDLE: fix 'current_governor' sysfs entry
Allow the "current_governor" sysfs entry to properly handle
input terminated with '\n'.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit df3c71559bb69b125f1a48971bf0d17f78bbdf47
Author: Len Brown <len.brown@intel.com>
Date: Sun Aug 12 02:00:45 2007 -0400
cpuidle: fix IA64 build (again)
Signed-off-by: Len Brown <len.brown@intel.com>
commit a02064579e3f9530fd31baae16b1fc46b5a7bca8
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:39:27 2007 -0400
cpuidle: Remove support for runtime changing of max_cstate
Remove support for runtime changeability of max_cstate. Drivers can use
use latency APIs.
max_cstate can still be used as a boot time option and dmi override.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0912a44b13adf22f5e3f607d263aed23b4910d7e
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:39:16 2007 -0400
cpuidle: Remove ACPI cstate_limit calls from ipw2100
ipw2100 already has code to use accetable_latency interfaces to limit the
C-state. Remove the calls to acpi_set_cstate_limit and acpi_get_cstate_limit
as they are redundant.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c649a76e76be6bff1fd770d0a775798813a3f6e0
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:35:39 2007 -0400
cpuidle: compile fix for pause and resume functions
Fix the compilation failure when cpuidle is not compiled in.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Adam Belay <adam.belay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 2305a5920fb8ee6ccec1c62ade05aa8351091d71
Author: Adam Belay <abelay@novell.com>
Date: Thu Jul 19 00:49:00 2007 -0400
cpuidle: re-write
Some portions have been rewritten to make the code cleaner and lighter
weight. The following is a list of changes:
1.) the state name is now included in the sysfs interface
2.) detection, hotplug, and available state modifications are handled by
CPUIDLE drivers directly
3.) the CPUIDLE idle handler is only ever installed when at least one
cpuidle_device is enabled and ready
4.) the menu governor BM code no longer overflows
5.) the sysfs attributes are now printed as unsigned integers, avoiding
negative values
6.) a variety of other small cleanups
Also, Idle drivers are no longer swappable during runtime through the
CPUIDLE sysfs inteface. On i386 and x86_64 most idle handlers (e.g.
poll, mwait, halt, etc.) don't benefit from an infrastructure that
supports multiple states, so I think using a more general case idle
handler selection mechanism would be cleaner.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit df25b6b56955714e6e24b574d88d1fd11f0c3ee5
Author: Len Brown <len.brown@intel.com>
Date: Tue Jul 24 17:08:21 2007 -0400
cpuidle: fix IA64 buid
Signed-off-by: Len Brown <len.brown@intel.com>
commit fd6ada4c14488755ff7068860078c437431fbccd
Author: Adrian Bunk <bunk@stusta.de>
Date: Mon Jul 9 11:33:13 2007 -0700
cpuidle: static
make cpuidle_replace_governor() static
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c1d4a2cebcadf2429c0c72e1d29aa2a9684c32e0
Author: Adrian Bunk <bunk@stusta.de>
Date: Tue Jul 3 00:54:40 2007 -0400
cpuidle: static
This patch makes the needlessly global struct menu_governor static.
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit dbf8780c6e8d572c2c273da97ed1cca7608fd999
Author: Andrew Morton <akpm@linux-foundation.org>
Date: Tue Jul 3 00:49:14 2007 -0400
export symbol tick_nohz_get_sleep_length
ERROR: "tick_nohz_get_sleep_length" [drivers/cpuidle/governors/menu.ko] undefined!
ERROR: "tick_nohz_get_idle_jiffies" [drivers/cpuidle/governors/menu.ko] undefined!
And please be sure to get your changes to core kernel suitably reviewed.
Cc: Adam Belay <abelay@novell.com>
Cc: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 29f0e248e7017be15f99febf9143a2cef00b2961
Author: Andrew Morton <akpm@linux-foundation.org>
Date: Tue Jul 3 00:43:04 2007 -0400
tick.h needs hrtimer.h
It uses hrtimers.
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit e40cede7d63a029e92712a3fe02faee60cc38fb4
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:40:34 2007 -0400
cpuidle: first round of documentation updates
Documentation changes based on Pavel's feedback.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 83b42be2efece386976507555c29e7773a0dfcd1
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:39:25 2007 -0400
cpuidle: add rating to the governors and pick the one with highest rating by default
Introduce a governor rating scheme to pick the right governor by default.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit d2a74b8c5e8f22def4709330d4bfc4a29209b71c
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:38:08 2007 -0400
cpuidle: make cpuidle sysfs driver governor switch off by default
Make default cpuidle sysfs to show current_governor and current_driver in
read-only mode. More elaborate available_governors and available_drivers with
writeable current_governor and current_driver interface only appear with
"cpuidle_sysfs_switch" boot parameter.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1f60a0e80bf83cf6b55c8845bbe5596ed8f6307b
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:37:00 2007 -0400
cpuidle: menu governor: change the early break condition
Change the C-state early break out algorithm in menu governor.
We only look at early breakouts that result in wakeups shorter than idle
state's target_residency. If such a breakout is frequent enough, eliminate
the particular idle state upto a timeout period.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 45a42095cf64b003b4a69be3ce7f434f97d7af51
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:35:38 2007 -0400
cpuidle: fix uninitialized variable in sysfs routine
Fix the uninitialized usage of ret.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 80dca7cdba3e6ee13eae277660873ab9584eb3be
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:34:16 2007 -0400
cpuidle: reenable /proc/acpi//power interface for the time being
Keep /proc/acpi/processor/CPU*/power around for a while as powertop depends
on it. It will be marked deprecated and removed in future. powertop can use
cpuidle interfaces instead.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 589c37c2646c5e3813a51255a5ee1159cb4c33fc
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:32:37 2007 -0400
cpuidle: menu governor and hrtimer compile fix
Compile fix for menu governor.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0ba80bd9ab3ed304cb4f19b722e4cc6740588b5e
Author: Len Brown <len.brown@intel.com>
Date: Thu May 31 22:51:43 2007 -0400
cpuidle: build fix - cpuidle vs ipw2100 module
ERROR: "acpi_set_cstate_limit" [drivers/net/wireless/ipw2100.ko] undefined!
Signed-off-by: Len Brown <len.brown@intel.com>
commit d7d8fa7f96a7f7682be7c6cc0cc53fa7a18c3b58
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:47:07 2007 -0400
cpuidle: add the 'menu' governor
Here is my first take at implementing an idle PM governor that takes
full advantage of NO_HZ. I call it the 'menu' governor because it
considers the full list of idle states before each entry.
I've kept the implementation fairly simple. It attempts to guess the
next residency time and then chooses a state that would meet at least
the break-even point between power savings and entry cost. To this end,
it selects the deepest idle state that satisfies the following
constraints:
1. If the idle time elapsed since bus master activity was detected
is below a threshold (currently 20 ms), then limit the selection
to C2-type or above.
2. Do not choose a state with a break-even residency that exceeds
the expected time remaining until the next timer interrupt.
3. Do not choose a state with a break-even residency that exceeds
the elapsed time between the last pair of break events,
excluding timer interrupts.
This governor has an advantage over "ladder" governor because it
proactively checks how much time remains until the next timer interrupt
using the tick infrastructure. Also, it handles device interrupt
activity more intelligently by not including timer interrupts in break
event calculations. Finally, it doesn't make policy decisions using the
number of state entries, which can have variable residency times (NO_HZ
makes these potentially very large), and instead only considers sleep
time deltas.
The menu governor can be selected during runtime using the cpuidle sysfs
interface like so:
"echo "menu" > /sys/devices/system/cpu/cpuidle/current_governor"
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit a4bec7e65aa3b7488b879d971651cc99a6c410fe
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:47:03 2007 -0400
cpuidle: export time until next timer interrupt using NO_HZ
Expose information about the time remaining until the next
timer interrupt expires by utilizing the dynticks infrastructure.
Also modify the main idle loop to allow dynticks to handle
non-interrupt break events (e.g. DMA). Finally, expose sleep ticks
information to external code. Thomas Gleixner is responsible for much
of the code in this patch. However, I've made some additional changes,
so I'm probably responsible if there are any bugs or oversights :)
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 2929d8996fbc77f41a5ff86bb67cdde3ca7d2d72
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:46:58 2007 -0400
cpuidle: governor API changes
This patch prepares cpuidle for the menu governor. It adds an optional
stage after idle state entry to give the governor an opportunity to
check why the state was exited. Also it makes sure the idle loop
returns after each state entry, allowing the appropriate dynticks code
to run.
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 3a7fd42f9825c3b03e364ca59baa751bb350775f
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Apr 26 00:03:59 2007 -0700
cpuidle: hang fix
Prevent hang on x86-64, when ACPI processor driver is added as a module on
a system that does not support C-states.
x86-64 expects all idle handlers to enable interrupts before returning from
idle handler. This is due to enter_idle(), exit_idle() races. Make
cpuidle_idle_call() confirm to this when there is no pm_idle_old.
Also, cpuidle look at the return values of attch_driver() and set
current_driver to NULL if attach fails on all CPUs.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 4893339a142afbd5b7c01ffadfd53d14746e858e
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:09 2007 +0800
cpuidle: add support for max_cstate limit
With CPUIDLE framework, the max_cstate (to limit max cpu c-state)
parameter is ingored. Some systems require it to ignore C2/C3
and some drivers like ipw require it too.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 43bbbbe1cb998cbd2df656f55bb3bfe30f30e7d1
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:13 2007 +0800
cpuidle: add cpuidle_fore_redetect_devices API
add cpuidle_force_redetect_devices API,
which forces all CPU redetect idle states.
Next patch will use it.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit d1edadd608f24836def5ec483d2edccfb37b1d19
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:01 2007 +0800
cpuidle: fix sysfs related issue
Fix the cpuidle sysfs issue.
a. make kobject dynamicaly allocated
b. fixed sysfs init issue to avoid suspend/resume issue
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 7169a5cc0d67b263978859672e86c13c23a5570d
Author: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed Mar 28 22:52:53 2007 -0400
cpuidle: 1-bit field must be unsigned
A 1-bit bitfield has no room for a sign bit.
drivers/cpuidle/governors/ladder.c:54:16: error: dubious bitfield without explicit `signed' or `unsigned'
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 4658620158dc2fbd9e4bcb213c5b6fb5d05ba7d4
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed Mar 28 22:52:41 2007 -0400
cpuidle: fix boot hang
Patch for cpuidle boot hang reported by Larry Finger here.
http://www.ussg.iu.edu/hypermail/linux/kernel/0703.2/2025.html
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Larry Finger <larry.finger@lwfinger.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c17e168aa6e5fe3851baaae8df2fbc1cf11443a9
Author: Len Brown <len.brown@intel.com>
Date: Wed Mar 7 04:37:53 2007 -0500
cpuidle: ladder does not depend on ACPI
build fix for CONFIG_ACPI=n
In file included from drivers/cpuidle/governors/ladder.c:21:
include/acpi/processor.h:88: error: expected specifier-qualifier-list before âacpi_integerâ
include/acpi/processor.h:106: error: expected specifier-qualifier-list before âacpi_integerâ
include/acpi/processor.h:168: error: expected specifier-qualifier-list before âacpi_handleâ
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8c91d958246bde68db0c3f0c57b535962ce861cb
Author: Adrian Bunk <bunk@stusta.de>
Date: Tue Mar 6 02:29:40 2007 -0800
cpuidle: make code static
This patch makes the following needlessly global code static:
- driver.c: __cpuidle_find_driver()
- governor.c: __cpuidle_find_governor()
- ladder.c: struct ladder_governor
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Adam Belay <abelay@novell.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0c39dc3187094c72c33ab65a64d2017b21f372d2
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed Mar 7 02:38:22 2007 -0500
cpu_idle: fix build break
This patch fixes a build breakage with !CONFIG_HOTPLUG_CPU and
CONFIG_CPU_IDLE.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8112e3b115659b07df340ef170515799c0105f82
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Mar 6 02:29:39 2007 -0800
cpuidle: build fix for !CPU_IDLE
Fix the compile issues when CPU_IDLE is not configured.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Adam Belay <abelay@novell.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1eb4431e9599cd25e0d9872f3c2c8986821839dd
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:54:57 2007 -0800
cpuidle take2: Basic documentation for cpuidle
Documentation for cpuidle infrastructure
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit ef5f15a8b79123a047285ec2e3899108661df779
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:54:03 2007 -0800
cpuidle take2: Hookup ACPI C-states driver with cpuidle
Hookup ACPI C-states onto generic cpuidle infrastructure.
drivers/acpi/procesor_idle.c is now a ACPI C-states driver that registers as
a driver in cpuidle infrastructure and the policy part is removed from
drivers/acpi/processor_idle.c. We use governor in cpuidle instead.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 987196fa82d4db52c407e8c9d5dec884ba602183
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:52:57 2007 -0800
cpuidle take2: Core cpuidle infrastructure
Announcing 'cpuidle', a new CPU power management infrastructure to manage
idle CPUs in a clean and efficient manner.
cpuidle separates out the drivers that can provide support for multiple types
of idle states and policy governors that decide on what idle state to use
at run time.
A cpuidle driver can support multiple idle states based on parameters like
varying power consumption, wakeup latency, etc (ACPI C-states for example).
A cpuidle governor can be usage model specific (laptop, server,
laptop on battery etc).
Main advantage of the infrastructure being, it allows independent development
of drivers and governors and allows for better CPU power management.
A huge thanks to Adam Belay and Shaohua Li who were part of this mini-project
since its beginning and are greatly responsible for this patchset.
This patch:
Core cpuidle infrastructure.
Introduces a new abstraction layer for cpuidle:
* which manages drivers that can support multiple idles states. Drivers
can be generic or particular to specific hardware/platform
* allows pluging in multiple policy governors that can take idle state policy
decision
* The core also has a set of sysfs interfaces with which administrato can know
about supported drivers and governors and switch them at run time.
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
2007-10-03 16:58:00 -06:00
|
|
|
}
|
2020-04-13 12:25:50 -06:00
|
|
|
EXPORT_SYMBOL_GPL(tick_nohz_get_sleep_length);
|
cpuidle: consolidate 2.6.22 cpuidle branch into one patch
commit e5a16b1f9eec0af7cfa0830304b41c1c0833cf9f
Author: Len Brown <len.brown@intel.com>
Date: Tue Oct 2 23:44:44 2007 -0400
cpuidle: shrink diff
processor_idle.c | 440 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 429 insertions(+), 11 deletions(-)
Signed-off-by: Len Brown <len.brown@intel.com>
commit dfbb9d5aedfb18848a3e0d6f6e3e4969febb209c
Author: Len Brown <len.brown@intel.com>
Date: Wed Sep 26 02:17:55 2007 -0400
cpuidle: reduce diff size
Reduces the cpuidle processor_idle.c diff vs 2.6.22 from this
processor_idle.c | 2006 ++++++++++++++++++++++++++-----------------
1 file changed, 1219 insertions(+), 787 deletions(-)
to this:
processor_idle.c | 502 +++++++++++++++++++++++++++++++++++++++----
1 file changed, 458 insertions(+), 44 deletions(-)
...for the purpose of making the cpuilde patch less invasive
and easier to review.
no functional changes. build tested only.
Signed-off-by: Len Brown <len.brown@intel.com>
commit 889172fc915f5a7fe20f35b133cbd205ce69bf6c
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Sep 13 13:40:05 2007 -0700
cpuidle: Retain old ACPI policy for !CONFIG_CPU_IDLE
Retain the old policy in processor_idle, so that when CPU_IDLE is not
configured, old C-state policy will still be used. This provides a
clean gradual migration path from old ACPI policy to new cpuidle
based policy.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 9544a8181edc7ecc33b3bfd69271571f98ed08bc
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Sep 13 13:39:17 2007 -0700
cpuidle: Configure governors by default
Quoting Len "Do not give an option to users to shoot themselves in the foot".
Remove the configurability of ladder and menu governors as they are
needed for default policy of cpuidle. That way users will not be able to
have cpuidle without any policy loosing all C-state power savings.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8975059a2c1e56cfe83d1bcf031bcf4cb39be743
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:27:07 2007 -0400
CPUIDLE: load ACPI properly when CPUIDLE is disabled
Change the registration return codes for when CPUIDLE
support is not compiled into the kernel. As a result, the ACPI
processor driver will load properly even if CPUIDLE is unavailable.
However, it may be possible to cleanup the ACPI processor driver further
and eliminate some dead code paths.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit e0322e2b58dd1b12ec669bf84693efe0dc2414a8
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:26:06 2007 -0400
CPUIDLE: remove cpuidle_get_bm_activity()
Remove cpuidle_get_bm_activity() and updates governors
accordingly.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 18a6e770d5c82ba26653e53d240caa617e09e9ab
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:58 2007 -0400
CPUIDLE: max_cstate fix
Currently max_cstate is limited to 0, resulting in no idle processor
power management on ACPI platforms. This patch restores the value to
the array size.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1fdc0887286179b40ce24bcdbde663172e205ef0
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:40 2007 -0400
CPUIDLE: handle BM detection inside the ACPI Processor driver
Update the ACPI processor driver to detect BM activity and
limit state entry depth internally, rather than exposing such
requirements to CPUIDLE. As a result, CPUIDLE can drop this
ACPI-specific interface and become more platform independent. BM
activity is now handled much more aggressively than it was in the
original implementation, so some testing coverage may be needed to
verify that this doesn't introduce any DMA buffer under-run issues.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0ef38840db666f48e3cdd2b769da676c57228dd9
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:25:14 2007 -0400
CPUIDLE: menu governor updates
Tweak the menu governor to more effectively handle non-timer
break events. Non-timer break events are detected by comparing the
actual sleep time to the expected sleep time. In future revisions, it
may be more reliable to use the timer data structures directly.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit bb4d74fca63fa96cf3ace644b15ae0f12b7df5a1
Author: Adam Belay <abelay@novell.com>
Date: Tue Aug 21 18:24:40 2007 -0400
CPUIDLE: fix 'current_governor' sysfs entry
Allow the "current_governor" sysfs entry to properly handle
input terminated with '\n'.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit df3c71559bb69b125f1a48971bf0d17f78bbdf47
Author: Len Brown <len.brown@intel.com>
Date: Sun Aug 12 02:00:45 2007 -0400
cpuidle: fix IA64 build (again)
Signed-off-by: Len Brown <len.brown@intel.com>
commit a02064579e3f9530fd31baae16b1fc46b5a7bca8
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:39:27 2007 -0400
cpuidle: Remove support for runtime changing of max_cstate
Remove support for runtime changeability of max_cstate. Drivers can use
use latency APIs.
max_cstate can still be used as a boot time option and dmi override.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0912a44b13adf22f5e3f607d263aed23b4910d7e
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:39:16 2007 -0400
cpuidle: Remove ACPI cstate_limit calls from ipw2100
ipw2100 already has code to use accetable_latency interfaces to limit the
C-state. Remove the calls to acpi_set_cstate_limit and acpi_get_cstate_limit
as they are redundant.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c649a76e76be6bff1fd770d0a775798813a3f6e0
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Sun Aug 12 01:35:39 2007 -0400
cpuidle: compile fix for pause and resume functions
Fix the compilation failure when cpuidle is not compiled in.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Adam Belay <adam.belay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 2305a5920fb8ee6ccec1c62ade05aa8351091d71
Author: Adam Belay <abelay@novell.com>
Date: Thu Jul 19 00:49:00 2007 -0400
cpuidle: re-write
Some portions have been rewritten to make the code cleaner and lighter
weight. The following is a list of changes:
1.) the state name is now included in the sysfs interface
2.) detection, hotplug, and available state modifications are handled by
CPUIDLE drivers directly
3.) the CPUIDLE idle handler is only ever installed when at least one
cpuidle_device is enabled and ready
4.) the menu governor BM code no longer overflows
5.) the sysfs attributes are now printed as unsigned integers, avoiding
negative values
6.) a variety of other small cleanups
Also, Idle drivers are no longer swappable during runtime through the
CPUIDLE sysfs inteface. On i386 and x86_64 most idle handlers (e.g.
poll, mwait, halt, etc.) don't benefit from an infrastructure that
supports multiple states, so I think using a more general case idle
handler selection mechanism would be cleaner.
Signed-off-by: Adam Belay <abelay@novell.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit df25b6b56955714e6e24b574d88d1fd11f0c3ee5
Author: Len Brown <len.brown@intel.com>
Date: Tue Jul 24 17:08:21 2007 -0400
cpuidle: fix IA64 buid
Signed-off-by: Len Brown <len.brown@intel.com>
commit fd6ada4c14488755ff7068860078c437431fbccd
Author: Adrian Bunk <bunk@stusta.de>
Date: Mon Jul 9 11:33:13 2007 -0700
cpuidle: static
make cpuidle_replace_governor() static
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c1d4a2cebcadf2429c0c72e1d29aa2a9684c32e0
Author: Adrian Bunk <bunk@stusta.de>
Date: Tue Jul 3 00:54:40 2007 -0400
cpuidle: static
This patch makes the needlessly global struct menu_governor static.
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit dbf8780c6e8d572c2c273da97ed1cca7608fd999
Author: Andrew Morton <akpm@linux-foundation.org>
Date: Tue Jul 3 00:49:14 2007 -0400
export symbol tick_nohz_get_sleep_length
ERROR: "tick_nohz_get_sleep_length" [drivers/cpuidle/governors/menu.ko] undefined!
ERROR: "tick_nohz_get_idle_jiffies" [drivers/cpuidle/governors/menu.ko] undefined!
And please be sure to get your changes to core kernel suitably reviewed.
Cc: Adam Belay <abelay@novell.com>
Cc: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 29f0e248e7017be15f99febf9143a2cef00b2961
Author: Andrew Morton <akpm@linux-foundation.org>
Date: Tue Jul 3 00:43:04 2007 -0400
tick.h needs hrtimer.h
It uses hrtimers.
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit e40cede7d63a029e92712a3fe02faee60cc38fb4
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:40:34 2007 -0400
cpuidle: first round of documentation updates
Documentation changes based on Pavel's feedback.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 83b42be2efece386976507555c29e7773a0dfcd1
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:39:25 2007 -0400
cpuidle: add rating to the governors and pick the one with highest rating by default
Introduce a governor rating scheme to pick the right governor by default.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit d2a74b8c5e8f22def4709330d4bfc4a29209b71c
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:38:08 2007 -0400
cpuidle: make cpuidle sysfs driver governor switch off by default
Make default cpuidle sysfs to show current_governor and current_driver in
read-only mode. More elaborate available_governors and available_drivers with
writeable current_governor and current_driver interface only appear with
"cpuidle_sysfs_switch" boot parameter.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1f60a0e80bf83cf6b55c8845bbe5596ed8f6307b
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:37:00 2007 -0400
cpuidle: menu governor: change the early break condition
Change the C-state early break out algorithm in menu governor.
We only look at early breakouts that result in wakeups shorter than idle
state's target_residency. If such a breakout is frequent enough, eliminate
the particular idle state upto a timeout period.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 45a42095cf64b003b4a69be3ce7f434f97d7af51
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:35:38 2007 -0400
cpuidle: fix uninitialized variable in sysfs routine
Fix the uninitialized usage of ret.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 80dca7cdba3e6ee13eae277660873ab9584eb3be
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:34:16 2007 -0400
cpuidle: reenable /proc/acpi//power interface for the time being
Keep /proc/acpi/processor/CPU*/power around for a while as powertop depends
on it. It will be marked deprecated and removed in future. powertop can use
cpuidle interfaces instead.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 589c37c2646c5e3813a51255a5ee1159cb4c33fc
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Jul 3 00:32:37 2007 -0400
cpuidle: menu governor and hrtimer compile fix
Compile fix for menu governor.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0ba80bd9ab3ed304cb4f19b722e4cc6740588b5e
Author: Len Brown <len.brown@intel.com>
Date: Thu May 31 22:51:43 2007 -0400
cpuidle: build fix - cpuidle vs ipw2100 module
ERROR: "acpi_set_cstate_limit" [drivers/net/wireless/ipw2100.ko] undefined!
Signed-off-by: Len Brown <len.brown@intel.com>
commit d7d8fa7f96a7f7682be7c6cc0cc53fa7a18c3b58
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:47:07 2007 -0400
cpuidle: add the 'menu' governor
Here is my first take at implementing an idle PM governor that takes
full advantage of NO_HZ. I call it the 'menu' governor because it
considers the full list of idle states before each entry.
I've kept the implementation fairly simple. It attempts to guess the
next residency time and then chooses a state that would meet at least
the break-even point between power savings and entry cost. To this end,
it selects the deepest idle state that satisfies the following
constraints:
1. If the idle time elapsed since bus master activity was detected
is below a threshold (currently 20 ms), then limit the selection
to C2-type or above.
2. Do not choose a state with a break-even residency that exceeds
the expected time remaining until the next timer interrupt.
3. Do not choose a state with a break-even residency that exceeds
the elapsed time between the last pair of break events,
excluding timer interrupts.
This governor has an advantage over "ladder" governor because it
proactively checks how much time remains until the next timer interrupt
using the tick infrastructure. Also, it handles device interrupt
activity more intelligently by not including timer interrupts in break
event calculations. Finally, it doesn't make policy decisions using the
number of state entries, which can have variable residency times (NO_HZ
makes these potentially very large), and instead only considers sleep
time deltas.
The menu governor can be selected during runtime using the cpuidle sysfs
interface like so:
"echo "menu" > /sys/devices/system/cpu/cpuidle/current_governor"
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit a4bec7e65aa3b7488b879d971651cc99a6c410fe
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:47:03 2007 -0400
cpuidle: export time until next timer interrupt using NO_HZ
Expose information about the time remaining until the next
timer interrupt expires by utilizing the dynticks infrastructure.
Also modify the main idle loop to allow dynticks to handle
non-interrupt break events (e.g. DMA). Finally, expose sleep ticks
information to external code. Thomas Gleixner is responsible for much
of the code in this patch. However, I've made some additional changes,
so I'm probably responsible if there are any bugs or oversights :)
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 2929d8996fbc77f41a5ff86bb67cdde3ca7d2d72
Author: Adam Belay <abelay@novell.com>
Date: Sat Mar 24 03:46:58 2007 -0400
cpuidle: governor API changes
This patch prepares cpuidle for the menu governor. It adds an optional
stage after idle state entry to give the governor an opportunity to
check why the state was exited. Also it makes sure the idle loop
returns after each state entry, allowing the appropriate dynticks code
to run.
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 3a7fd42f9825c3b03e364ca59baa751bb350775f
Author: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Apr 26 00:03:59 2007 -0700
cpuidle: hang fix
Prevent hang on x86-64, when ACPI processor driver is added as a module on
a system that does not support C-states.
x86-64 expects all idle handlers to enable interrupts before returning from
idle handler. This is due to enter_idle(), exit_idle() races. Make
cpuidle_idle_call() confirm to this when there is no pm_idle_old.
Also, cpuidle look at the return values of attch_driver() and set
current_driver to NULL if attach fails on all CPUs.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 4893339a142afbd5b7c01ffadfd53d14746e858e
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:09 2007 +0800
cpuidle: add support for max_cstate limit
With CPUIDLE framework, the max_cstate (to limit max cpu c-state)
parameter is ingored. Some systems require it to ignore C2/C3
and some drivers like ipw require it too.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 43bbbbe1cb998cbd2df656f55bb3bfe30f30e7d1
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:13 2007 +0800
cpuidle: add cpuidle_fore_redetect_devices API
add cpuidle_force_redetect_devices API,
which forces all CPU redetect idle states.
Next patch will use it.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit d1edadd608f24836def5ec483d2edccfb37b1d19
Author: Shaohua Li <shaohua.li@intel.com>
Date: Thu Apr 26 10:40:01 2007 +0800
cpuidle: fix sysfs related issue
Fix the cpuidle sysfs issue.
a. make kobject dynamicaly allocated
b. fixed sysfs init issue to avoid suspend/resume issue
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 7169a5cc0d67b263978859672e86c13c23a5570d
Author: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed Mar 28 22:52:53 2007 -0400
cpuidle: 1-bit field must be unsigned
A 1-bit bitfield has no room for a sign bit.
drivers/cpuidle/governors/ladder.c:54:16: error: dubious bitfield without explicit `signed' or `unsigned'
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 4658620158dc2fbd9e4bcb213c5b6fb5d05ba7d4
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed Mar 28 22:52:41 2007 -0400
cpuidle: fix boot hang
Patch for cpuidle boot hang reported by Larry Finger here.
http://www.ussg.iu.edu/hypermail/linux/kernel/0703.2/2025.html
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Larry Finger <larry.finger@lwfinger.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit c17e168aa6e5fe3851baaae8df2fbc1cf11443a9
Author: Len Brown <len.brown@intel.com>
Date: Wed Mar 7 04:37:53 2007 -0500
cpuidle: ladder does not depend on ACPI
build fix for CONFIG_ACPI=n
In file included from drivers/cpuidle/governors/ladder.c:21:
include/acpi/processor.h:88: error: expected specifier-qualifier-list before âacpi_integerâ
include/acpi/processor.h:106: error: expected specifier-qualifier-list before âacpi_integerâ
include/acpi/processor.h:168: error: expected specifier-qualifier-list before âacpi_handleâ
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8c91d958246bde68db0c3f0c57b535962ce861cb
Author: Adrian Bunk <bunk@stusta.de>
Date: Tue Mar 6 02:29:40 2007 -0800
cpuidle: make code static
This patch makes the following needlessly global code static:
- driver.c: __cpuidle_find_driver()
- governor.c: __cpuidle_find_governor()
- ladder.c: struct ladder_governor
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Adam Belay <abelay@novell.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 0c39dc3187094c72c33ab65a64d2017b21f372d2
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed Mar 7 02:38:22 2007 -0500
cpu_idle: fix build break
This patch fixes a build breakage with !CONFIG_HOTPLUG_CPU and
CONFIG_CPU_IDLE.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 8112e3b115659b07df340ef170515799c0105f82
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue Mar 6 02:29:39 2007 -0800
cpuidle: build fix for !CPU_IDLE
Fix the compile issues when CPU_IDLE is not configured.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Adam Belay <abelay@novell.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 1eb4431e9599cd25e0d9872f3c2c8986821839dd
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:54:57 2007 -0800
cpuidle take2: Basic documentation for cpuidle
Documentation for cpuidle infrastructure
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit ef5f15a8b79123a047285ec2e3899108661df779
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:54:03 2007 -0800
cpuidle take2: Hookup ACPI C-states driver with cpuidle
Hookup ACPI C-states onto generic cpuidle infrastructure.
drivers/acpi/procesor_idle.c is now a ACPI C-states driver that registers as
a driver in cpuidle infrastructure and the policy part is removed from
drivers/acpi/processor_idle.c. We use governor in cpuidle instead.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Len Brown <len.brown@intel.com>
commit 987196fa82d4db52c407e8c9d5dec884ba602183
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu Feb 22 13:52:57 2007 -0800
cpuidle take2: Core cpuidle infrastructure
Announcing 'cpuidle', a new CPU power management infrastructure to manage
idle CPUs in a clean and efficient manner.
cpuidle separates out the drivers that can provide support for multiple types
of idle states and policy governors that decide on what idle state to use
at run time.
A cpuidle driver can support multiple idle states based on parameters like
varying power consumption, wakeup latency, etc (ACPI C-states for example).
A cpuidle governor can be usage model specific (laptop, server,
laptop on battery etc).
Main advantage of the infrastructure being, it allows independent development
of drivers and governors and allows for better CPU power management.
A huge thanks to Adam Belay and Shaohua Li who were part of this mini-project
since its beginning and are greatly responsible for this patchset.
This patch:
Core cpuidle infrastructure.
Introduces a new abstraction layer for cpuidle:
* which manages drivers that can support multiple idles states. Drivers
can be generic or particular to specific hardware/platform
* allows pluging in multiple policy governors that can take idle state policy
decision
* The core also has a set of sysfs interfaces with which administrato can know
about supported drivers and governors and switch them at run time.
Signed-off-by: Adam Belay <abelay@novell.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
2007-10-03 16:58:00 -06:00
|
|
|
|
2017-12-20 18:22:45 -07:00
|
|
|
/**
|
|
|
|
* tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
|
|
|
|
* for a particular CPU.
|
|
|
|
*
|
|
|
|
* Called from the schedutil frequency scaling governor in scheduler context.
|
|
|
|
*/
|
|
|
|
unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
|
|
|
|
{
|
|
|
|
struct tick_sched *ts = tick_get_tick_sched(cpu);
|
|
|
|
|
|
|
|
return ts->idle_calls;
|
|
|
|
}
|
|
|
|
|
cpufreq: schedutil: Avoid reducing frequency of busy CPUs prematurely
The way the schedutil governor uses the PELT metric causes it to
underestimate the CPU utilization in some cases.
That can be easily demonstrated by running kernel compilation on
a Sandy Bridge Intel processor, running turbostat in parallel with
it and looking at the values written to the MSR_IA32_PERF_CTL
register. Namely, the expected result would be that when all CPUs
were 100% busy, all of them would be requested to run in the maximum
P-state, but observation shows that this clearly isn't the case.
The CPUs run in the maximum P-state for a while and then are
requested to run slower and go back to the maximum P-state after
a while again. That causes the actual frequency of the processor to
visibly oscillate below the sustainable maximum in a jittery fashion
which clearly is not desirable.
That has been attributed to CPU utilization metric updates on task
migration that cause the total utilization value for the CPU to be
reduced by the utilization of the migrated task. If that happens,
the schedutil governor may see a CPU utilization reduction and will
attempt to reduce the CPU frequency accordingly right away. That
may be premature, though, for example if the system is generally
busy and there are other runnable tasks waiting to be run on that
CPU already.
This is unlikely to be an issue on systems where cpufreq policies are
shared between multiple CPUs, because in those cases the policy
utilization is computed as the maximum of the CPU utilization values
over the whole policy and if that turns out to be low, reducing the
frequency for the policy most likely is a good idea anyway. On
systems with one CPU per policy, however, it may affect performance
adversely and even lead to increased energy consumption in some cases.
On those systems it may be addressed by taking another utilization
metric into consideration, like whether or not the CPU whose
frequency is about to be reduced has been idle recently, because if
that's not the case, the CPU is likely to be busy in the near future
and its frequency should not be reduced.
To that end, use the counter of idle calls in the timekeeping code.
Namely, make the schedutil governor look at that counter for the
current CPU every time before its frequency is about to be reduced.
If the counter has not changed since the previous iteration of the
governor computations for that CPU, the CPU has been busy for all
that time and its frequency should not be decreased, so if the new
frequency would be lower than the one set previously, the governor
will skip the frequency update.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Joel Fernandes <joelaf@google.com>
2017-03-21 17:08:50 -06:00
|
|
|
/**
|
|
|
|
* tick_nohz_get_idle_calls - return the current idle calls counter value
|
|
|
|
*
|
|
|
|
* Called from the schedutil frequency scaling governor in scheduler context.
|
|
|
|
*/
|
|
|
|
unsigned long tick_nohz_get_idle_calls(void)
|
|
|
|
{
|
|
|
|
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
|
|
|
|
|
|
|
|
return ts->idle_calls;
|
|
|
|
}
|
|
|
|
|
2011-07-27 20:00:47 -06:00
|
|
|
static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
|
|
|
|
{
|
2012-07-16 10:00:34 -06:00
|
|
|
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
|
2011-07-27 20:00:47 -06:00
|
|
|
unsigned long ticks;
|
2012-07-16 10:00:34 -06:00
|
|
|
|
2015-11-19 08:47:32 -07:00
|
|
|
if (vtime_accounting_cpu_enabled())
|
2012-07-16 10:00:34 -06:00
|
|
|
return;
|
2007-02-16 02:28:03 -07:00
|
|
|
/*
|
|
|
|
* We stopped the tick in idle. Update process times would miss the
|
|
|
|
* time we slept as update_process_times does only a 1 tick
|
|
|
|
* accounting. Enforce that this is accounted to idle !
|
|
|
|
*/
|
|
|
|
ticks = jiffies - ts->idle_jiffies;
|
|
|
|
/*
|
|
|
|
* We might be one off. Do not randomly account a huge number of ticks!
|
|
|
|
*/
|
2008-12-31 07:11:38 -07:00
|
|
|
if (ticks && ticks < LONG_MAX)
|
|
|
|
account_idle_ticks(ticks);
|
|
|
|
#endif
|
2011-07-27 09:29:28 -06:00
|
|
|
}
|
|
|
|
|
2018-03-15 16:05:50 -06:00
|
|
|
static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
|
|
|
|
{
|
|
|
|
tick_nohz_restart_sched_tick(ts, now);
|
|
|
|
tick_nohz_account_idle_ticks(ts);
|
|
|
|
}
|
|
|
|
|
|
|
|
void tick_nohz_idle_restart_tick(void)
|
|
|
|
{
|
|
|
|
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
|
|
|
|
|
|
|
|
if (ts->tick_stopped)
|
|
|
|
__tick_nohz_idle_restart_tick(ts, ktime_get());
|
|
|
|
}
|
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
/**
|
2011-10-07 10:22:06 -06:00
|
|
|
* tick_nohz_idle_exit - restart the idle tick from the idle task
|
2007-02-16 02:28:03 -07:00
|
|
|
*
|
|
|
|
* Restart the idle tick when the CPU is woken up from idle
|
2011-10-07 10:22:06 -06:00
|
|
|
* This also exit the RCU extended quiescent state. The CPU
|
|
|
|
* can use RCU again after this function is called.
|
2007-02-16 02:28:03 -07:00
|
|
|
*/
|
2011-10-07 10:22:06 -06:00
|
|
|
void tick_nohz_idle_exit(void)
|
2007-02-16 02:28:03 -07:00
|
|
|
{
|
2014-08-17 11:30:27 -06:00
|
|
|
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
|
2018-04-09 06:23:33 -06:00
|
|
|
bool idle_active, tick_stopped;
|
2008-01-30 05:30:04 -07:00
|
|
|
ktime_t now;
|
2007-02-16 02:28:03 -07:00
|
|
|
|
2008-01-30 05:30:04 -07:00
|
|
|
local_irq_disable();
|
2011-10-08 08:01:00 -06:00
|
|
|
|
2012-01-24 10:59:43 -07:00
|
|
|
WARN_ON_ONCE(!ts->inidle);
|
2018-04-05 11:07:57 -06:00
|
|
|
WARN_ON_ONCE(ts->timer_expires_base);
|
2012-01-24 10:59:43 -07:00
|
|
|
|
|
|
|
ts->inidle = 0;
|
2018-04-09 06:23:33 -06:00
|
|
|
idle_active = ts->idle_active;
|
|
|
|
tick_stopped = ts->tick_stopped;
|
2012-01-24 10:59:43 -07:00
|
|
|
|
2018-04-09 06:23:33 -06:00
|
|
|
if (idle_active || tick_stopped)
|
2009-09-29 06:25:15 -06:00
|
|
|
now = ktime_get();
|
|
|
|
|
2018-04-09 06:23:33 -06:00
|
|
|
if (idle_active)
|
2013-08-07 14:28:01 -06:00
|
|
|
tick_nohz_stop_idle(ts, now);
|
2008-01-30 05:30:04 -07:00
|
|
|
|
2018-04-09 06:23:33 -06:00
|
|
|
if (tick_stopped)
|
2018-03-15 16:05:50 -06:00
|
|
|
__tick_nohz_idle_restart_tick(ts, now);
|
2007-02-16 02:28:03 -07:00
|
|
|
|
|
|
|
local_irq_enable();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The nohz low res interrupt handler
|
|
|
|
*/
|
|
|
|
static void tick_nohz_handler(struct clock_event_device *dev)
|
|
|
|
{
|
2014-08-17 11:30:25 -06:00
|
|
|
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
|
2007-02-16 02:28:03 -07:00
|
|
|
struct pt_regs *regs = get_irq_regs();
|
|
|
|
ktime_t now = ktime_get();
|
|
|
|
|
2016-12-25 03:38:40 -07:00
|
|
|
dev->next_event = KTIME_MAX;
|
2007-02-16 02:28:03 -07:00
|
|
|
|
2018-04-06 06:59:13 -06:00
|
|
|
tick_sched_do_timer(ts, now);
|
2012-10-14 18:43:03 -06:00
|
|
|
tick_sched_handle(ts, regs);
|
2007-02-16 02:28:03 -07:00
|
|
|
|
2014-06-12 04:54:41 -06:00
|
|
|
/* No need to reprogram if we are running tickless */
|
|
|
|
if (unlikely(ts->tick_stopped))
|
|
|
|
return;
|
|
|
|
|
2015-04-14 15:08:54 -06:00
|
|
|
hrtimer_forward(&ts->sched_timer, now, tick_period);
|
|
|
|
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
|
2007-02-16 02:28:03 -07:00
|
|
|
}
|
|
|
|
|
2015-05-26 16:50:33 -06:00
|
|
|
static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
|
|
|
|
{
|
|
|
|
if (!tick_nohz_enabled)
|
|
|
|
return;
|
|
|
|
ts->nohz_mode = mode;
|
|
|
|
/* One update is enough */
|
|
|
|
if (!test_and_set_bit(0, &tick_nohz_active))
|
2018-01-14 15:30:51 -07:00
|
|
|
timers_update_nohz();
|
2015-05-26 16:50:33 -06:00
|
|
|
}
|
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
/**
|
|
|
|
* tick_nohz_switch_to_nohz - switch to nohz mode
|
|
|
|
*/
|
|
|
|
static void tick_nohz_switch_to_nohz(void)
|
|
|
|
{
|
2014-08-17 11:30:25 -06:00
|
|
|
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
|
2007-02-16 02:28:03 -07:00
|
|
|
ktime_t next;
|
|
|
|
|
2014-04-14 23:24:41 -06:00
|
|
|
if (!tick_nohz_enabled)
|
2007-02-16 02:28:03 -07:00
|
|
|
return;
|
|
|
|
|
2015-05-07 06:35:59 -06:00
|
|
|
if (tick_switch_to_oneshot(tick_nohz_handler))
|
2007-02-16 02:28:03 -07:00
|
|
|
return;
|
2015-05-07 06:35:59 -06:00
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
/*
|
|
|
|
* Recycle the hrtimer in ts, so we can share the
|
|
|
|
* hrtimer_forward with the highres code.
|
|
|
|
*/
|
|
|
|
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
|
|
|
|
/* Get the next period */
|
|
|
|
next = tick_init_jiffy_update();
|
|
|
|
|
2015-04-14 15:08:54 -06:00
|
|
|
hrtimer_set_expires(&ts->sched_timer, next);
|
2016-01-27 04:26:07 -07:00
|
|
|
hrtimer_forward_now(&ts->sched_timer, tick_period);
|
|
|
|
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
|
2015-05-26 16:50:33 -06:00
|
|
|
tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
|
2007-02-16 02:28:03 -07:00
|
|
|
}
|
|
|
|
|
2013-12-04 10:28:20 -07:00
|
|
|
static inline void tick_nohz_irq_enter(void)
|
2009-09-29 06:25:15 -06:00
|
|
|
{
|
2014-08-17 11:30:27 -06:00
|
|
|
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
|
2009-09-29 06:25:15 -06:00
|
|
|
ktime_t now;
|
|
|
|
|
|
|
|
if (!ts->idle_active && !ts->tick_stopped)
|
|
|
|
return;
|
|
|
|
now = ktime_get();
|
|
|
|
if (ts->idle_active)
|
2013-08-07 14:28:01 -06:00
|
|
|
tick_nohz_stop_idle(ts, now);
|
2016-07-04 03:50:35 -06:00
|
|
|
if (ts->tick_stopped)
|
2009-09-29 06:25:15 -06:00
|
|
|
tick_nohz_update_jiffies(now);
|
|
|
|
}
|
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
#else
|
|
|
|
|
|
|
|
static inline void tick_nohz_switch_to_nohz(void) { }
|
2013-12-04 10:28:20 -07:00
|
|
|
static inline void tick_nohz_irq_enter(void) { }
|
2015-05-26 16:50:33 -06:00
|
|
|
static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
|
2007-02-16 02:28:03 -07:00
|
|
|
|
2011-08-10 15:21:01 -06:00
|
|
|
#endif /* CONFIG_NO_HZ_COMMON */
|
2007-02-16 02:28:03 -07:00
|
|
|
|
2008-10-17 01:59:47 -06:00
|
|
|
/*
|
|
|
|
* Called from irq_enter to notify about the possible interruption of idle()
|
|
|
|
*/
|
2013-12-04 10:28:20 -07:00
|
|
|
void tick_irq_enter(void)
|
2008-10-17 01:59:47 -06:00
|
|
|
{
|
2013-08-07 14:28:01 -06:00
|
|
|
tick_check_oneshot_broadcast_this_cpu();
|
2013-12-04 10:28:20 -07:00
|
|
|
tick_nohz_irq_enter();
|
2008-10-17 01:59:47 -06:00
|
|
|
}
|
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
/*
|
|
|
|
* High resolution timer specific code
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_HIGH_RES_TIMERS
|
2020-07-20 13:30:36 -06:00
|
|
|
static void (*wake_callback)(void);
|
2016-08-23 17:07:11 -06:00
|
|
|
|
2020-08-01 13:15:07 -06:00
|
|
|
void register_tick_sched_wakeup_callback(void (*cb)(void))
|
|
|
|
{
|
|
|
|
if (!wake_callback)
|
|
|
|
wake_callback = cb;
|
|
|
|
else
|
|
|
|
pr_warn("tick-sched wake cb already exists; skipping.\n");
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(register_tick_sched_wakeup_callback);
|
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
/*
|
2008-01-30 05:30:00 -07:00
|
|
|
* We rearm the timer until we get disabled by the idle code.
|
2012-10-24 11:07:35 -06:00
|
|
|
* Called with interrupts disabled.
|
2007-02-16 02:28:03 -07:00
|
|
|
*/
|
|
|
|
static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
|
|
|
|
{
|
|
|
|
struct tick_sched *ts =
|
|
|
|
container_of(timer, struct tick_sched, sched_timer);
|
|
|
|
struct pt_regs *regs = get_irq_regs();
|
|
|
|
ktime_t now = ktime_get();
|
2007-05-08 01:30:03 -06:00
|
|
|
|
2018-04-06 06:59:13 -06:00
|
|
|
tick_sched_do_timer(ts, now);
|
2007-02-16 02:28:03 -07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Do not call, when we are not in irq context and have
|
|
|
|
* no valid regs pointer
|
|
|
|
*/
|
2016-08-23 17:07:11 -06:00
|
|
|
if (regs) {
|
2012-10-14 18:43:03 -06:00
|
|
|
tick_sched_handle(ts, regs);
|
Merge android-4.19-stable.136 (204dd19) into msm-4.19
* refs/heads/tmp-204dd19:
UPSTREAM: driver core: Avoid deferred probe due to fw_devlink_pause/resume()
UPSTREAM: driver core: Rename dev_links_info.defer_sync to defer_hook
UPSTREAM: driver core: Don't do deferred probe in parallel with kernel_init thread
Restore sdcardfs feature
Revert rpmh and usb changes
Linux 4.19.136
regmap: debugfs: check count when read regmap file
rtnetlink: Fix memory(net_device) leak when ->newlink fails
udp: Improve load balancing for SO_REUSEPORT.
udp: Copy has_conns in reuseport_grow().
sctp: shrink stream outq when fails to do addstream reconf
sctp: shrink stream outq only when new outcnt < old outcnt
AX.25: Prevent integer overflows in connect and sendmsg
tcp: allow at most one TLP probe per flight
rxrpc: Fix sendmsg() returning EPIPE due to recvmsg() returning ENODATA
qrtr: orphan socket in qrtr_release()
net: udp: Fix wrong clean up for IS_UDPLITE macro
net-sysfs: add a newline when printing 'tx_timeout' by sysfs
ip6_gre: fix null-ptr-deref in ip6gre_init_net()
drivers/net/wan/x25_asy: Fix to make it work
dev: Defer free of skbs in flush_backlog
AX.25: Prevent out-of-bounds read in ax25_sendmsg()
AX.25: Fix out-of-bounds read in ax25_connect()
Linux 4.19.135
ath9k: Fix regression with Atheros 9271
ath9k: Fix general protection fault in ath9k_hif_usb_rx_cb
dm integrity: fix integrity recalculation that is improperly skipped
ASoC: qcom: Drop HAS_DMA dependency to fix link failure
ASoC: rt5670: Add new gpio1_is_ext_spk_en quirk and enable it on the Lenovo Miix 2 10
x86, vmlinux.lds: Page-align end of ..page_aligned sections
parisc: Add atomic64_set_release() define to avoid CPU soft lockups
drm/amd/powerplay: fix a crash when overclocking Vega M
drm/amdgpu: Fix NULL dereference in dpm sysfs handlers
io-mapping: indicate mapping failure
mm: memcg/slab: fix memory leak at non-root kmem_cache destroy
mm: memcg/slab: synchronize access to kmem_cache dying flag using a spinlock
mm/memcg: fix refcount error while moving and swapping
Makefile: Fix GCC_TOOLCHAIN_DIR prefix for Clang cross compilation
vt: Reject zero-sized screen buffer size.
fbdev: Detect integer underflow at "struct fbcon_ops"->clear_margins.
serial: 8250_mtk: Fix high-speed baud rates clamping
serial: 8250: fix null-ptr-deref in serial8250_start_tx()
staging: comedi: addi_apci_1564: check INSN_CONFIG_DIGITAL_TRIG shift
staging: comedi: addi_apci_1500: check INSN_CONFIG_DIGITAL_TRIG shift
staging: comedi: ni_6527: fix INSN_CONFIG_DIGITAL_TRIG support
staging: comedi: addi_apci_1032: check INSN_CONFIG_DIGITAL_TRIG shift
staging: wlan-ng: properly check endpoint types
Revert "cifs: Fix the target file was deleted when rename failed."
usb: xhci: Fix ASM2142/ASM3142 DMA addressing
usb: xhci-mtk: fix the failure of bandwidth allocation
binder: Don't use mmput() from shrinker function.
RISC-V: Upgrade smp_mb__after_spinlock() to iorw,iorw
x86: math-emu: Fix up 'cmp' insn for clang ias
arm64: Use test_tsk_thread_flag() for checking TIF_SINGLESTEP
hwmon: (scmi) Fix potential buffer overflow in scmi_hwmon_probe()
hwmon: (adm1275) Make sure we are reading enough data for different chips
usb: gadget: udc: gr_udc: fix memleak on error handling path in gr_ep_init()
Input: synaptics - enable InterTouch for ThinkPad X1E 1st gen
dmaengine: ioat setting ioat timeout as module parameter
hwmon: (aspeed-pwm-tacho) Avoid possible buffer overflow
regmap: dev_get_regmap_match(): fix string comparison
spi: mediatek: use correct SPI_CFG2_REG MACRO
Input: add `SW_MACHINE_COVER`
dmaengine: tegra210-adma: Fix runtime PM imbalance on error
HID: apple: Disable Fn-key key-re-mapping on clone keyboards
HID: steam: fixes race in handling device list.
HID: alps: support devices with report id 2
HID: i2c-hid: add Mediacom FlexBook edge13 to descriptor override
scripts/gdb: fix lx-symbols 'gdb.error' while loading modules
scripts/decode_stacktrace: strip basepath from all paths
serial: exar: Fix GPIO configuration for Sealevel cards based on XR17V35X
bonding: check return value of register_netdevice() in bond_newlink()
i2c: rcar: always clear ICSAR to avoid side effects
net: ethernet: ave: Fix error returns in ave_init
ipvs: fix the connection sync failed in some cases
qed: suppress "don't support RoCE & iWARP" flooding on HW init
mlxsw: destroy workqueue when trap_register in mlxsw_emad_init
bonding: check error value of register_netdevice() immediately
net: smc91x: Fix possible memory leak in smc_drv_probe()
drm: sun4i: hdmi: Fix inverted HPD result
ieee802154: fix one possible memleak in adf7242_probe
net: dp83640: fix SIOCSHWTSTAMP to update the struct with actual configuration
ax88172a: fix ax88172a_unbind() failures
hippi: Fix a size used in a 'pci_free_consistent()' in an error handling path
fpga: dfl: fix bug in port reset handshake
bnxt_en: Fix race when modifying pause settings.
btrfs: fix page leaks after failure to lock page for delalloc
btrfs: fix mount failure caused by race with umount
btrfs: fix double free on ulist after backref resolution failure
ASoC: rt5670: Correct RT5670_LDO_SEL_MASK
ALSA: info: Drop WARN_ON() from buffer NULL sanity check
uprobes: Change handle_swbp() to send SIGTRAP with si_code=SI_KERNEL, to fix GDB regression
IB/umem: fix reference count leak in ib_umem_odp_get()
tipc: clean up skb list lock handling on send path
spi: spi-fsl-dspi: Exit the ISR with IRQ_NONE when it's not ours
SUNRPC reverting d03727b248d0 ("NFSv4 fix CLOSE not waiting for direct IO compeletion")
irqdomain/treewide: Keep firmware node unconditionally allocated
fuse: fix weird page warning
drivers/firmware/psci: Fix memory leakage in alloc_init_cpu_groups()
drm/nouveau/i2c/g94-: increase NV_PMGR_DP_AUXCTL_TRANSACTREQ timeout
net: sky2: initialize return of gm_phy_read
drivers/net/wan/lapbether: Fixed the value of hard_header_len
xtensa: update *pos in cpuinfo_op.next
xtensa: fix __sync_fetch_and_{and,or}_4 declarations
scsi: scsi_transport_spi: Fix function pointer check
mac80211: allow rx of mesh eapol frames with default rx key
pinctrl: amd: fix npins for uart0 in kerncz_groups
gpio: arizona: put pm_runtime in case of failure
gpio: arizona: handle pm_runtime_get_sync failure case
soc: qcom: rpmh: Dirt can only make you dirtier, not cleaner
ANDROID: build: update ABI definitions
ANDROID: update the kernel release format for GKI
ANDROID: Incremental fs: magic number compatible 32-bit
ANDROID: kbuild: don't merge .*..compoundliteral in modules
ANDROID: GKI: preserve ABI for struct sock_cgroup_data
Revert "genetlink: remove genl_bind"
Revert "arm64/alternatives: use subsections for replacement sequences"
Linux 4.19.134
spi: sprd: switch the sequence of setting WDG_LOAD_LOW and _HIGH
rxrpc: Fix trace string
libceph: don't omit recovery_deletes in target_copy()
printk: queue wake_up_klogd irq_work only if per-CPU areas are ready
genirq/affinity: Handle affinity setting on inactive interrupts correctly
sched/fair: handle case of task_h_load() returning 0
sched: Fix unreliable rseq cpu_id for new tasks
arm64: compat: Ensure upper 32 bits of x0 are zero on syscall return
arm64: ptrace: Consistently use pseudo-singlestep exceptions
arm64: ptrace: Override SPSR.SS when single-stepping is enabled
thermal/drivers/cpufreq_cooling: Fix wrong frequency converted from power
misc: atmel-ssc: lock with mutex instead of spinlock
dmaengine: fsl-edma: Fix NULL pointer exception in fsl_edma_tx_handler
intel_th: Fix a NULL dereference when hub driver is not loaded
intel_th: pci: Add Emmitsburg PCH support
intel_th: pci: Add Tiger Lake PCH-H support
intel_th: pci: Add Jasper Lake CPU support
powerpc/book3s64/pkeys: Fix pkey_access_permitted() for execute disable pkey
hwmon: (emc2103) fix unable to change fan pwm1_enable attribute
riscv: use 16KB kernel stack on 64-bit
MIPS: Fix build for LTS kernel caused by backporting lpj adjustment
timer: Fix wheel index calculation on last level
timer: Prevent base->clk from moving backward
uio_pdrv_genirq: fix use without device tree and no interrupt
Input: i8042 - add Lenovo XiaoXin Air 12 to i8042 nomux list
mei: bus: don't clean driver pointer
Revert "zram: convert remaining CLASS_ATTR() to CLASS_ATTR_RO()"
fuse: Fix parameter for FS_IOC_{GET,SET}FLAGS
ovl: fix unneeded call to ovl_change_flags()
ovl: relax WARN_ON() when decoding lower directory file handle
ovl: inode reference leak in ovl_is_inuse true case.
serial: mxs-auart: add missed iounmap() in probe failure and remove
virtio: virtio_console: add missing MODULE_DEVICE_TABLE() for rproc serial
virt: vbox: Fix guest capabilities mask check
virt: vbox: Fix VBGL_IOCTL_VMMDEV_REQUEST_BIG and _LOG req numbers to match upstream
USB: serial: option: add Quectel EG95 LTE modem
USB: serial: option: add GosunCn GM500 series
USB: serial: ch341: add new Product ID for CH340
USB: serial: cypress_m8: enable Simply Automated UPB PIM
USB: serial: iuu_phoenix: fix memory corruption
usb: gadget: function: fix missing spinlock in f_uac1_legacy
usb: chipidea: core: add wakeup support for extcon
usb: dwc2: Fix shutdown callback in platform
USB: c67x00: fix use after free in c67x00_giveback_urb
ALSA: hda/realtek - Enable Speaker for ASUS UX533 and UX534
ALSA: hda/realtek - change to suitable link model for ASUS platform
ALSA: usb-audio: Fix race against the error recovery URB submission
ALSA: line6: Sync the pending work cancel at disconnection
ALSA: line6: Perform sanity check for each URB creation
HID: quirks: Ignore Simply Automated UPB PIM
HID: quirks: Always poll Obins Anne Pro 2 keyboard
HID: magicmouse: do not set up autorepeat
slimbus: core: Fix mismatch in of_node_get/put
mtd: rawnand: oxnas: Release all devices in the _remove() path
mtd: rawnand: oxnas: Unregister all devices on error
mtd: rawnand: oxnas: Keep track of registered devices
mtd: rawnand: brcmnand: fix CS0 layout
mtd: rawnand: timings: Fix default tR_max and tCCS_min timings
mtd: rawnand: marvell: Fix probe error path
mtd: rawnand: marvell: Use nand_cleanup() when the device is not yet registered
soc: qcom: rpmh-rsc: Allow using free WAKE TCS for active request
soc: qcom: rpmh-rsc: Clear active mode configuration for wake TCS
soc: qcom: rpmh: Invalidate SLEEP and WAKE TCSes before flushing new data
soc: qcom: rpmh: Update dirty flag only when data changes
perf stat: Zero all the 'ena' and 'run' array slot stats for interval mode
apparmor: ensure that dfa state tables have entries
copy_xstate_to_kernel: Fix typo which caused GDB regression
regmap: debugfs: Don't sleep while atomic for fast_io regmaps
ARM: dts: socfpga: Align L2 cache-controller nodename with dtschema
Revert "thermal: mediatek: fix register index error"
staging: comedi: verify array index is correct before using it
usb: gadget: udc: atmel: fix uninitialized read in debug printk
spi: spi-sun6i: sun6i_spi_transfer_one(): fix setting of clock rate
arm64: dts: meson: add missing gxl rng clock
phy: sun4i-usb: fix dereference of pointer phy0 before it is null checked
iio:health:afe4404 Fix timestamp alignment and prevent data leak.
ALSA: usb-audio: Add registration quirk for Kingston HyperX Cloud Flight S
ACPI: video: Use native backlight on Acer TravelMate 5735Z
Input: mms114 - add extra compatible for mms345l
ALSA: usb-audio: Add registration quirk for Kingston HyperX Cloud Alpha S
ACPI: video: Use native backlight on Acer Aspire 5783z
ALSA: usb-audio: Rewrite registration quirk handling
mmc: sdhci: do not enable card detect interrupt for gpio cd type
doc: dt: bindings: usb: dwc3: Update entries for disabling SS instances in park mode
ALSA: usb-audio: Create a registration quirk for Kingston HyperX Amp (0951:16d8)
scsi: sr: remove references to BLK_DEV_SR_VENDOR, leave it enabled
ARM: at91: pm: add quirk for sam9x60's ulp1
HID: quirks: Remove ITE 8595 entry from hid_have_special_driver
net: sfp: add some quirks for GPON modules
net: sfp: add support for module quirks
Revert "usb/ehci-platform: Set PM runtime as active on resume"
Revert "usb/xhci-plat: Set PM runtime as active on resume"
Revert "usb/ohci-platform: Fix a warning when hibernating"
of: of_mdio: Correct loop scanning logic
net: dsa: bcm_sf2: Fix node reference count
spi: spi-fsl-dspi: Fix lockup if device is shutdown during SPI transfer
spi: fix initial SPI_SR value in spi-fsl-dspi
iio:health:afe4403 Fix timestamp alignment and prevent data leak.
iio:pressure:ms5611 Fix buffer element alignment
iio:humidity:hts221 Fix alignment and data leak issues
iio: pressure: zpa2326: handle pm_runtime_get_sync failure
iio: mma8452: Add missed iio_device_unregister() call in mma8452_probe()
iio: magnetometer: ak8974: Fix runtime PM imbalance on error
iio:humidity:hdc100x Fix alignment and data leak issues
iio:magnetometer:ak8974: Fix alignment and data leak issues
arm64/alternatives: don't patch up internal branches
i2c: eg20t: Load module automatically if ID matches
gfs2: read-only mounts should grab the sd_freeze_gl glock
tpm_tis: extra chip->ops check on error path in tpm_tis_core_init
arm64/alternatives: use subsections for replacement sequences
m68k: mm: fix node memblock init
m68k: nommu: register start of the memory with memblock
drm/exynos: fix ref count leak in mic_pre_enable
drm/msm: fix potential memleak in error branch
vlan: consolidate VLAN parsing code and limit max parsing depth
sched: consistently handle layer3 header accesses in the presence of VLANs
cgroup: Fix sock_cgroup_data on big-endian.
cgroup: fix cgroup_sk_alloc() for sk_clone_lock()
tcp: md5: allow changing MD5 keys in all socket states
tcp: md5: refine tcp_md5_do_add()/tcp_md5_hash_key() barriers
tcp: md5: do not send silly options in SYNCOOKIES
tcp: md5: add missing memory barriers in tcp_md5_do_add()/tcp_md5_hash_key()
tcp: make sure listeners don't initialize congestion-control state
tcp: fix SO_RCVLOWAT possible hangs under high mem pressure
net: usb: qmi_wwan: add support for Quectel EG95 LTE modem
net_sched: fix a memory leak in atm_tc_init()
net: Added pointer check for dst->ops->neigh_lookup in dst_neigh_lookup_skb
llc: make sure applications use ARPHRD_ETHER
l2tp: remove skb_dst_set() from l2tp_xmit_skb()
ipv4: fill fl4_icmp_{type,code} in ping_v4_sendmsg
genetlink: remove genl_bind
net: rmnet: fix lower interface leak
perf: Make perf able to build with latest libbfd
UPSTREAM: media: v4l2-ctrl: Add H264 profile and levels
UPSTREAM: media: v4l2-ctrl: Add control for h.264 chroma qp offset
ANDROID: GKI: ASoC: compress: revert some code to avoid race condition
ANDROID: GKI: Update the ABI xml representation.
ANDROID: GKI: kernel: tick-sched: Add an API for wakeup callbacks
ANDROID: ASoC: Compress: Check and set pcm_new driver op
Revert "ANDROID: GKI: arm64: gki_defconfig: Disable CONFIG_ARM64_TAGGED_ADDR_ABI"
ANDROID: arm64: configs: enabe CONFIG_TMPFS
Revert "ALSA: compress: fix partial_drain completion state"
ANDROID: GKI: enable CONFIG_EXT4_FS_POSIX_ACL.
ANDROID: GKI: set CONFIG_STATIC_USERMODEHELPER_PATH
Linux 4.19.133
s390/mm: fix huge pte soft dirty copying
ARC: elf: use right ELF_ARCH
ARC: entry: fix potential EFA clobber when TIF_SYSCALL_TRACE
dm: use noio when sending kobject event
drm/radeon: fix double free
btrfs: fix fatal extent_buffer readahead vs releasepage race
Revert "ath9k: Fix general protection fault in ath9k_hif_usb_rx_cb"
bpf: Check correct cred for CAP_SYSLOG in bpf_dump_raw_ok()
kprobes: Do not expose probe addresses to non-CAP_SYSLOG
module: Do not expose section addresses to non-CAP_SYSLOG
module: Refactor section attr into bin attribute
kernel: module: Use struct_size() helper
kallsyms: Refactor kallsyms_show_value() to take cred
KVM: x86: Mark CR4.TSD as being possibly owned by the guest
KVM: x86: Inject #GP if guest attempts to toggle CR4.LA57 in 64-bit mode
KVM: x86: bit 8 of non-leaf PDPEs is not reserved
KVM: arm64: Stop clobbering x0 for HVC_SOFT_RESTART
KVM: arm64: Fix definition of PAGE_HYP_DEVICE
ALSA: usb-audio: add quirk for MacroSilicon MS2109
ALSA: hda - let hs_mic be picked ahead of hp_mic
ALSA: opl3: fix infoleak in opl3
mlxsw: spectrum_router: Remove inappropriate usage of WARN_ON()
net: macb: mark device wake capable when "magic-packet" property present
bnxt_en: fix NULL dereference in case SR-IOV configuration fails
cxgb4: fix all-mask IP address comparison
nbd: Fix memory leak in nbd_add_socket
arm64: kgdb: Fix single-step exception handling oops
ALSA: compress: fix partial_drain completion state
net: hns3: fix use-after-free when doing self test
smsc95xx: avoid memory leak in smsc95xx_bind
smsc95xx: check return value of smsc95xx_reset
net: cxgb4: fix return error value in t4_prep_fw
drm/mediatek: Check plane visibility in atomic_update
net: qrtr: Fix an out of bounds read qrtr_endpoint_post()
x86/entry: Increase entry_stack size to a full page
nvme-rdma: assign completion vector correctly
block: release bip in a right way in error path
usb: dwc3: pci: Fix reference count leak in dwc3_pci_resume_work
scsi: mptscsih: Fix read sense data size
ARM: imx6: add missing put_device() call in imx6q_suspend_init()
cifs: update ctime and mtime during truncate
s390/kasan: fix early pgm check handler execution
drm: panel-orientation-quirks: Use generic orientation-data for Acer S1003
drm: panel-orientation-quirks: Add quirk for Asus T101HA panel
i40e: protect ring accesses with READ- and WRITE_ONCE
ixgbe: protect ring accesses with READ- and WRITE_ONCE
spi: spidev: fix a potential use-after-free in spidev_release()
spi: spidev: fix a race between spidev_release and spidev_remove
gpu: host1x: Detach driver on unregister
drm/tegra: hub: Do not enable orphaned window group
ARM: dts: omap4-droid4: Fix spi configuration and increase rate
regmap: fix alignment issue
spi: spi-fsl-dspi: Fix external abort on interrupt in resume or exit paths
spi: spi-fsl-dspi: use IRQF_SHARED mode to request IRQ
spi: spi-fsl-dspi: Fix lockup if device is removed during SPI transfer
spi: spi-fsl-dspi: Adding shutdown hook
KVM: s390: reduce number of IO pins to 1
ANDROID: GKI: update abi based on padding fields being added
ANDROID: GKI: USB: Gadget: add Android ABI padding to struct usb_gadget
ANDROID: GKI: sound/usb/card.h: add Android ABI padding to struct snd_usb_endpoint
ANDROID: fscrypt: fix DUN contiguity with inline encryption + IV_INO_LBLK_32 policies
ANDROID: f2fs: add back compress inode check
Linux 4.19.132
efi: Make it possible to disable efivar_ssdt entirely
dm zoned: assign max_io_len correctly
irqchip/gic: Atomically update affinity
MIPS: Add missing EHB in mtc0 -> mfc0 sequence for DSPen
cifs: Fix the target file was deleted when rename failed.
SMB3: Honor lease disabling for multiuser mounts
SMB3: Honor persistent/resilient handle flags for multiuser mounts
SMB3: Honor 'seal' flag for multiuser mounts
Revert "ALSA: usb-audio: Improve frames size computation"
nfsd: apply umask on fs without ACL support
i2c: mlxcpld: check correct size of maximum RECV_LEN packet
i2c: algo-pca: Add 0x78 as SCL stuck low status for PCA9665
nvme: fix a crash in nvme_mpath_add_disk
SMB3: Honor 'posix' flag for multiuser mounts
virtio-blk: free vblk-vqs in error path of virtblk_probe()
drm: sun4i: hdmi: Remove extra HPD polling
hwmon: (acpi_power_meter) Fix potential memory leak in acpi_power_meter_add()
hwmon: (max6697) Make sure the OVERT mask is set correctly
cxgb4: fix SGE queue dump destination buffer context
cxgb4: use correct type for all-mask IP address comparison
cxgb4: parse TC-U32 key values and masks natively
cxgb4: use unaligned conversion for fetching timestamp
drm/msm/dpu: fix error return code in dpu_encoder_init
crypto: af_alg - fix use-after-free in af_alg_accept() due to bh_lock_sock()
kgdb: Avoid suspicious RCU usage warning
nvme-multipath: fix deadlock between ana_work and scan_work
nvme-multipath: set bdi capabilities once
s390/debug: avoid kernel warning on too large number of pages
usb: usbtest: fix missing kfree(dev->buf) in usbtest_disconnect
mm/slub: fix stack overruns with SLUB_STATS
mm/slub.c: fix corrupted freechain in deactivate_slab()
usbnet: smsc95xx: Fix use-after-free after removal
EDAC/amd64: Read back the scrub rate PCI register on F15h
mm: fix swap cache node allocation mask
btrfs: fix a block group ref counter leak after failure to remove block group
ANDROID: Update ABI representation for libabigail update
ANDROID: Update the ABI representation
ANDROID: Update the ABI xml representation
ANDROID: GKI: fix ABI diffs caused by GPU heap and pool vmstat additions
ANDROID: sched: consider stune boost margin when computing energy
ANDROID: GKI: move abi files to android/
ANDROID: GKI: drop unneeded "_whitelist" off of symbol filenames
UPSTREAM: binder: fix null deref of proc->context
ANDROID: cpufreq: schedutil: maintain raw cache when next_f is not changed
UPSTREAM: net: bpf: Make bpf_ktime_get_ns() available to non GPL programs
UPSTREAM: usb: musb: mediatek: add reset FADDR to zero in reset interrupt handle
ANDROID: GKI: scripts: Makefile: update the lz4 command (#2)
ANDROID: Update the ABI xml representation
Revert "drm/dsi: Fix byte order of DCS set/get brightness"
Linux 4.19.131
Revert "tty: hvc: Fix data abort due to race in hvc_open"
xfs: add agf freeblocks verify in xfs_agf_verify
dm writecache: add cond_resched to loop in persistent_memory_claim()
dm writecache: correct uncommitted_block when discarding uncommitted entry
NFSv4 fix CLOSE not waiting for direct IO compeletion
pNFS/flexfiles: Fix list corruption if the mirror count changes
SUNRPC: Properly set the @subbuf parameter of xdr_buf_subsegment()
sunrpc: fixed rollback in rpc_gssd_dummy_populate()
Staging: rtl8723bs: prevent buffer overflow in update_sta_support_rate()
drm/radeon: fix fb_div check in ni_init_smc_spll_table()
drm: rcar-du: Fix build error
ring-buffer: Zero out time extend if it is nested and not absolute
tracing: Fix event trigger to accept redundant spaces
arm64: perf: Report the PC value in REGS_ABI_32 mode
ocfs2: fix panic on nfs server over ocfs2
ocfs2: fix value of OCFS2_INVALID_SLOT
ocfs2: load global_inode_alloc
ocfs2: avoid inode removal while nfsd is accessing it
mm/slab: use memzero_explicit() in kzfree()
btrfs: fix failure of RWF_NOWAIT write into prealloc extent beyond eof
btrfs: fix data block group relocation failure due to concurrent scrub
x86/asm/64: Align start of __clear_user() loop to 16-bytes
KVM: nVMX: Plumb L2 GPA through to PML emulation
KVM: X86: Fix MSR range of APIC registers in X2APIC mode
erofs: fix partially uninitialized misuse in z_erofs_onlinepage_fixup
ACPI: sysfs: Fix pm_profile_attr type
ALSA: hda/realtek - Add quirk for MSI GE63 laptop
ALSA: hda: Add NVIDIA codec IDs 9a & 9d through a0 to patch table
RISC-V: Don't allow write+exec only page mapping request in mmap
blktrace: break out of blktrace setup on concurrent calls
kbuild: improve cc-option to clean up all temporary files
arm64: sve: Fix build failure when ARM64_SVE=y and SYSCTL=n
s390/vdso: fix vDSO clock_getres()
s390/ptrace: fix setting syscall number
net: alx: fix race condition in alx_remove
ibmvnic: Harden device login requests
hwrng: ks-sa - Fix runtime PM imbalance on error
riscv/atomic: Fix sign extension for RV64I
drm/amd/display: Use kfree() to free rgb_user in calculate_user_regamma_ramp()
ata/libata: Fix usage of page address by page_address in ata_scsi_mode_select_xlat function
sata_rcar: handle pm_runtime_get_sync failure cases
sched/core: Fix PI boosting between RT and DEADLINE tasks
sched/deadline: Initialize ->dl_boosted
i2c: core: check returned size of emulated smbus block read
i2c: fsi: Fix the port number field in status register
net: bcmgenet: use hardware padding of runt frames
netfilter: ipset: fix unaligned atomic access
usb: gadget: udc: Potential Oops in error handling code
ARM: imx5: add missing put_device() call in imx_suspend_alloc_ocram()
cxgb4: move handling L2T ARP failures to caller
net: qed: fix excessive QM ILT lines consumption
net: qed: fix NVMe login fails over VFs
net: qed: fix left elements count calculation
RDMA/mad: Fix possible memory leak in ib_mad_post_receive_mads()
ASoC: rockchip: Fix a reference count leak.
RDMA/cma: Protect bind_list and listen_list while finding matching cm id
RDMA/qedr: Fix KASAN: use-after-free in ucma_event_handler+0x532
rxrpc: Fix handling of rwind from an ACK packet
ARM: dts: NSP: Correct FA2 mailbox node
regmap: Fix memory leak from regmap_register_patch
x86/resctrl: Fix a NULL vs IS_ERR() static checker warning in rdt_cdp_peer_get()
ARM: dts: Fix duovero smsc interrupt for suspend
ASoC: fsl_ssi: Fix bclk calculation for mono channel
regualtor: pfuze100: correct sw1a/sw2 on pfuze3000
efi/esrt: Fix reference count leak in esre_create_sysfs_entry.
ASoC: q6asm: handle EOS correctly
xfrm: Fix double ESP trailer insertion in IPsec crypto offload.
cifs/smb3: Fix data inconsistent when zero file range
cifs/smb3: Fix data inconsistent when punch hole
IB/mad: Fix use after free when destroying MAD agent
loop: replace kill_bdev with invalidate_bdev
cdc-acm: Add DISABLE_ECHO quirk for Microchip/SMSC chip
xhci: Return if xHCI doesn't support LPM
xhci: Fix enumeration issue when setting max packet size for FS devices.
xhci: Fix incorrect EP_STATE_MASK
scsi: zfcp: Fix panic on ERP timeout for previously dismissed ERP action
ALSA: usb-audio: Fix OOB access of mixer element list
ALSA: usb-audio: add quirk for Samsung USBC Headset (AKG)
ALSA: usb-audio: add quirk for Denon DCD-1500RE
usb: typec: tcpci_rt1711h: avoid screaming irq causing boot hangs
usb: host: ehci-exynos: Fix error check in exynos_ehci_probe()
xhci: Poll for U0 after disabling USB2 LPM
usb: host: xhci-mtk: avoid runtime suspend when removing hcd
USB: ehci: reopen solution for Synopsys HC bug
usb: add USB_QUIRK_DELAY_INIT for Logitech C922
usb: dwc2: Postponed gadget registration to the udc class driver
USB: ohci-sm501: Add missed iounmap() in remove
net: core: reduce recursion limit value
net: Do not clear the sock TX queue in sk_set_socket()
net: Fix the arp error in some cases
sch_cake: don't call diffserv parsing code when it is not needed
tcp_cubic: fix spurious HYSTART_DELAY exit upon drop in min RTT
sch_cake: fix a few style nits
sch_cake: don't try to reallocate or unshare skb unconditionally
ip_tunnel: fix use-after-free in ip_tunnel_lookup()
net: phy: Check harder for errors in get_phy_id()
ip6_gre: fix use-after-free in ip6gre_tunnel_lookup()
tg3: driver sleeps indefinitely when EEH errors exceed eeh_max_freezes
tcp: grow window for OOO packets only for SACK flows
tcp: don't ignore ECN CWR on pure ACK
sctp: Don't advertise IPv4 addresses if ipv6only is set on the socket
rxrpc: Fix notification call on completion of discarded calls
rocker: fix incorrect error handling in dma_rings_init
net: usb: ax88179_178a: fix packet alignment padding
net: increment xmit_recursion level in dev_direct_xmit()
net: use correct this_cpu primitive in dev_recursion_level
net: place xmit recursion in softnet data
net: fix memleak in register_netdevice()
net: bridge: enfore alignment for ethernet address
mld: fix memory leak in ipv6_mc_destroy_dev()
ibmveth: Fix max MTU limit
apparmor: don't try to replace stale label in ptraceme check
ALSA: hda/realtek - Enable micmute LED on and HP system
ALSA: hda/realtek: Enable mute LED on an HP system
ALSA: hda/realtek - Enable the headset of ASUS B9450FA with ALC294
fix a braino in "sparc32: fix register window handling in genregs32_[gs]et()"
i2c: tegra: Fix Maximum transfer size
i2c: tegra: Add missing kerneldoc for some fields
i2c: tegra: Cleanup kerneldoc comments
EDAC/amd64: Add Family 17h Model 30h PCI IDs
net: sched: export __netdev_watchdog_up()
net: bcmgenet: remove HFB_CTRL access
mtd: rawnand: marvell: Fix the condition on a return code
fanotify: fix ignore mask logic for events on child and on dir
block/bio-integrity: don't free 'buf' if bio_integrity_add_page() failed
net: be more gentle about silly gso requests coming from user
ANDROID: lib/vdso: do not update timespec if clock_getres() fails
Revert "ANDROID: fscrypt: add key removal notifier chain"
ANDROID: update the ABI xml and qcom whitelist
ANDROID: fs: export vfs_{read|write}
ANDROID: GKI: update abi definitions now that sdcardfs is gone
Revert "ANDROID: sdcardfs: Enable modular sdcardfs"
Revert "ANDROID: vfs: Add setattr2 for filesystems with per mount permissions"
Revert "ANDROID: vfs: fix export symbol type"
Revert "ANDROID: vfs: Add permission2 for filesystems with per mount permissions"
Revert "ANDROID: vfs: fix export symbol types"
Revert "ANDROID: vfs: add d_canonical_path for stacked filesystem support"
Revert "ANDROID: fs: Restore vfs_path_lookup() export"
ANDROID: sdcardfs: remove sdcardfs from system
Revert "ALSA: usb-audio: Improve frames size computation"
ANDROID: Makefile: append BUILD_NUMBER to version string when defined
ANDROID: GKI: Update ABI for incremental fs
ANDROID: GKI: Update cuttlefish whitelist
ANDROID: GKI: Disable INCREMENTAL_FS on x86 too
ANDROID: cpufreq: schedutil: drop cache when update skipped due to rate limit
Linux 4.19.130
KVM: x86/mmu: Set mmio_value to '0' if reserved #PF can't be generated
kvm: x86: Fix reserved bits related calculation errors caused by MKTME
kvm: x86: Move kvm_set_mmio_spte_mask() from x86.c to mmu.c
md: add feature flag MD_FEATURE_RAID0_LAYOUT
Revert "dpaa_eth: fix usage as DSA master, try 3"
net: core: device_rename: Use rwsem instead of a seqcount
sched/rt, net: Use CONFIG_PREEMPTION.patch
kretprobe: Prevent triggering kretprobe from within kprobe_flush_task
net: octeon: mgmt: Repair filling of RX ring
e1000e: Do not wake up the system via WOL if device wakeup is disabled
kprobes: Fix to protect kick_kprobe_optimizer() by kprobe_mutex
crypto: algboss - don't wait during notifier callback
crypto: algif_skcipher - Cap recv SG list at ctx->used
drm/i915/icl+: Fix hotplug interrupt disabling after storm detection
drm/i915: Whitelist context-local timestamp in the gen9 cmdparser
s390: fix syscall_get_error for compat processes
mtd: rawnand: tmio: Fix the probe error path
mtd: rawnand: mtk: Fix the probe error path
mtd: rawnand: plat_nand: Fix the probe error path
mtd: rawnand: socrates: Fix the probe error path
mtd: rawnand: oxnas: Fix the probe error path
mtd: rawnand: oxnas: Add of_node_put()
mtd: rawnand: orion: Fix the probe error path
mtd: rawnand: xway: Fix the probe error path
mtd: rawnand: sharpsl: Fix the probe error path
mtd: rawnand: diskonchip: Fix the probe error path
mtd: rawnand: Pass a nand_chip object to nand_release()
mtd: rawnand: Pass a nand_chip object to nand_scan()
block: nr_sects_write(): Disable preemption on seqcount write
x86/boot/compressed: Relax sed symbol type regex for LLVM ld.lld
drm/dp_mst: Increase ACT retry timeout to 3s
ext4: avoid race conditions when remounting with options that change dax
ext4: fix partial cluster initialization when splitting extent
selinux: fix double free
drm/amdgpu: Replace invalid device ID with a valid device ID
drm/qxl: Use correct notify port address when creating cursor ring
drm/dp_mst: Reformat drm_dp_check_act_status() a bit
drm: encoder_slave: fix refcouting error for modules
libata: Use per port sync for detach
arm64: hw_breakpoint: Don't invoke overflow handler on uaccess watchpoints
block: Fix use-after-free in blkdev_get()
afs: afs_write_end() should change i_size under the right lock
afs: Fix non-setting of mtime when writing into mmap
bcache: fix potential deadlock problem in btree_gc_coalesce
ext4: stop overwrite the errcode in ext4_setup_super
perf report: Fix NULL pointer dereference in hists__fprintf_nr_sample_events()
usb/ehci-platform: Set PM runtime as active on resume
usb: host: ehci-platform: add a quirk to avoid stuck
usb/xhci-plat: Set PM runtime as active on resume
xdp: Fix xsk_generic_xmit errno
net/filter: Permit reading NET in load_bytes_relative when MAC not set
x86/idt: Keep spurious entries unset in system_vectors
scsi: acornscsi: Fix an error handling path in acornscsi_probe()
drm/sun4i: hdmi ddc clk: Fix size of m divider
ASoC: rt5645: Add platform-data for Asus T101HA
ASoC: Intel: bytcr_rt5640: Add quirk for Toshiba Encore WT10-A tablet
ASoC: core: only convert non DPCM link to DPCM link
afs: Fix memory leak in afs_put_sysnames()
selftests/net: in timestamping, strncpy needs to preserve null byte
drivers/perf: hisi: Fix wrong value for all counters enable
NTB: ntb_test: Fix bug when counting remote files
NTB: perf: Fix race condition when run with ntb_test
NTB: perf: Fix support for hardware that doesn't have port numbers
NTB: perf: Don't require one more memory window than number of peers
NTB: Revert the change to use the NTB device dev for DMA allocations
NTB: ntb_tool: reading the link file should not end in a NULL byte
ntb_tool: pass correct struct device to dma_alloc_coherent
ntb_perf: pass correct struct device to dma_alloc_coherent
gfs2: fix use-after-free on transaction ail lists
blktrace: fix endianness for blk_log_remap()
blktrace: fix endianness in get_pdu_int()
blktrace: use errno instead of bi_status
selftests/vm/pkeys: fix alloc_random_pkey() to make it really random
elfnote: mark all .note sections SHF_ALLOC
include/linux/bitops.h: avoid clang shift-count-overflow warnings
lib/zlib: remove outdated and incorrect pre-increment optimization
geneve: change from tx_error to tx_dropped on missing metadata
crypto: omap-sham - add proper load balancing support for multicore
pinctrl: freescale: imx: Fix an error handling path in 'imx_pinctrl_probe()'
pinctrl: imxl: Fix an error handling path in 'imx1_pinctrl_core_probe()'
scsi: ufs: Don't update urgent bkops level when toggling auto bkops
scsi: iscsi: Fix reference count leak in iscsi_boot_create_kobj
gfs2: Allow lock_nolock mount to specify jid=X
openrisc: Fix issue with argument clobbering for clone/fork
rxrpc: Adjust /proc/net/rxrpc/calls to display call->debug_id not user_ID
vfio/mdev: Fix reference count leak in add_mdev_supported_type
ASoC: fsl_asrc_dma: Fix dma_chan leak when config DMA channel failed
extcon: adc-jack: Fix an error handling path in 'adc_jack_probe()'
powerpc/4xx: Don't unmap NULL mbase
of: Fix a refcounting bug in __of_attach_node_sysfs()
NFSv4.1 fix rpc_call_done assignment for BIND_CONN_TO_SESSION
net: sunrpc: Fix off-by-one issues in 'rpc_ntop6'
clk: sprd: return correct type of value for _sprd_pll_recalc_rate
KVM: PPC: Book3S HV: Ignore kmemleak false positives
scsi: ufs-qcom: Fix scheduling while atomic issue
clk: bcm2835: Fix return type of bcm2835_register_gate
scsi: target: tcmu: Fix a use after free in tcmu_check_expired_queue_cmd()
ASoC: fix incomplete error-handling in img_i2s_in_probe.
x86/apic: Make TSC deadline timer detection message visible
RDMA/iw_cxgb4: cleanup device debugfs entries on ULD remove
usb: gadget: Fix issue with config_ep_by_speed function
usb: gadget: fix potential double-free in m66592_probe.
usb: gadget: lpc32xx_udc: don't dereference ep pointer before null check
USB: gadget: udc: s3c2410_udc: Remove pointless NULL check in s3c2410_udc_nuke
usb: dwc2: gadget: move gadget resume after the core is in L0 state
watchdog: da9062: No need to ping manually before setting timeout
IB/cma: Fix ports memory leak in cma_configfs
PCI: dwc: Fix inner MSI IRQ domain registration
PCI/PTM: Inherit Switch Downstream Port PTM settings from Upstream Port
dm zoned: return NULL if dmz_get_zone_for_reclaim() fails to find a zone
powerpc/64s/pgtable: fix an undefined behaviour
arm64: tegra: Fix ethernet phy-mode for Jetson Xavier
scsi: target: tcmu: Userspace must not complete queued commands
clk: samsung: exynos5433: Add IGNORE_UNUSED flag to sclk_i2s1
fpga: dfl: afu: Corrected error handling levels
tty: n_gsm: Fix bogus i++ in gsm_data_kick
USB: host: ehci-mxc: Add error handling in ehci_mxc_drv_probe()
ASoC: Intel: bytcr_rt5640: Add quirk for Toshiba Encore WT8-A tablet
drm/msm/mdp5: Fix mdp5_init error path for failed mdp5_kms allocation
usb/ohci-platform: Fix a warning when hibernating
vfio-pci: Mask cap zero
powerpc/ps3: Fix kexec shutdown hang
powerpc/pseries/ras: Fix FWNMI_VALID off by one
ipmi: use vzalloc instead of kmalloc for user creation
HID: Add quirks for Trust Panora Graphic Tablet
tty: n_gsm: Fix waking up upper tty layer when room available
tty: n_gsm: Fix SOF skipping
powerpc/64: Don't initialise init_task->thread.regs
PCI: Fix pci_register_host_bridge() device_register() error handling
clk: ti: composite: fix memory leak
dlm: remove BUG() before panic()
pinctrl: rockchip: fix memleak in rockchip_dt_node_to_map
scsi: mpt3sas: Fix double free warnings
power: supply: smb347-charger: IRQSTAT_D is volatile
power: supply: lp8788: Fix an error handling path in 'lp8788_charger_probe()'
scsi: qla2xxx: Fix warning after FC target reset
PCI/ASPM: Allow ASPM on links to PCIe-to-PCI/PCI-X Bridges
PCI: rcar: Fix incorrect programming of OB windows
drivers: base: Fix NULL pointer exception in __platform_driver_probe() if a driver developer is foolish
serial: amba-pl011: Make sure we initialize the port.lock spinlock
i2c: pxa: fix i2c_pxa_scream_blue_murder() debug output
PCI: v3-semi: Fix a memory leak in v3_pci_probe() error handling paths
staging: sm750fb: add missing case while setting FB_VISUAL
usb: dwc3: gadget: Properly handle failed kick_transfer
thermal/drivers/ti-soc-thermal: Avoid dereferencing ERR_PTR
slimbus: ngd: get drvdata from correct device
tty: hvc: Fix data abort due to race in hvc_open
s390/qdio: put thinint indicator after early error
ALSA: usb-audio: Fix racy list management in output queue
ALSA: usb-audio: Improve frames size computation
staging: gasket: Fix mapping refcnt leak when register/store fails
staging: gasket: Fix mapping refcnt leak when put attribute fails
firmware: qcom_scm: fix bogous abuse of dma-direct internals
pinctrl: rza1: Fix wrong array assignment of rza1l_swio_entries
scsi: qedf: Fix crash when MFW calls for protocol stats while function is still probing
gpio: dwapb: Append MODULE_ALIAS for platform driver
ARM: dts: sun8i-h2-plus-bananapi-m2-zero: Fix led polarity
scsi: qedi: Do not flush offload work if ARP not resolved
arm64: dts: mt8173: fix unit name warnings
staging: greybus: fix a missing-check bug in gb_lights_light_config()
x86/purgatory: Disable various profiling and sanitizing options
apparmor: fix nnp subset test for unconfined
scsi: ibmvscsi: Don't send host info in adapter info MAD after LPM
scsi: sr: Fix sr_probe() missing deallocate of device minor
ASoC: meson: add missing free_irq() in error path
apparmor: check/put label on apparmor_sk_clone_security()
apparmor: fix introspection of of task mode for unconfined tasks
mksysmap: Fix the mismatch of '.L' symbols in System.map
NTB: Fix the default port and peer numbers for legacy drivers
NTB: ntb_pingpong: Choose doorbells based on port number
yam: fix possible memory leak in yam_init_driver
pwm: img: Call pm_runtime_put() in pm_runtime_get_sync() failed case
powerpc/crashkernel: Take "mem=" option into account
PCI: vmd: Filter resource type bits from shadow register
nfsd: Fix svc_xprt refcnt leak when setup callback client failed
powerpc/perf/hv-24x7: Fix inconsistent output values incase multiple hv-24x7 events run
clk: clk-flexgen: fix clock-critical handling
scsi: lpfc: Fix lpfc_nodelist leak when processing unsolicited event
mfd: wm8994: Fix driver operation if loaded as modules
gpio: dwapb: Call acpi_gpiochip_free_interrupts() on GPIO chip de-registration
m68k/PCI: Fix a memory leak in an error handling path
RDMA/mlx5: Add init2init as a modify command
vfio/pci: fix memory leaks in alloc_perm_bits()
ps3disk: use the default segment boundary
PCI: aardvark: Don't blindly enable ASPM L0s and don't write to read-only register
dm mpath: switch paths in dm_blk_ioctl() code path
serial: 8250: Fix max baud limit in generic 8250 port
usblp: poison URBs upon disconnect
clk: samsung: Mark top ISP and CAM clocks on Exynos542x as critical
i2c: pxa: clear all master action bits in i2c_pxa_stop_message()
f2fs: report delalloc reserve as non-free in statfs for project quota
iio: bmp280: fix compensation of humidity
scsi: qla2xxx: Fix issue with adapter's stopping state
PCI: Allow pci_resize_resource() for devices on root bus
ALSA: isa/wavefront: prevent out of bounds write in ioctl
ALSA: hda/realtek - Introduce polarity for micmute LED GPIO
scsi: qedi: Check for buffer overflow in qedi_set_path()
ARM: integrator: Add some Kconfig selections
ASoC: davinci-mcasp: Fix dma_chan refcnt leak when getting dma type
backlight: lp855x: Ensure regulators are disabled on probe failure
clk: qcom: msm8916: Fix the address location of pll->config_reg
remoteproc: Fix IDR initialisation in rproc_alloc()
iio: pressure: bmp280: Tolerate IRQ before registering
i2c: piix4: Detect secondary SMBus controller on AMD AM4 chipsets
ASoC: tegra: tegra_wm8903: Support nvidia, headset property
clk: sunxi: Fix incorrect usage of round_down()
power: supply: bq24257_charger: Replace depends on REGMAP_I2C with select
ANDROID: ext4: Optimize match for casefolded encrypted dirs
ANDROID: ext4: Handle casefolding with encryption
ANDROID: extcon: Remove redundant EXPORT_SYMBOL_GPL
ANDROID: update the ABI xml representation
ANDROID: GKI: cfg80211: add ABI changes for CONFIG_NL80211_TESTMODE
ANDROID: gki_defconfig: x86: Enable KERNEL_LZ4
ANDROID: GKI: scripts: Makefile: update the lz4 command
FROMLIST: f2fs: fix use-after-free when accessing bio->bi_crypt_context
UPSTREAM: fdt: Update CRC check for rng-seed
ANDROID: GKI: Update ABI for incremental fs
ANDROID: GKI: Update whitelist and defconfig for incfs
ANDROID: Use depmod from the hermetic toolchain
Linux 4.19.129
perf symbols: Fix debuginfo search for Ubuntu
perf probe: Check address correctness by map instead of _etext
perf probe: Fix to check blacklist address correctly
perf probe: Do not show the skipped events
w1: omap-hdq: cleanup to add missing newline for some dev_dbg
mtd: rawnand: pasemi: Fix the probe error path
mtd: rawnand: brcmnand: fix hamming oob layout
sunrpc: clean up properly in gss_mech_unregister()
sunrpc: svcauth_gss_register_pseudoflavor must reject duplicate registrations.
kbuild: force to build vmlinux if CONFIG_MODVERSION=y
powerpc/64s: Save FSCR to init_task.thread.fscr after feature init
powerpc/64s: Don't let DT CPU features set FSCR_DSCR
drivers/macintosh: Fix memleak in windfarm_pm112 driver
ARM: dts: s5pv210: Set keep-power-in-suspend for SDHCI1 on Aries
ARM: dts: at91: sama5d2_ptc_ek: fix vbus pin
ARM: dts: exynos: Fix GPIO polarity for thr GalaxyS3 CM36651 sensor's bus
ARM: tegra: Correct PL310 Auxiliary Control Register initialization
kernel/cpu_pm: Fix uninitted local in cpu_pm
alpha: fix memory barriers so that they conform to the specification
dm crypt: avoid truncating the logical block size
sparc64: fix misuses of access_process_vm() in genregs32_[sg]et()
sparc32: fix register window handling in genregs32_[gs]et()
gnss: sirf: fix error return code in sirf_probe()
pinctrl: samsung: Save/restore eint_mask over suspend for EINT_TYPE GPIOs
pinctrl: samsung: Correct setting of eint wakeup mask on s5pv210
power: vexpress: add suppress_bind_attrs to true
igb: Report speed and duplex as unknown when device is runtime suspended
media: ov5640: fix use of destroyed mutex
b43_legacy: Fix connection problem with WPA3
b43: Fix connection problem with WPA3
b43legacy: Fix case where channel status is corrupted
Bluetooth: hci_bcm: fix freeing not-requested IRQ
media: go7007: fix a miss of snd_card_free
carl9170: remove P2P_GO support
e1000e: Relax condition to trigger reset for ME workaround
e1000e: Disable TSO for buffer overrun workaround
PCI: Program MPS for RCiEP devices
ima: Call ima_calc_boot_aggregate() in ima_eventdigest_init()
btrfs: fix wrong file range cleanup after an error filling dealloc range
btrfs: fix error handling when submitting direct I/O bio
PCI: Generalize multi-function power dependency device links
PCI: Unify ACS quirk desired vs provided checking
PCI: Make ACS quirk implementations more uniform
serial: 8250_pci: Move Pericom IDs to pci_ids.h
PCI: Add Loongson vendor ID
x86/amd_nb: Add Family 19h PCI IDs
PCI: vmd: Add device id for VMD device 8086:9A0B
PCI: Add Amazon's Annapurna Labs vendor ID
PCI: Add Genesys Logic, Inc. Vendor ID
ALSA: lx6464es - add support for LX6464ESe pci express variant
x86/amd_nb: Add PCI device IDs for family 17h, model 70h
PCI: mediatek: Add controller support for MT7629
PCI: Enable NVIDIA HDA controllers
PCI: Add NVIDIA GPU multi-function power dependencies
PCI: Add Synopsys endpoint EDDA Device ID
misc: pci_endpoint_test: Add support to test PCI EP in AM654x
misc: pci_endpoint_test: Add the layerscape EP device support
PCI: Move Rohm Vendor ID to generic list
PCI: Move Synopsys HAPS platform device IDs
PCI: add USR vendor id and use it in r8169 and w6692 driver
x86/amd_nb: Add PCI device IDs for family 17h, model 30h
hwmon/k10temp, x86/amd_nb: Consolidate shared device IDs
pci:ipmi: Move IPMI PCI class id defines to pci_ids.h
PCI: Remove unused NFP32xx IDs
PCI: Add ACS quirk for Intel Root Complex Integrated Endpoints
PCI: Add ACS quirk for iProc PAXB
PCI: Avoid FLR for AMD Starship USB 3.0
PCI: Avoid FLR for AMD Matisse HD Audio & USB 3.0
PCI: Avoid Pericom USB controller OHCI/EHCI PME# defect
ext4: fix race between ext4_sync_parent() and rename()
ext4: fix error pointer dereference
ext4: fix EXT_MAX_EXTENT/INDEX to check for zeroed eh_max
evm: Fix possible memory leak in evm_calc_hmac_or_hash()
ima: Directly assign the ima_default_policy pointer to ima_rules
ima: Fix ima digest hash table key calculation
mm: initialize deferred pages with interrupts enabled
mm: thp: make the THP mapcount atomic against __split_huge_pmd_locked()
btrfs: send: emit file capabilities after chown
btrfs: include non-missing as a qualifier for the latest_bdev
string.h: fix incompatibility between FORTIFY_SOURCE and KASAN
platform/x86: intel-vbtn: Only blacklist SW_TABLET_MODE on the 9 / "Laptop" chasis-type
platform/x86: intel-hid: Add a quirk to support HP Spectre X2 (2015)
platform/x86: hp-wmi: Convert simple_strtoul() to kstrtou32()
cpuidle: Fix three reference count leaks
spi: dw: Return any value retrieved from the dma_transfer callback
mmc: sdhci-esdhc-imx: fix the mask for tuning start point
ixgbe: fix signed-integer-overflow warning
mmc: via-sdmmc: Respect the cmd->busy_timeout from the mmc core
staging: greybus: sdio: Respect the cmd->busy_timeout from the mmc core
mmc: sdhci-msm: Set SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 quirk
bcache: fix refcount underflow in bcache_device_free()
MIPS: Fix IRQ tracing when call handle_fpe() and handle_msa_fpe()
PCI: Don't disable decoding when mmio_always_on is set
macvlan: Skip loopback packets in RX handler
btrfs: qgroup: mark qgroup inconsistent if we're inherting snapshot to a new qgroup
m68k: mac: Don't call via_flush_cache() on Mac IIfx
x86/mm: Stop printing BRK addresses
crypto: stm32/crc32 - fix multi-instance
crypto: stm32/crc32 - fix run-time self test issue.
crypto: stm32/crc32 - fix ext4 chksum BUG_ON()
mips: Add udelay lpj numbers adjustment
mips: MAAR: Use more precise address mask
x86/boot: Correct relocation destination on old linkers
mwifiex: Fix memory corruption in dump_station
rtlwifi: Fix a double free in _rtl_usb_tx_urb_setup()
net/mlx5e: IPoIB, Drop multicast packets that this interface sent
veth: Adjust hard_start offset on redirect XDP frames
md: don't flush workqueue unconditionally in md_open
mt76: avoid rx reorder buffer overflow
net: qed*: Reduce RX and TX default ring count when running inside kdump kernel
wcn36xx: Fix error handling path in 'wcn36xx_probe()'
ath10k: Remove msdu from idr when management pkt send fails
nvme: refine the Qemu Identify CNS quirk
platform/x86: intel-vbtn: Also handle tablet-mode switch on "Detachable" and "Portable" chassis-types
platform/x86: intel-vbtn: Do not advertise switches to userspace if they are not there
platform/x86: intel-vbtn: Split keymap into buttons and switches parts
platform/x86: intel-vbtn: Use acpi_evaluate_integer()
xfs: fix duplicate verification from xfs_qm_dqflush()
xfs: reset buffer write failure state on successful completion
kgdb: Fix spurious true from in_dbg_master()
mips: cm: Fix an invalid error code of INTVN_*_ERR
MIPS: Truncate link address into 32bit for 32bit kernel
Crypto/chcr: fix for ccm(aes) failed test
xfs: clean up the error handling in xfs_swap_extents
powerpc/spufs: fix copy_to_user while atomic
net: allwinner: Fix use correct return type for ndo_start_xmit()
media: cec: silence shift wrapping warning in __cec_s_log_addrs()
net: lpc-enet: fix error return code in lpc_mii_init()
drivers/perf: hisi: Fix typo in events attribute array
sched/core: Fix illegal RCU from offline CPUs
exit: Move preemption fixup up, move blocking operations down
lib/mpi: Fix 64-bit MIPS build with Clang
net: bcmgenet: set Rx mode before starting netif
selftests/bpf: Fix memory leak in extract_build_id()
netfilter: nft_nat: return EOPNOTSUPP if type or flags are not supported
audit: fix a net reference leak in audit_list_rules_send()
Bluetooth: btbcm: Add 2 missing models to subver tables
MIPS: Make sparse_init() using top-down allocation
media: platform: fcp: Set appropriate DMA parameters
media: dvb: return -EREMOTEIO on i2c transfer failure.
audit: fix a net reference leak in audit_send_reply()
dt-bindings: display: mediatek: control dpi pins mode to avoid leakage
e1000: Distribute switch variables for initialization
tools api fs: Make xxx__mountpoint() more scalable
brcmfmac: fix wrong location to get firmware feature
staging: android: ion: use vmap instead of vm_map_ram
net: vmxnet3: fix possible buffer overflow caused by bad DMA value in vmxnet3_get_rss()
x86/kvm/hyper-v: Explicitly align hcall param for kvm_hyperv_exit
spi: dw: Fix Rx-only DMA transfers
mmc: meson-mx-sdio: trigger a soft reset after a timeout or CRC error
batman-adv: Revert "disable ethtool link speed detection when auto negotiation off"
ARM: 8978/1: mm: make act_mm() respect THREAD_SIZE
btrfs: do not ignore error from btrfs_next_leaf() when inserting checksums
clocksource: dw_apb_timer_of: Fix missing clockevent timers
clocksource: dw_apb_timer: Make CPU-affiliation being optional
spi: dw: Enable interrupts in accordance with DMA xfer mode
kgdb: Prevent infinite recursive entries to the debugger
kgdb: Disable WARN_CONSOLE_UNLOCKED for all kgdb
Bluetooth: Add SCO fallback for invalid LMP parameters error
MIPS: Loongson: Build ATI Radeon GPU driver as module
ixgbe: Fix XDP redirect on archs with PAGE_SIZE above 4K
arm64: insn: Fix two bugs in encoding 32-bit logical immediates
spi: dw: Zero DMA Tx and Rx configurations on stack
arm64: cacheflush: Fix KGDB trap detection
efi/libstub/x86: Work around LLVM ELF quirk build regression
net: ena: fix error returning in ena_com_get_hash_function()
net: atlantic: make hw_get_regs optional
spi: pxa2xx: Apply CS clk quirk to BXT
objtool: Ignore empty alternatives
media: si2157: Better check for running tuner in init
crypto: ccp -- don't "select" CONFIG_DMADEVICES
drm: bridge: adv7511: Extend list of audio sample rates
ACPI: GED: use correct trigger type field in _Exx / _Lxx handling
KVM: arm64: Synchronize sysreg state on injecting an AArch32 exception
xen/pvcalls-back: test for errors when calling backend_connect()
mmc: sdio: Fix potential NULL pointer error in mmc_sdio_init_card()
ARM: dts: at91: sama5d2_ptc_ek: fix sdmmc0 node description
mmc: sdhci-msm: Clear tuning done flag while hs400 tuning
agp/intel: Reinforce the barrier after GTT updates
perf: Add cond_resched() to task_function_call()
fat: don't allow to mount if the FAT length == 0
mm/slub: fix a memory leak in sysfs_slab_add()
drm/vkms: Hold gem object while still in-use
Smack: slab-out-of-bounds in vsscanf
ath9k: Fix general protection fault in ath9k_hif_usb_rx_cb
ath9x: Fix stack-out-of-bounds Write in ath9k_hif_usb_rx_cb
ath9k: Fix use-after-free Write in ath9k_htc_rx_msg
ath9k: Fix use-after-free Read in ath9k_wmi_ctrl_rx
scsi: megaraid_sas: TM command refire leads to controller firmware crash
KVM: arm64: Make vcpu_cp1x() work on Big Endian hosts
KVM: MIPS: Fix VPN2_MASK definition for variable cpu_vmbits
KVM: MIPS: Define KVM_ENTRYHI_ASID to cpu_asid_mask(&boot_cpu_data)
KVM: nVMX: Consult only the "basic" exit reason when routing nested exit
KVM: nSVM: leave ASID aside in copy_vmcb_control_area
KVM: nSVM: fix condition for filtering async PF
video: fbdev: w100fb: Fix a potential double free.
proc: Use new_inode not new_inode_pseudo
ovl: initialize error in ovl_copy_xattr
selftests/net: in rxtimestamp getopt_long needs terminating null entry
crypto: virtio: Fix dest length calculation in __virtio_crypto_skcipher_do_req()
crypto: virtio: Fix src/dst scatterlist calculation in __virtio_crypto_skcipher_do_req()
crypto: virtio: Fix use-after-free in virtio_crypto_skcipher_finalize_req()
spi: pxa2xx: Fix runtime PM ref imbalance on probe error
spi: pxa2xx: Balance runtime PM enable/disable on error
spi: bcm2835: Fix controller unregister order
spi: pxa2xx: Fix controller unregister order
spi: Fix controller unregister order
spi: No need to assign dummy value in spi_unregister_controller()
x86/speculation: PR_SPEC_FORCE_DISABLE enforcement for indirect branches.
x86/speculation: Avoid force-disabling IBPB based on STIBP and enhanced IBRS.
x86/speculation: Add support for STIBP always-on preferred mode
x86/speculation: Change misspelled STIPB to STIBP
KVM: x86: only do L1TF workaround on affected processors
KVM: x86/mmu: Consolidate "is MMIO SPTE" code
kvm: x86: Fix L1TF mitigation for shadow MMU
KVM: x86: Fix APIC page invalidation race
x86/{mce,mm}: Unmap the entire page if the whole page is affected and poisoned
ALSA: pcm: disallow linking stream to itself
crypto: cavium/nitrox - Fix 'nitrox_get_first_device()' when ndevlist is fully iterated
PM: runtime: clk: Fix clk_pm_runtime_get() error path
spi: bcm-qspi: when tx/rx buffer is NULL set to 0
spi: bcm2835aux: Fix controller unregister order
spi: dw: Fix controller unregister order
nilfs2: fix null pointer dereference at nilfs_segctor_do_construct()
cgroup, blkcg: Prepare some symbols for module and !CONFIG_CGROUP usages
ACPI: PM: Avoid using power resources if there are none for D0
ACPI: GED: add support for _Exx / _Lxx handler methods
ACPI: CPPC: Fix reference count leak in acpi_cppc_processor_probe()
ACPI: sysfs: Fix reference count leak in acpi_sysfs_add_hotplug_profile()
ALSA: usb-audio: Add vendor, product and profile name for HP Thunderbolt Dock
ALSA: usb-audio: Fix inconsistent card PM state after resume
ALSA: hda/realtek - add a pintbl quirk for several Lenovo machines
ALSA: es1688: Add the missed snd_card_free()
efi/efivars: Add missing kobject_put() in sysfs entry creation error path
x86/reboot/quirks: Add MacBook6,1 reboot quirk
x86/speculation: Prevent rogue cross-process SSBD shutdown
x86/PCI: Mark Intel C620 MROMs as having non-compliant BARs
x86_64: Fix jiffies ODR violation
btrfs: tree-checker: Check level for leaves and nodes
aio: fix async fsync creds
mm: add kvfree_sensitive() for freeing sensitive data objects
perf probe: Accept the instance number of kretprobe event
x86/cpu/amd: Make erratum #1054 a legacy erratum
RDMA/uverbs: Make the event_queue fds return POLLERR when disassociated
ath9k_htc: Silence undersized packet warnings
powerpc/xive: Clear the page tables for the ESB IO mapping
drivers/net/ibmvnic: Update VNIC protocol version reporting
Input: synaptics - add a second working PNP_ID for Lenovo T470s
sched/fair: Don't NUMA balance for kthreads
ARM: 8977/1: ptrace: Fix mask for thumb breakpoint hook
Input: mms114 - fix handling of mms345l
crypto: talitos - fix ECB and CBC algs ivsize
btrfs: Detect unbalanced tree with empty leaf before crashing btree operations
btrfs: merge btrfs_find_device and find_device
lib: Reduce user_access_begin() boundaries in strncpy_from_user() and strnlen_user()
x86: uaccess: Inhibit speculation past access_ok() in user_access_begin()
arch/openrisc: Fix issues with access_ok()
Fix 'acccess_ok()' on alpha and SH
make 'user_access_begin()' do 'access_ok()'
selftests: bpf: fix use of undeclared RET_IF macro
tun: correct header offsets in napi frags mode
vxlan: Avoid infinite loop when suppressing NS messages with invalid options
bridge: Avoid infinite loop when suppressing NS messages with invalid options
net_failover: fixed rollback in net_failover_open()
ipv6: fix IPV6_ADDRFORM operation logic
writeback: Drop I_DIRTY_TIME_EXPIRE
writeback: Fix sync livelock due to b_dirty_time processing
writeback: Avoid skipping inode writeback
writeback: Protect inode->i_io_list with inode->i_lock
Revert "writeback: Avoid skipping inode writeback"
ANDROID: gki_defconfig: increase vbus_draw to 500mA
fscrypt: remove stale definition
fs-verity: remove unnecessary extern keywords
fs-verity: fix all kerneldoc warnings
fscrypt: add support for IV_INO_LBLK_32 policies
fscrypt: make test_dummy_encryption use v2 by default
fscrypt: support test_dummy_encryption=v2
fscrypt: add fscrypt_add_test_dummy_key()
linux/parser.h: add include guards
fscrypt: remove unnecessary extern keywords
fscrypt: name all function parameters
fscrypt: fix all kerneldoc warnings
ANDROID: Update the ABI
ANDROID: GKI: power: power-supply: Add POWER_SUPPLY_PROP_CHARGER_STATUS property
ANDROID: GKI: add dev to usb_gsi_request
ANDROID: GKI: dma-buf: add dent_count to dma_buf
ANDROID: Update the ABI xml and whitelist
ANDROID: GKI: update whitelist
ANDROID: extcon: Export symbol of `extcon_get_edev_name`
ANDROID: kbuild: merge more sections with LTO
UPSTREAM: timekeeping/vsyscall: Update VDSO data unconditionally
ANDROID: GKI: Revert "genetlink: disallow subscribing to unknown mcast groups"
BACKPORT: usb: musb: Add support for MediaTek musb controller
UPSTREAM: usb: musb: Add musb_clearb/w() interface
UPSTREAM: usb: musb: Add noirq type of dma create interface
UPSTREAM: usb: musb: Add get/set toggle hooks
UPSTREAM: dt-bindings: usb: musb: Add support for MediaTek musb controller
FROMGIT: driver core: Remove unnecessary is_fwnode_dev variable in device_add()
FROMGIT: driver core: Remove check in driver_deferred_probe_force_trigger()
FROMGIT: of: platform: Batch fwnode parsing when adding all top level devices
FROMGIT: BACKPORT: driver core: fw_devlink: Add support for batching fwnode parsing
BACKPORT: driver core: Look for waiting consumers only for a fwnode's primary device
BACKPORT: driver core: Add device links from fwnode only for the primary device
Linux 4.19.128
Revert "net/mlx5: Annotate mutex destroy for root ns"
uprobes: ensure that uprobe->offset and ->ref_ctr_offset are properly aligned
x86/speculation: Add Ivy Bridge to affected list
x86/speculation: Add SRBDS vulnerability and mitigation documentation
x86/speculation: Add Special Register Buffer Data Sampling (SRBDS) mitigation
x86/cpu: Add 'table' argument to cpu_matches()
x86/cpu: Add a steppings field to struct x86_cpu_id
nvmem: qfprom: remove incorrect write support
CDC-ACM: heed quirk also in error handling
staging: rtl8712: Fix IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK
tty: hvc_console, fix crashes on parallel open/close
vt: keyboard: avoid signed integer overflow in k_ascii
usb: musb: Fix runtime PM imbalance on error
usb: musb: start session in resume for host port
iio: vcnl4000: Fix i2c swapped word reading.
USB: serial: option: add Telit LE910C1-EUX compositions
USB: serial: usb_wwan: do not resubmit rx urb on fatal errors
USB: serial: qcserial: add DW5816e QDL support
net: check untrusted gso_size at kernel entry
vsock: fix timeout in vsock_accept()
NFC: st21nfca: add missed kfree_skb() in an error path
net: usb: qmi_wwan: add Telit LE910C1-EUX composition
l2tp: do not use inet_hash()/inet_unhash()
l2tp: add sk_family checks to l2tp_validate_socket
devinet: fix memleak in inetdev_init()
Revert "ANDROID: Remove default y on BRIDGE_IGMP_SNOOPING"
ANDROID: Update the ABI xml and whitelist
ANDROID: GKI: update whitelist
ANDROID: arch: arm64: vdso: export the symbols for time()
ANDROID: Incremental fs: Remove dependency on PKCS7_MESSAGE_PARSER
ANDROID: dm-bow: Add block_size option
f2fs: attach IO flags to the missing cases
f2fs: add node_io_flag for bio flags likewise data_io_flag
f2fs: remove unused parameter of f2fs_put_rpages_mapping()
f2fs: handle readonly filesystem in f2fs_ioc_shutdown()
f2fs: avoid utf8_strncasecmp() with unstable name
f2fs: don't return vmalloc() memory from f2fs_kmalloc()
ANDROID: GKI: set CONFIG_BLK_DEV_LOOP_MIN_COUNT to 16
ANDROID: Incremental fs: Cache successful hash calculations
ANDROID: Incremental fs: Fix four error-path bugs
Linux 4.19.127
net: smsc911x: Fix runtime PM imbalance on error
net: ethernet: stmmac: Enable interface clocks on probe for IPQ806x
net/ethernet/freescale: rework quiesce/activate for ucc_geth
null_blk: return error for invalid zone size
s390/mm: fix set_huge_pte_at() for empty ptes
drm/edid: Add Oculus Rift S to non-desktop list
net: bmac: Fix read of MAC address from ROM
x86/mmiotrace: Use cpumask_available() for cpumask_var_t variables
i2c: altera: Fix race between xfer_msg and isr thread
evm: Fix RCU list related warnings
ARC: [plat-eznps]: Restrict to CONFIG_ISA_ARCOMPACT
ARC: Fix ICCM & DCCM runtime size checks
s390/ftrace: save traced function caller
spi: dw: use "smp_mb()" to avoid sending spi data error
powerpc/powernv: Avoid re-registration of imc debugfs directory
scsi: hisi_sas: Check sas_port before using it
drm/i915: fix port checks for MST support on gen >= 11
airo: Fix read overflows sending packets
net: dsa: mt7530: set CPU port to fallback mode
scsi: ufs: Release clock if DMA map fails
mmc: fix compilation of user API
kernel/relay.c: handle alloc_percpu returning NULL in relay_open
p54usb: add AirVasT USB stick device-id
HID: i2c-hid: add Schneider SCL142ALM to descriptor override
HID: sony: Fix for broken buttons on DS3 USB dongles
mm: Fix mremap not considering huge pmd devmap
libnvdimm: Fix endian conversion issues
Revert "cgroup: Add memory barriers to plug cgroup_rstat_updated() race window"
f2fs: fix retry logic in f2fs_write_cache_pages()
ANDROID: Update ABI representation
Linux 4.19.126
mm/vmalloc.c: don't dereference possible NULL pointer in __vunmap()
netfilter: nf_conntrack_pptp: fix compilation warning with W=1 build
bonding: Fix reference count leak in bond_sysfs_slave_add.
crypto: chelsio/chtls: properly set tp->lsndtime
qlcnic: fix missing release in qlcnic_83xx_interrupt_test.
xsk: Add overflow check for u64 division, stored into u32
bnxt_en: Fix accumulation of bp->net_stats_prev.
esp6: get the right proto for transport mode in esp6_gso_encap
netfilter: nf_conntrack_pptp: prevent buffer overflows in debug code
netfilter: nfnetlink_cthelper: unbreak userspace helper support
netfilter: ipset: Fix subcounter update skip
netfilter: nft_reject_bridge: enable reject with bridge vlan
ip_vti: receive ipip packet by calling ip_tunnel_rcv
vti4: eliminated some duplicate code.
xfrm: fix error in comment
xfrm: fix a NULL-ptr deref in xfrm_local_error
xfrm: fix a warning in xfrm_policy_insert_list
xfrm interface: fix oops when deleting a x-netns interface
xfrm: call xfrm_output_gso when inner_protocol is set in xfrm_output
xfrm: allow to accept packets with ipv6 NEXTHDR_HOP in xfrm_input
copy_xstate_to_kernel(): don't leave parts of destination uninitialized
x86/dma: Fix max PFN arithmetic overflow on 32 bit systems
mac80211: mesh: fix discovery timer re-arming issue / crash
RDMA/core: Fix double destruction of uobject
mmc: core: Fix recursive locking issue in CQE recovery path
parisc: Fix kernel panic in mem_init()
iommu: Fix reference count leak in iommu_group_alloc.
include/asm-generic/topology.h: guard cpumask_of_node() macro argument
fs/binfmt_elf.c: allocate initialized memory in fill_thread_core_info()
mm: remove VM_BUG_ON(PageSlab()) from page_mapcount()
IB/ipoib: Fix double free of skb in case of multicast traffic in CM mode
libceph: ignore pool overlay and cache logic on redirects
ALSA: hda/realtek - Add new codec supported for ALC287
ALSA: usb-audio: Quirks for Gigabyte TRX40 Aorus Master onboard audio
exec: Always set cap_ambient in cap_bprm_set_creds
ALSA: usb-audio: mixer: volume quirk for ESS Technology Asus USB DAC
ALSA: hda/realtek - Add a model for Thinkpad T570 without DAC workaround
ALSA: hwdep: fix a left shifting 1 by 31 UB bug
RDMA/pvrdma: Fix missing pci disable in pvrdma_pci_probe()
mmc: block: Fix use-after-free issue for rpmb
ARM: dts: bcm: HR2: Fix PPI interrupt types
ARM: dts: bcm2835-rpi-zero-w: Fix led polarity
ARM: dts/imx6q-bx50v3: Set display interface clock parents
IB/qib: Call kobject_put() when kobject_init_and_add() fails
gpio: exar: Fix bad handling for ida_simple_get error path
ARM: uaccess: fix DACR mismatch with nested exceptions
ARM: uaccess: integrate uaccess_save and uaccess_restore
ARM: uaccess: consolidate uaccess asm to asm/uaccess-asm.h
ARM: 8843/1: use unified assembler in headers
ARM: 8970/1: decompressor: increase tag size
Input: synaptics-rmi4 - fix error return code in rmi_driver_probe()
Input: synaptics-rmi4 - really fix attn_data use-after-free
Input: i8042 - add ThinkPad S230u to i8042 reset list
Input: dlink-dir685-touchkeys - fix a typo in driver name
Input: xpad - add custom init packet for Xbox One S controllers
Input: evdev - call input_flush_device() on release(), not flush()
Input: usbtouchscreen - add support for BonXeon TP
samples: bpf: Fix build error
cifs: Fix null pointer check in cifs_read
riscv: stacktrace: Fix undefined reference to `walk_stackframe'
IB/i40iw: Remove bogus call to netdev_master_upper_dev_get()
net: freescale: select CONFIG_FIXED_PHY where needed
usb: gadget: legacy: fix redundant initialization warnings
usb: dwc3: pci: Enable extcon driver for Intel Merrifield
cachefiles: Fix race between read_waiter and read_copier involving op->to_do
gfs2: move privileged user check to gfs2_quota_lock_check
net: microchip: encx24j600: add missed kthread_stop
ALSA: usb-audio: add mapping for ASRock TRX40 Creator
gpio: tegra: mask GPIO IRQs during IRQ shutdown
ARM: dts: rockchip: fix pinctrl sub nodename for spi in rk322x.dtsi
ARM: dts: rockchip: swap clock-names of gpu nodes
arm64: dts: rockchip: swap interrupts interrupt-names rk3399 gpu node
arm64: dts: rockchip: fix status for &gmac2phy in rk3328-evb.dts
ARM: dts: rockchip: fix phy nodename for rk3228-evb
mlxsw: spectrum: Fix use-after-free of split/unsplit/type_set in case reload fails
net/mlx4_core: fix a memory leak bug.
net: sun: fix missing release regions in cas_init_one().
net/mlx5: Annotate mutex destroy for root ns
net/mlx5e: Update netdev txq on completions during closure
sctp: Start shutdown on association restart if in SHUTDOWN-SENT state and socket is closed
sctp: Don't add the shutdown timer if its already been added
r8152: support additional Microsoft Surface Ethernet Adapter variant
net sched: fix reporting the first-time use timestamp
net: revert "net: get rid of an signed integer overflow in ip_idents_reserve()"
net: qrtr: Fix passing invalid reference to qrtr_local_enqueue()
net/mlx5: Add command entry handling completion
net: ipip: fix wrong address family in init error path
net: inet_csk: Fix so_reuseport bind-address cache in tb->fast*
__netif_receive_skb_core: pass skb by reference
net: dsa: mt7530: fix roaming from DSA user ports
dpaa_eth: fix usage as DSA master, try 3
ax25: fix setsockopt(SO_BINDTODEVICE)
ANDROID: modules: fix lockprove warning
FROMGIT: USB: dummy-hcd: use configurable endpoint naming scheme
UPSTREAM: usb: raw-gadget: fix null-ptr-deref when reenabling endpoints
UPSTREAM: usb: raw-gadget: documentation updates
UPSTREAM: usb: raw-gadget: support stalling/halting/wedging endpoints
UPSTREAM: usb: raw-gadget: fix gadget endpoint selection
UPSTREAM: usb: raw-gadget: improve uapi headers comments
UPSTREAM: usb: raw-gadget: fix return value of ep read ioctls
UPSTREAM: usb: raw-gadget: fix raw_event_queue_fetch locking
UPSTREAM: usb: raw-gadget: Fix copy_to/from_user() checks
f2fs: fix wrong discard space
f2fs: compress: don't compress any datas after cp stop
f2fs: remove unneeded return value of __insert_discard_tree()
f2fs: fix wrong value of tracepoint parameter
f2fs: protect new segment allocation in expand_inode_data
f2fs: code cleanup by removing ifdef macro surrounding
writeback: Avoid skipping inode writeback
ANDROID: GKI: Update the ABI
ANDROID: GKI: update whitelist
ANDROID: GKI: support mm_event for FS/IO/UFS path
ANDROID: net: bpf: permit redirect from ingress L3 to egress L2 devices at near max mtu
FROMGIT: driver core: Update device link status correctly for SYNC_STATE_ONLY links
UPSTREAM: driver core: Fix handling of SYNC_STATE_ONLY + STATELESS device links
BACKPORT: driver core: Fix SYNC_STATE_ONLY device link implementation
ANDROID: Bulk update the ABI xml and qcom whitelist
Revert "ANDROID: Incremental fs: Avoid continually recalculating hashes"
f2fs: avoid inifinite loop to wait for flushing node pages at cp_error
f2fs: compress: fix zstd data corruption
f2fs: add compressed/gc data read IO stat
f2fs: fix potential use-after-free issue
f2fs: compress: don't handle non-compressed data in workqueue
f2fs: remove redundant assignment to variable err
f2fs: refactor resize_fs to avoid meta updates in progress
f2fs: use round_up to enhance calculation
f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS
f2fs: Avoid double lock for cp_rwsem during checkpoint
f2fs: report delalloc reserve as non-free in statfs for project quota
f2fs: Fix wrong stub helper update_sit_info
f2fs: compress: let lz4 compressor handle output buffer budget properly
f2fs: remove blk_plugging in block_operations
f2fs: introduce F2FS_IOC_RELEASE_COMPRESS_BLOCKS
f2fs: shrink spinlock coverage
f2fs: correctly fix the parent inode number during fsync()
f2fs: introduce mempool for {,de}compress intermediate page allocation
f2fs: introduce f2fs_bmap_compress()
f2fs: support fiemap on compressed inode
f2fs: support partial truncation on compressed inode
f2fs: remove redundant compress inode check
f2fs: use strcmp() in parse_options()
f2fs: Use the correct style for SPDX License Identifier
Conflicts:
Documentation/devicetree/bindings
Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt
Documentation/devicetree/bindings/usb/dwc3.txt
drivers/media/v4l2-core/v4l2-ctrls.c
drivers/mmc/core/queue.c
drivers/mmc/host/sdhci-msm.c
drivers/scsi/ufs/ufs-qcom.c
drivers/slimbus/qcom-ngd-ctrl.c
drivers/usb/gadget/composite.c
fs/crypto/keyring.c
fs/f2fs/data.c
include/linux/fs.h
include/linux/usb/gadget.h
include/uapi/linux/v4l2-controls.h
kernel/sched/cpufreq_schedutil.c
kernel/sched/fair.c
kernel/time/tick-sched.c
mm/vmalloc.c
net/netlink/genetlink.c
net/qrtr/qrtr.c
sound/core/compress_offload.c
sound/soc/soc-compress.c
Fixed errors:
drivers/scsi/ufs/ufshcd.c
drivers/soc/qcom/rq_stats.c
Change-Id: I06ea6a6c3f239045e2947f27af617aa6f523bfdb
Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
2020-10-14 08:33:38 -06:00
|
|
|
if (rq_info.init == 1 && wake_callback &&
|
2016-08-23 17:07:11 -06:00
|
|
|
tick_do_timer_cpu == smp_processor_id()) {
|
|
|
|
/*
|
|
|
|
* wakeup user if needed
|
|
|
|
*/
|
2020-07-20 13:30:36 -06:00
|
|
|
wake_callback();
|
2016-08-23 17:07:11 -06:00
|
|
|
}
|
|
|
|
}
|
2017-05-15 06:56:50 -06:00
|
|
|
else
|
|
|
|
ts->next_tick = 0;
|
2007-02-16 02:28:03 -07:00
|
|
|
|
2014-06-12 04:54:41 -06:00
|
|
|
/* No need to reprogram if we are in idle or full dynticks mode */
|
|
|
|
if (unlikely(ts->tick_stopped))
|
|
|
|
return HRTIMER_NORESTART;
|
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
hrtimer_forward(timer, now, tick_period);
|
|
|
|
|
|
|
|
return HRTIMER_RESTART;
|
|
|
|
}
|
|
|
|
|
2012-05-08 04:20:58 -06:00
|
|
|
static int sched_skew_tick;
|
|
|
|
|
2012-05-25 06:08:57 -06:00
|
|
|
static int __init skew_tick(char *str)
|
|
|
|
{
|
|
|
|
get_option(&str, &sched_skew_tick);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
early_param("skew_tick", skew_tick);
|
|
|
|
|
2007-02-16 02:28:03 -07:00
|
|
|
/**
|
|
|
|
* tick_setup_sched_timer - setup the tick emulation timer
|
|
|
|
*/
|
|
|
|
void tick_setup_sched_timer(void)
|
|
|
|
{
|
2014-08-17 11:30:25 -06:00
|
|
|
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
|
2007-02-16 02:28:03 -07:00
|
|
|
ktime_t now = ktime_get();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Emulate tick processing via per-CPU hrtimers:
|
|
|
|
*/
|
|
|
|
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
|
|
|
|
ts->sched_timer.function = tick_sched_timer;
|
|
|
|
|
2016-07-01 04:42:35 -06:00
|
|
|
/* Get the next period (per-CPU) */
|
2008-09-01 16:02:30 -06:00
|
|
|
hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
|
2007-02-16 02:28:03 -07:00
|
|
|
|
2012-11-21 12:31:52 -07:00
|
|
|
/* Offset the tick to avert jiffies_lock contention. */
|
2012-05-08 04:20:58 -06:00
|
|
|
if (sched_skew_tick) {
|
|
|
|
u64 offset = ktime_to_ns(tick_period) >> 1;
|
|
|
|
do_div(offset, num_possible_cpus());
|
|
|
|
offset *= smp_processor_id();
|
|
|
|
hrtimer_add_expires_ns(&ts->sched_timer, offset);
|
|
|
|
}
|
|
|
|
|
2015-04-14 15:08:52 -06:00
|
|
|
hrtimer_forward(&ts->sched_timer, now, tick_period);
|
|
|
|
hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
|
2015-05-26 16:50:33 -06:00
|
|
|
tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
|
2007-02-16 02:28:03 -07:00
|
|
|
}
|
2008-08-20 17:37:38 -06:00
|
|
|
#endif /* HIGH_RES_TIMERS */
|
2007-02-16 02:28:03 -07:00
|
|
|
|
2011-08-10 15:21:01 -06:00
|
|
|
#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
|
2007-02-16 02:28:03 -07:00
|
|
|
void tick_cancel_sched_timer(int cpu)
|
|
|
|
{
|
|
|
|
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
|
|
|
|
|
2008-08-20 17:37:38 -06:00
|
|
|
# ifdef CONFIG_HIGH_RES_TIMERS
|
2007-02-16 02:28:03 -07:00
|
|
|
if (ts->sched_timer.base)
|
|
|
|
hrtimer_cancel(&ts->sched_timer);
|
2008-08-20 17:37:38 -06:00
|
|
|
# endif
|
2008-03-04 15:59:55 -07:00
|
|
|
|
2013-05-03 07:02:50 -06:00
|
|
|
memset(ts, 0, sizeof(*ts));
|
2007-02-16 02:28:03 -07:00
|
|
|
}
|
2008-08-20 17:37:38 -06:00
|
|
|
#endif
|
2007-02-16 02:28:03 -07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Async notification about clocksource changes
|
|
|
|
*/
|
|
|
|
void tick_clock_notify(void)
|
|
|
|
{
|
|
|
|
int cpu;
|
|
|
|
|
|
|
|
for_each_possible_cpu(cpu)
|
|
|
|
set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Async notification about clock event changes
|
|
|
|
*/
|
|
|
|
void tick_oneshot_notify(void)
|
|
|
|
{
|
2014-08-17 11:30:25 -06:00
|
|
|
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
|
2007-02-16 02:28:03 -07:00
|
|
|
|
|
|
|
set_bit(0, &ts->check_clocks);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Check, if a change happened, which makes oneshot possible.
|
|
|
|
*
|
|
|
|
* Called cyclic from the hrtimer softirq (driven by the timer
|
|
|
|
* softirq) allow_nohz signals, that we can switch into low-res nohz
|
|
|
|
* mode, because high resolution timers are disabled (either compile
|
2015-05-07 06:35:59 -06:00
|
|
|
* or runtime). Called with interrupts disabled.
|
2007-02-16 02:28:03 -07:00
|
|
|
*/
|
|
|
|
int tick_check_oneshot_change(int allow_nohz)
|
|
|
|
{
|
2014-08-17 11:30:25 -06:00
|
|
|
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
|
2007-02-16 02:28:03 -07:00
|
|
|
|
|
|
|
if (!test_and_clear_bit(0, &ts->check_clocks))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
|
|
|
|
return 0;
|
|
|
|
|
2008-02-08 05:19:24 -07:00
|
|
|
if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
|
2007-02-16 02:28:03 -07:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (!allow_nohz)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
tick_nohz_switch_to_nohz();
|
|
|
|
return 0;
|
|
|
|
}
|
2017-01-26 16:37:08 -07:00
|
|
|
|
|
|
|
ktime_t *get_next_event_cpu(unsigned int cpu)
|
|
|
|
{
|
|
|
|
return &(per_cpu(tick_cpu_device, cpu).evtdev->next_event);
|
|
|
|
}
|
2017-01-26 16:37:08 -07:00
|
|
|
EXPORT_SYMBOL_GPL(get_next_event_cpu);
|