ACPI: Processor native C-states using MWAIT

Intel processors starting with the Core Duo support
support processor native C-state using the MWAIT instruction.
Refer: Intel Architecture Software Developer's Manual
http://www.intel.com/design/Pentium4/manuals/253668.htm

Platform firmware exports the support for Native C-state to OS using
ACPI _PDC and _CST methods.
Refer: Intel Processor Vendor-Specific ACPI: Interface Specification
http://www.intel.com/technology/iapc/acpi/downloads/302223.htm

With Processor Native C-state, we use 'MWAIT' instruction on the processor
to enter different C-states (C1, C2, C3).  We won't use the special IO
ports to enter C-state and no SMM mode etc required to enter C-state.
Overall this will mean better C-state support.

One major advantage of using MWAIT for all C-states is, with this and
"treat interrupt as break event" feature of MWAIT, we can now get accurate
timing for the time spent in C1, C2, ..  states.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Len Brown <len.brown@intel.com>
This commit is contained in:
Venkatesh Pallipadi 2006-09-25 16:28:13 -07:00 committed by Len Brown
parent b4bd8c6643
commit 991528d734
8 changed files with 242 additions and 56 deletions

View file

@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/cpu.h>
#include <acpi/processor.h>
#include <asm/acpi.h>
@ -41,5 +42,124 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
flags->bm_check = 1;
}
}
EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
/* The code below handles cstate entry with monitor-mwait pair on Intel*/
struct cstate_entry_s {
struct {
unsigned int eax;
unsigned int ecx;
} states[ACPI_PROCESSOR_MAX_POWER];
};
static struct cstate_entry_s *cpu_cstate_entry; /* per CPU ptr */
static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
#define MWAIT_SUBSTATE_MASK (0xf)
#define MWAIT_SUBSTATE_SIZE (4)
#define CPUID_MWAIT_LEAF (5)
#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
#define CPUID5_ECX_INTERRUPT_BREAK (0x2)
#define MWAIT_ECX_INTERRUPT_BREAK (0x1)
#define NATIVE_CSTATE_BEYOND_HALT (2)
int acpi_processor_ffh_cstate_probe(unsigned int cpu,
struct acpi_processor_cx *cx, struct acpi_power_register *reg)
{
struct cstate_entry_s *percpu_entry;
struct cpuinfo_x86 *c = cpu_data + cpu;
cpumask_t saved_mask;
int retval;
unsigned int eax, ebx, ecx, edx;
unsigned int edx_part;
unsigned int cstate_type; /* C-state type and not ACPI C-state type */
unsigned int num_cstate_subtype;
if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF )
return -1;
if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT)
return -1;
percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
percpu_entry->states[cx->index].eax = 0;
percpu_entry->states[cx->index].ecx = 0;
/* Make sure we are running on right CPU */
saved_mask = current->cpus_allowed;
retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
if (retval)
return -1;
cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
/* Check whether this particular cx_type (in CST) is supported or not */
cstate_type = (cx->address >> MWAIT_SUBSTATE_SIZE) + 1;
edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
retval = 0;
if (num_cstate_subtype < (cx->address & MWAIT_SUBSTATE_MASK)) {
retval = -1;
goto out;
}
/* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */
if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
!(ecx & CPUID5_ECX_INTERRUPT_BREAK)) {
retval = -1;
goto out;
}
percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK;
/* Use the hint in CST */
percpu_entry->states[cx->index].eax = cx->address;
if (!mwait_supported[cstate_type]) {
mwait_supported[cstate_type] = 1;
printk(KERN_DEBUG "Monitor-Mwait will be used to enter C-%d "
"state\n", cx->type);
}
out:
set_cpus_allowed(current, saved_mask);
return retval;
}
EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
{
unsigned int cpu = smp_processor_id();
struct cstate_entry_s *percpu_entry;
percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
mwait_idle_with_hints(percpu_entry->states[cx->index].eax,
percpu_entry->states[cx->index].ecx);
}
EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter);
static int __init ffh_cstate_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
if (c->x86_vendor != X86_VENDOR_INTEL)
return -1;
cpu_cstate_entry = alloc_percpu(struct cstate_entry_s);
return 0;
}
static void __exit ffh_cstate_exit(void)
{
if (cpu_cstate_entry) {
free_percpu(cpu_cstate_entry);
cpu_cstate_entry = NULL;
}
}
arch_initcall(ffh_cstate_init);
__exitcall(ffh_cstate_exit);

View file

@ -236,18 +236,26 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
* We execute MONITOR against need_resched and enter optimized wait state
* through MWAIT. Whenever someone changes need_resched, we would be woken
* up from MWAIT (without an IPI).
*
* New with Core Duo processors, MWAIT can take some hints based on CPU
* capability.
*/
void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
if (!need_resched()) {
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
if (!need_resched())
__mwait(eax, ecx);
}
}
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
local_irq_enable();
while (!need_resched()) {
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
if (need_resched())
break;
__mwait(0, 0);
}
while (!need_resched())
mwait_idle_with_hints(0, 0);
}
void __devinit select_idle_routine(const struct cpuinfo_x86 *c)

View file

@ -238,18 +238,26 @@ void cpu_idle (void)
* We execute MONITOR against need_resched and enter optimized wait state
* through MWAIT. Whenever someone changes need_resched, we would be woken
* up from MWAIT (without an IPI).
*
* New with Core Duo processors, MWAIT can take some hints based on CPU
* capability.
*/
void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
if (!need_resched()) {
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
if (!need_resched())
__mwait(eax, ecx);
}
}
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
local_irq_enable();
while (!need_resched()) {
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
if (need_resched())
break;
__mwait(0, 0);
}
while (!need_resched())
mwait_idle_with_hints(0,0);
}
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)

View file

@ -219,6 +219,23 @@ static void acpi_safe_halt(void)
static atomic_t c3_cpu_count;
/* Common C-state entry for C2, C3, .. */
static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
{
if (cstate->space_id == ACPI_CSTATE_FFH) {
/* Call into architectural FFH based C-state */
acpi_processor_ffh_cstate_enter(cstate);
} else {
int unused;
/* IO port based C-state */
inb(cstate->address);
/* Dummy wait op - must do something useless after P_LVL2 read
because chipsets cannot guarantee that STPCLK# signal
gets asserted in time to freeze execution properly. */
unused = inl(acpi_fadt.xpm_tmr_blk.address);
}
}
static void acpi_processor_idle(void)
{
struct acpi_processor *pr = NULL;
@ -361,11 +378,7 @@ static void acpi_processor_idle(void)
/* Get start time (ticks) */
t1 = inl(acpi_fadt.xpm_tmr_blk.address);
/* Invoke C2 */
inb(cx->address);
/* Dummy wait op - must do something useless after P_LVL2 read
because chipsets cannot guarantee that STPCLK# signal
gets asserted in time to freeze execution properly. */
t2 = inl(acpi_fadt.xpm_tmr_blk.address);
acpi_cstate_enter(cx);
/* Get end time (ticks) */
t2 = inl(acpi_fadt.xpm_tmr_blk.address);
@ -401,9 +414,7 @@ static void acpi_processor_idle(void)
/* Get start time (ticks) */
t1 = inl(acpi_fadt.xpm_tmr_blk.address);
/* Invoke C3 */
inb(cx->address);
/* Dummy wait op (see above) */
t2 = inl(acpi_fadt.xpm_tmr_blk.address);
acpi_cstate_enter(cx);
/* Get end time (ticks) */
t2 = inl(acpi_fadt.xpm_tmr_blk.address);
if (pr->flags.bm_check) {
@ -628,20 +639,16 @@ static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
return 0;
}
static int acpi_processor_get_power_info_default_c1(struct acpi_processor *pr)
static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
{
/* Zero initialize all the C-states info. */
memset(pr->power.states, 0, sizeof(pr->power.states));
/* set the first C-State to C1 */
pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
/* the C0 state only exists as a filler in our array,
* and all processors need to support C1 */
if (!pr->power.states[ACPI_STATE_C1].valid) {
/* set the first C-State to C1 */
/* all processors need to support C1 */
pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
pr->power.states[ACPI_STATE_C1].valid = 1;
}
/* the C0 state only exists as a filler in our array */
pr->power.states[ACPI_STATE_C0].valid = 1;
pr->power.states[ACPI_STATE_C1].valid = 1;
return 0;
}
@ -658,12 +665,7 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
if (nocst)
return -ENODEV;
current_count = 1;
/* Zero initialize C2 onwards and prepare for fresh CST lookup */
for (i = 2; i < ACPI_PROCESSOR_MAX_POWER; i++)
memset(&(pr->power.states[i]), 0,
sizeof(struct acpi_processor_cx));
current_count = 0;
status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
if (ACPI_FAILURE(status)) {
@ -718,22 +720,39 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
(reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
continue;
cx.address = (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) ?
0 : reg->address;
/* There should be an easy way to extract an integer... */
obj = (union acpi_object *)&(element->package.elements[1]);
if (obj->type != ACPI_TYPE_INTEGER)
continue;
cx.type = obj->integer.value;
/*
* Some buggy BIOSes won't list C1 in _CST -
* Let acpi_processor_get_power_info_default() handle them later
*/
if (i == 1 && cx.type != ACPI_STATE_C1)
current_count++;
if ((cx.type != ACPI_STATE_C1) &&
(reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO))
continue;
cx.address = reg->address;
cx.index = current_count + 1;
if ((cx.type < ACPI_STATE_C2) || (cx.type > ACPI_STATE_C3))
continue;
cx.space_id = ACPI_CSTATE_SYSTEMIO;
if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
if (acpi_processor_ffh_cstate_probe
(pr->id, &cx, reg) == 0) {
cx.space_id = ACPI_CSTATE_FFH;
} else if (cx.type != ACPI_STATE_C1) {
/*
* C1 is a special case where FIXED_HARDWARE
* can be handled in non-MWAIT way as well.
* In that case, save this _CST entry info.
* That is, we retain space_id of SYSTEM_IO for
* halt based C1.
* Otherwise, ignore this info and continue.
*/
continue;
}
}
obj = (union acpi_object *)&(element->package.elements[2]);
if (obj->type != ACPI_TYPE_INTEGER)
@ -938,12 +957,18 @@ static int acpi_processor_get_power_info(struct acpi_processor *pr)
/* NOTE: the idle thread may not be running while calling
* this function */
/* Adding C1 state */
acpi_processor_get_power_info_default_c1(pr);
/* Zero initialize all the C-states info. */
memset(pr->power.states, 0, sizeof(pr->power.states));
result = acpi_processor_get_power_info_cst(pr);
if (result == -ENODEV)
acpi_processor_get_power_info_fadt(pr);
if (result)
return result;
acpi_processor_get_power_info_default(pr);
pr->power.count = acpi_processor_power_verify(pr);
/*

View file

@ -13,6 +13,7 @@
#define ACPI_PDC_SMP_C_SWCOORD (0x0040)
#define ACPI_PDC_SMP_T_SWCOORD (0x0080)
#define ACPI_PDC_C_C1_FFH (0x0100)
#define ACPI_PDC_C_C2C3_FFH (0x0200)
#define ACPI_PDC_EST_CAPABILITY_SMP (ACPI_PDC_SMP_C1PT | \
ACPI_PDC_C_C1_HALT | \
@ -23,8 +24,10 @@
ACPI_PDC_SMP_P_SWCOORD | \
ACPI_PDC_P_FFH)
#define ACPI_PDC_C_CAPABILITY_SMP (ACPI_PDC_SMP_C2C3 | \
ACPI_PDC_SMP_C1PT | \
ACPI_PDC_C_C1_HALT)
#define ACPI_PDC_C_CAPABILITY_SMP (ACPI_PDC_SMP_C2C3 | \
ACPI_PDC_SMP_C1PT | \
ACPI_PDC_C_C1_HALT | \
ACPI_PDC_C_C1_FFH | \
ACPI_PDC_C_C2C3_FFH)
#endif /* __PDC_INTEL_H__ */

View file

@ -29,6 +29,9 @@
#define DOMAIN_COORD_TYPE_SW_ANY 0xfd
#define DOMAIN_COORD_TYPE_HW_ALL 0xfe
#define ACPI_CSTATE_SYSTEMIO (0)
#define ACPI_CSTATE_FFH (1)
/* Power Management */
struct acpi_processor_cx;
@ -58,6 +61,8 @@ struct acpi_processor_cx {
u8 valid;
u8 type;
u32 address;
u8 space_id;
u8 index;
u32 latency;
u32 latency_ticks;
u32 power;
@ -206,6 +211,9 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr);
#ifdef ARCH_HAS_POWER_INIT
void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
unsigned int cpu);
int acpi_processor_ffh_cstate_probe(unsigned int cpu,
struct acpi_processor_cx *cx, struct acpi_power_register *reg);
void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cstate);
#else
static inline void acpi_processor_power_init_bm_check(struct
acpi_processor_flags
@ -214,6 +222,16 @@ static inline void acpi_processor_power_init_bm_check(struct
flags->bm_check = 1;
return;
}
static inline int acpi_processor_ffh_cstate_probe(unsigned int cpu,
struct acpi_processor_cx *cx, struct acpi_power_register *reg)
{
return -1;
}
static inline void acpi_processor_ffh_cstate_enter(
struct acpi_processor_cx *cstate)
{
return;
}
#endif
/* in processor_perflib.c */

View file

@ -306,6 +306,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
: :"a" (eax), "c" (ecx));
}
extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
/* from system description table in BIOS. Mostly for MCA use, but
others may find it useful. */
extern unsigned int machine_id;

View file

@ -475,6 +475,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
: :"a" (eax), "c" (ecx));
}
extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
#define stack_current() \
({ \
struct thread_info *ti; \