x86: Reduce clock calibration time during slave cpu startup
Reduce the startup time for slave cpus.

Add hooks for an arch-specific clock calibration function. These hooks are used on x86: if a newly started cpu has the same phys_proc_id as a core that is already active, uses the TSC for the delay loop, and has CONSTANT_TSC, reuse the already-calculated value of loops_per_jiffy.

This patch reduces the time required to start slave cpus on a 4096 cpu system from:

    465 sec  OLD
     62 sec  NEW

This reduces boot time on a 4096p system by almost 7 minutes. Nice...

Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: John Stultz <john.stultz@linaro.org>
[fix CONFIG_SMP=n build]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
1ea7c6737c
commit
b565201cf7
3 changed files with 46 additions and 5 deletions
|
@ -207,22 +207,28 @@ static void __cpuinit smp_callin(void)
|
|||
* Need to setup vector mappings before we enable interrupts.
|
||||
*/
|
||||
setup_vector_irq(smp_processor_id());
|
||||
|
||||
/*
|
||||
* Save our processor parameters. Note: this information
|
||||
* is needed for clock calibration.
|
||||
*/
|
||||
smp_store_cpu_info(cpuid);
|
||||
|
||||
/*
|
||||
* Get our bogomips.
|
||||
* Update loops_per_jiffy in cpu_data. Previous call to
|
||||
* smp_store_cpu_info() stored a value that is close but not as
|
||||
* accurate as the value just calculated.
|
||||
*
|
||||
* Need to enable IRQs because it can take longer and then
|
||||
* the NMI watchdog might kill us.
|
||||
*/
|
||||
local_irq_enable();
|
||||
calibrate_delay();
|
||||
cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
|
||||
local_irq_disable();
|
||||
pr_debug("Stack at about %p\n", &cpuid);
|
||||
|
||||
/*
|
||||
* Save our processor parameters
|
||||
*/
|
||||
smp_store_cpu_info(cpuid);
|
||||
|
||||
/*
|
||||
* This must be done before setting cpu_online_mask
|
||||
* or calling notify_cpu_starting.
|
||||
|
|
|
@ -995,3 +995,23 @@ void __init tsc_init(void)
|
|||
check_system_tsc_reliable();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* If we have a constant TSC and are using the TSC for the delay loop,
|
||||
* we can skip clock calibration if another cpu in the same socket has already
|
||||
* been calibrated. This assumes that CONSTANT_TSC applies to all
|
||||
* cpus in the socket - this should be a safe assumption.
|
||||
*/
|
||||
unsigned long __cpuinit calibrate_delay_is_known(void)
|
||||
{
|
||||
int i, cpu = smp_processor_id();
|
||||
|
||||
if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
|
||||
return 0;
|
||||
|
||||
for_each_online_cpu(i)
|
||||
if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
|
||||
return cpu_data(i).loops_per_jiffy;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -246,6 +246,19 @@ static unsigned long __cpuinit calibrate_delay_converge(void)
|
|||
|
||||
static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 };
|
||||
|
||||
/*
|
||||
* Check if cpu calibration delay is already known. For example,
|
||||
* some processors with multi-core sockets may have all cores
|
||||
* with the same calibration delay.
|
||||
*
|
||||
* Architectures should override this function if a faster calibration
|
||||
* method is available.
|
||||
*/
|
||||
unsigned long __attribute__((weak)) __cpuinit calibrate_delay_is_known(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __cpuinit calibrate_delay(void)
|
||||
{
|
||||
unsigned long lpj;
|
||||
|
@ -265,6 +278,8 @@ void __cpuinit calibrate_delay(void)
|
|||
lpj = lpj_fine;
|
||||
pr_info("Calibrating delay loop (skipped), "
|
||||
"value calculated using timer frequency.. ");
|
||||
} else if ((lpj = calibrate_delay_is_known())) {
|
||||
;
|
||||
} else if ((lpj = calibrate_delay_direct()) != 0) {
|
||||
if (!printed)
|
||||
pr_info("Calibrating delay using timer "
|
||||
|
|
Loading…
Reference in a new issue