From 0a3b15ac3cc3ddc791901e12bdc930b5fa11a30a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 May 2013 21:54:37 +0200 Subject: [PATCH 01/30] ACPI / PM: Move processor suspend/resume to syscore_ops The system suspend routine of the ACPI processor driver saves the BUS_MASTER_RLD register and its resume routine restores it. However, there can be only one such register in the system and it really should be saved after non-boot CPUs have been offlined and restored before they are put back online during resume. For this reason, move the saving and restoration of BUS_MASTER_RLD to syscore suspend and syscore resume, respectively, and drop the no longer necessary suspend/resume callbacks from the ACPI processor driver. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_driver.c | 8 ++++---- drivers/acpi/processor_idle.c | 29 +++++++++++++++++++---------- include/acpi/processor.h | 10 ++++++++-- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index bec717ffd25f..c266cdc11784 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -95,9 +95,6 @@ static const struct acpi_device_id processor_device_ids[] = { }; MODULE_DEVICE_TABLE(acpi, processor_device_ids); -static SIMPLE_DEV_PM_OPS(acpi_processor_pm, - acpi_processor_suspend, acpi_processor_resume); - static struct acpi_driver acpi_processor_driver = { .name = "processor", .class = ACPI_PROCESSOR_CLASS, @@ -107,7 +104,6 @@ static struct acpi_driver acpi_processor_driver = { .remove = acpi_processor_remove, .notify = acpi_processor_notify, }, - .drv.pm = &acpi_processor_pm, }; #define INSTALL_NOTIFY_HANDLER 1 @@ -934,6 +930,8 @@ static int __init acpi_processor_init(void) if (result < 0) return result; + acpi_processor_syscore_init(); + acpi_processor_install_hotplug_notify(); acpi_thermal_cpufreq_init(); @@ -956,6 +954,8 @@ static void __exit acpi_processor_exit(void) acpi_processor_uninstall_hotplug_notify(); + acpi_processor_syscore_exit(); + acpi_bus_unregister_driver(&acpi_processor_driver); return; diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index f0df2c9434d2..eb133c77aadb 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -34,6 +34,7 @@ #include /* need_resched() */ #include #include +#include /* * Include the apic definitions for x86 to have the APIC timer related defines @@ -210,33 +211,41 @@ static void lapic_timer_state_broadcast(struct acpi_processor *pr, #endif +#ifdef CONFIG_PM_SLEEP static u32 saved_bm_rld; -static void acpi_idle_bm_rld_save(void) +int acpi_processor_suspend(void) { acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &saved_bm_rld); + return 0; } -static void acpi_idle_bm_rld_restore(void) + +void acpi_processor_resume(void) { u32 resumed_bm_rld; acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &resumed_bm_rld); + if (resumed_bm_rld == saved_bm_rld) + return; - if (resumed_bm_rld != saved_bm_rld) - acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, saved_bm_rld); + acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, saved_bm_rld); } -int acpi_processor_suspend(struct device *dev) +static struct syscore_ops acpi_processor_syscore_ops = { + .suspend = acpi_processor_suspend, + .resume = acpi_processor_resume, +}; + +void acpi_processor_syscore_init(void) { - acpi_idle_bm_rld_save(); - return 0; + register_syscore_ops(&acpi_processor_syscore_ops); } -int acpi_processor_resume(struct device *dev) +void acpi_processor_syscore_exit(void) { - acpi_idle_bm_rld_restore(); - return 0; + unregister_syscore_ops(&acpi_processor_syscore_ops); } +#endif /* CONFIG_PM_SLEEP */ #if defined(CONFIG_X86) static void tsc_check_state(int state) diff --git a/include/acpi/processor.h b/include/acpi/processor.h index b327b5a9296d..ea69367fdd3b 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -329,10 +329,16 @@ int acpi_processor_power_init(struct acpi_processor *pr); int acpi_processor_power_exit(struct acpi_processor *pr); int acpi_processor_cst_has_changed(struct acpi_processor *pr); int acpi_processor_hotplug(struct acpi_processor *pr); -int acpi_processor_suspend(struct device *dev); -int acpi_processor_resume(struct device *dev); extern struct cpuidle_driver acpi_idle_driver; +#ifdef CONFIG_PM_SLEEP +void acpi_processor_syscore_init(void); +void acpi_processor_syscore_exit(void); +#else +static inline void acpi_processor_syscore_init(void) {} +static inline void acpi_processor_syscore_exit(void) {} +#endif + /* in processor_thermal.c */ int acpi_processor_get_limit_info(struct acpi_processor *pr); extern const struct thermal_cooling_device_ops processor_cooling_ops; From 36200af867e2cf5adc1ff77214b2524f6111736f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Simon=20M=C3=B6ller?= Date: Tue, 30 Apr 2013 09:39:04 +0000 Subject: [PATCH 02/30] ACPI: Fix section to __init. Align with usage in acpixf.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes warning during compilation with clang. [rjw: Subject and changelog] Signed-off-by: Jan-Simon Möller Signed-off-by: Rafael J. Wysocki --- include/acpi/acpiosxf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index 5b3d2bd4813a..64b8c7639520 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -77,7 +77,7 @@ struct acpi_signal_fatal_info { /* * OSL Initialization and shutdown primitives */ -acpi_status __initdata acpi_os_initialize(void); +acpi_status __init acpi_os_initialize(void); acpi_status acpi_os_terminate(void); From 4ef366c583d6180b1c951147869ee5a3038834f2 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 6 May 2013 08:23:43 +0000 Subject: [PATCH 03/30] ACPI video: ignore BIOS initial backlight value for HP 1000 On HP 1000 lapops, BIOS reports minimum backlight on boot and causes backlight to dim completely. This ignores the initial backlight values and set to max brightness. References:: https://bugs.launchpad.net/bugs/1167760 Signed-off-by: Alex Hung Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index c3932d0876e0..5b32e15a65ce 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -456,6 +456,14 @@ static struct dmi_system_id video_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dm4 Notebook PC"), }, }, + { + .callback = video_ignore_initial_backlight, + .ident = "HP 1000 Notebook PC", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP 1000 Notebook PC"), + }, + }, {} }; From 28fe5c825f8e15744d04c7c1b8df197950923ecd Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Mon, 6 May 2013 03:23:40 +0000 Subject: [PATCH 04/30] ACPI / EC: Restart transaction even when the IBF flag set The EC driver works abnormally with IBF flag always set. IBF means "The host has written a byte of data to the command or data port, but the embedded controller has not yet read it". If IBF is set in the EC status and not cleared, this will cause all subsequent EC requests to fail with a timeout error. Change the EC driver so that it doesn't refuse to restart a transaction if IBF is set in the status. Also increase the number of transaction restarts to 5, as it turns out that 2 is not sufficient in some cases. This bug happens on several different machines (Asus V1S, Dell Latitude E6530, Samsung R719, Acer Aspire 5930G, Sony Vaio SR19VN and others). [rjw: Changelog] References: https://bugzilla.kernel.org/show_bug.cgi?id=14733 References: https://bugzilla.kernel.org/show_bug.cgi?id=15560 References: https://bugzilla.kernel.org/show_bug.cgi?id=15946 References: https://bugzilla.kernel.org/show_bug.cgi?id=42945 References: https://bugzilla.kernel.org/show_bug.cgi?id=48221 Signed-off-by: Lan Tianyu Cc: All Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index d45b2871d33b..edc00818c803 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -223,7 +223,7 @@ static int ec_check_sci_sync(struct acpi_ec *ec, u8 state) static int ec_poll(struct acpi_ec *ec) { unsigned long flags; - int repeat = 2; /* number of command restarts */ + int repeat = 5; /* number of command restarts */ while (repeat--) { unsigned long delay = jiffies + msecs_to_jiffies(ec_delay); @@ -241,8 +241,6 @@ static int ec_poll(struct acpi_ec *ec) } advance_transaction(ec, acpi_ec_read_status(ec)); } while (time_before(jiffies, delay)); - if (acpi_ec_read_status(ec) & ACPI_EC_FLAG_IBF) - break; pr_debug(PREFIX "controller reset, restart transaction\n"); spin_lock_irqsave(&ec->lock, flags); start_transaction(ec); From 0ab5bb64937d76c660c29813d8de0f4b47bf7550 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Wed, 8 May 2013 07:28:46 +0000 Subject: [PATCH 05/30] ACPI / AC: Add sleep quirk for Thinkpad e530 The Thinkpad e530's BIOS notifies the AC device first and then sleeps for certain amount of time before doing real work in the EC event handler (_Qxx): Method (_Q27, 0, NotSerialized) { Notify (AC, 0x80) Sleep (0x03E8) Store (Zero, PWRS) PNOT () } This causes the AC driver to report an outdated AC state to user space, because it reads the state information from the device while the EC handler is sleeping. Introduce a quirk to cause the AC driver to wait in acpi_ac_notify() before calling acpi_ac_get_state() on systems known to have this problem and add Thinkpad e530 to the list of quirky machines (with a 1s delay which has been verified to be sufficient for that machine). [rjw: Changelog] References: https://bugzilla.kernel.org/show_bug.cgi?id=45221 Signed-off-by: Lan Tianyu Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ac.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index 00d2efd674df..4f4e741d34b2 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #ifdef CONFIG_ACPI_PROCFS_POWER #include #include @@ -74,6 +76,8 @@ static int acpi_ac_resume(struct device *dev); #endif static SIMPLE_DEV_PM_OPS(acpi_ac_pm, NULL, acpi_ac_resume); +static int ac_sleep_before_get_state_ms; + static struct acpi_driver acpi_ac_driver = { .name = "ac", .class = ACPI_AC_CLASS, @@ -252,6 +256,16 @@ static void acpi_ac_notify(struct acpi_device *device, u32 event) case ACPI_AC_NOTIFY_STATUS: case ACPI_NOTIFY_BUS_CHECK: case ACPI_NOTIFY_DEVICE_CHECK: + /* + * A buggy BIOS may notify AC first and then sleep for + * a specific time before doing actual operations in the + * EC event handler (_Qxx). This will cause the AC state + * reported by the ACPI event to be incorrect, so wait for a + * specific time for the EC event handler to make progress. + */ + if (ac_sleep_before_get_state_ms > 0) + msleep(ac_sleep_before_get_state_ms); + acpi_ac_get_state(ac); acpi_bus_generate_proc_event(device, event, (u32) ac->state); acpi_bus_generate_netlink_event(device->pnp.device_class, @@ -264,6 +278,24 @@ static void acpi_ac_notify(struct acpi_device *device, u32 event) return; } +static int thinkpad_e530_quirk(const struct dmi_system_id *d) +{ + ac_sleep_before_get_state_ms = 1000; + return 0; +} + +static struct dmi_system_id ac_dmi_table[] = { + { + .callback = thinkpad_e530_quirk, + .ident = "thinkpad e530", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "32597CG"), + }, + }, + {}, +}; + static int acpi_ac_add(struct acpi_device *device) { int result = 0; @@ -312,6 +344,7 @@ static int acpi_ac_add(struct acpi_device *device) kfree(ac); } + dmi_check_system(ac_dmi_table); return result; } From bb08be78721bed02b147448d2cb404babc369cfe Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 29 Apr 2013 13:24:44 +0000 Subject: [PATCH 06/30] cpufreq: ARM big LITTLE: Select PM_OPP The ARM big LITTLE cpufreq driver uses the OPP layer for its functionality. Select it in Kconfig. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index f3af18b9acc5..a78ce4490c61 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -5,6 +5,7 @@ config ARM_BIG_LITTLE_CPUFREQ tristate depends on ARM_CPU_TOPOLOGY + select PM_OPP config ARM_DT_BL_CPUFREQ tristate "Generic ARM big LITTLE CPUfreq driver probed via DT" From 996905f3334506296c8a4bec63695bcf7a9df159 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 29 Apr 2013 13:24:45 +0000 Subject: [PATCH 07/30] cpufreq: ARM big LITTLE DT: Return correct transition latency By mistake we are returning zero for successful call to dt_get_transition_latency(), whereas we should return transition_latency. Fix that. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/arm_big_little_dt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/arm_big_little_dt.c b/drivers/cpufreq/arm_big_little_dt.c index 44be3115375c..d5ae4d2362e3 100644 --- a/drivers/cpufreq/arm_big_little_dt.c +++ b/drivers/cpufreq/arm_big_little_dt.c @@ -78,7 +78,7 @@ static int dt_get_transition_latency(struct device *cpu_dev) of_node_put(np); of_node_put(parent); - return 0; + return transition_latency; } return -ENODEV; From 3c792e0fe17858105b72516f709c48fc8e4a7523 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 29 Apr 2013 13:24:46 +0000 Subject: [PATCH 08/30] cpufreq: ARM big LITTLE DT: Return CPUFREQ_ETERNAL if clock-latency isn't found If "/cpus" node isn't present or "clock-latency" isn't defined we are returning error currently. Let's return CPUFREQ_ETERNAL instead, so that we don't fail. Flag appropriate messages to user in such cases. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/arm_big_little_dt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/arm_big_little_dt.c b/drivers/cpufreq/arm_big_little_dt.c index d5ae4d2362e3..173ed059d95f 100644 --- a/drivers/cpufreq/arm_big_little_dt.c +++ b/drivers/cpufreq/arm_big_little_dt.c @@ -66,8 +66,8 @@ static int dt_get_transition_latency(struct device *cpu_dev) parent = of_find_node_by_path("/cpus"); if (!parent) { - pr_err("failed to find OF /cpus\n"); - return -ENOENT; + pr_info("Failed to find OF /cpus. Use CPUFREQ_ETERNAL transition latency\n"); + return CPUFREQ_ETERNAL; } for_each_child_of_node(parent, np) { @@ -81,7 +81,8 @@ static int dt_get_transition_latency(struct device *cpu_dev) return transition_latency; } - return -ENODEV; + pr_info("clock-latency isn't found, use CPUFREQ_ETERNAL transition latency\n"); + return CPUFREQ_ETERNAL; } static struct cpufreq_arm_bL_ops dt_bL_ops = { From 4521adf85cd18e4652b9285846835f74418bd88f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 29 Apr 2013 13:24:47 +0000 Subject: [PATCH 09/30] cpufreq: ARM big LITTLE: Move cpu_to_cluster() to arm_big_little.h The cpu_to_cluster() function may be used by glue drivers, so it's better to keep it in arm_big_little.h. [rjw: Changelog] Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/arm_big_little.c | 5 ----- drivers/cpufreq/arm_big_little.h | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index dbdf677d2f36..fd7beed801f8 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -40,11 +40,6 @@ static struct clk *clk[MAX_CLUSTERS]; static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS]; static atomic_t cluster_usage[MAX_CLUSTERS] = {ATOMIC_INIT(0), ATOMIC_INIT(0)}; -static int cpu_to_cluster(int cpu) -{ - return topology_physical_package_id(cpu); -} - static unsigned int bL_cpufreq_get(unsigned int cpu) { u32 cur_cluster = cpu_to_cluster(cpu); diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h index 70f18fc12d4a..79b2ce17884d 100644 --- a/drivers/cpufreq/arm_big_little.h +++ b/drivers/cpufreq/arm_big_little.h @@ -34,6 +34,11 @@ struct cpufreq_arm_bL_ops { int (*init_opp_table)(struct device *cpu_dev); }; +static inline int cpu_to_cluster(int cpu) +{ + return topology_physical_package_id(cpu); +} + int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops); void bL_cpufreq_unregister(struct cpufreq_arm_bL_ops *ops); From 2b80f3138e8470194745a6b954b4905060ab4067 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 29 Apr 2013 13:24:48 +0000 Subject: [PATCH 10/30] cpufreq: ARM big LITTLE: Improve print message The message printed at the end of driver->init() doesn't include the "cpufreq" string at all and so is difficult to find in dmesg. Add function name to that message to clearly state where the message is coming from. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/arm_big_little.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index fd7beed801f8..5d7f53fcd6f5 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -187,7 +187,7 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy) cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu)); - dev_info(cpu_dev, "CPU %d initialized\n", policy->cpu); + dev_info(cpu_dev, "%s: CPU %d initialized\n", __func__, policy->cpu); return 0; } From a97c98adddbe98e824b69e6d7b320c8dc91fe581 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 30 Apr 2013 14:32:17 +0000 Subject: [PATCH 11/30] cpufreq: governors: Fix CPUFREQ_GOV_POLICY_{INIT|EXIT} notifiers There are two types of INIT/EXIT activities that we need to do for governors: - Done only once per governor (doesn't depend how many instances of the governor there are). eg: cpufreq_register_notifier() for conservative governor. - Done per governor instance, eg: sysfs_{create|remove}_group(). There were some corner cases where current code isn't able to handle them separately and so failing for some test cases. We use two separate variables now for keeping track of above two requirements. - governor->initialized for first one - dbs_data->usage_count for per governor instance Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 11 +++++++---- drivers/cpufreq/cpufreq_governor.h | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 443442df113b..5af40ad82d23 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -255,6 +255,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, if (have_governor_per_policy()) { WARN_ON(dbs_data); } else if (dbs_data) { + dbs_data->usage_count++; policy->governor_data = dbs_data; return 0; } @@ -266,6 +267,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, } dbs_data->cdata = cdata; + dbs_data->usage_count = 1; rc = cdata->init(dbs_data); if (rc) { pr_err("%s: POLICY_INIT: init() failed\n", __func__); @@ -294,7 +296,8 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, latency * LATENCY_MULTIPLIER)); - if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + if ((cdata->governor == GOV_CONSERVATIVE) && + (!policy->governor->initialized)) { struct cs_ops *cs_ops = dbs_data->cdata->gov_ops; cpufreq_register_notifier(cs_ops->notifier_block, @@ -306,12 +309,12 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, return 0; case CPUFREQ_GOV_POLICY_EXIT: - if ((policy->governor->initialized == 1) || - have_governor_per_policy()) { + if (!--dbs_data->usage_count) { sysfs_remove_group(get_governor_parent_kobj(policy), get_sysfs_attr(dbs_data)); - if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + if ((dbs_data->cdata->governor == GOV_CONSERVATIVE) && + (policy->governor->initialized == 1)) { struct cs_ops *cs_ops = dbs_data->cdata->gov_ops; cpufreq_unregister_notifier(cs_ops->notifier_block, diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 8ac33538d0bd..e16a96130cb3 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -211,6 +211,7 @@ struct common_dbs_data { struct dbs_data { struct common_dbs_data *cdata; unsigned int min_sampling_rate; + int usage_count; void *tuners; /* dbs_mutex protects dbs_enable in governor start/stop */ From d96038e0fa00f42a5d6711884bef0a725cdc70bb Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 30 Apr 2013 14:32:18 +0000 Subject: [PATCH 12/30] cpufreq: Issue CPUFREQ_GOV_POLICY_EXIT notifier before dropping policy refcount We must call __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT) before calling cpufreq_cpu_put(data), so that policy kobject have valid fields. Otherwise, removing last online cpu of policy->cpus causes this crash for ondemand/conservative governor. [] (sysfs_find_dirent+0xe/0xa8) from [] (sysfs_get_dirent+0x21/0x58) [] (sysfs_get_dirent+0x21/0x58) from [] (sysfs_remove_group+0x85/0xbc) [] (sysfs_remove_group+0x85/0xbc) from [] (cpufreq_governor_dbs+0x369/0x4a0) [] (cpufreq_governor_dbs+0x369/0x4a0) from [] (__cpufreq_governor+0x2b/0x8c) [] (__cpufreq_governor+0x2b/0x8c) from [] (__cpufreq_remove_dev.isra.12+0x15b/0x250) [] (__cpufreq_remove_dev.isra.12+0x15b/0x250) from [] (cpufreq_cpu_callback+0x2f/0x3c) [] (cpufreq_cpu_callback+0x2f/0x3c) from [] (notifier_call_chain+0x45/0x54) [] (notifier_call_chain+0x45/0x54) from [] (__cpu_notify+0x1d/0x34) [] (__cpu_notify+0x1d/0x34) from [] (_cpu_down+0x63/0x1ac) [] (_cpu_down+0x63/0x1ac) from [] (cpu_down+0x1b/0x30) [] (cpu_down+0x1b/0x30) from [] (store_online+0x27/0x54) [] (store_online+0x27/0x54) from [] (dev_attr_store+0x11/0x18) [] (dev_attr_store+0x11/0x18) from [] (sysfs_write_file+0xed/0x114) [] (sysfs_write_file+0xed/0x114) from [] (vfs_write+0x65/0xd8) [] (vfs_write+0x65/0xd8) from [] (sys_write+0x2f/0x50) [] (sys_write+0x2f/0x50) from [] (ret_fast_syscall+0x1/0x52) Of course this only impacted drivers which have have_governor_per_policy set to true. i.e. big LITTLE cpufreq driver. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1b8a48eaf90f..b7acfd153bf9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1075,14 +1075,14 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif __func__, cpu_dev->id, cpu); } + if ((cpus == 1) && (cpufreq_driver->target)) + __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); + pr_debug("%s: removing link, cpu: %d\n", __func__, cpu); cpufreq_cpu_put(data); /* If cpu is last user of policy, free policy */ if (cpus == 1) { - if (cpufreq_driver->target) - __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); - lock_policy_rwsem_read(cpu); kobj = &data->kobj; cmp = &data->kobj_unregister; From fc31d6f55908759530462998d0464a9f124b1c32 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Wed, 1 May 2013 13:38:12 +0000 Subject: [PATCH 13/30] cpufreq: cpufreq-cpu0: defer probe when regulator is not ready With commit 1e4b545, regulator_get will now return -EPROBE_DEFER when the cpu0-supply node is present, but the regulator is not yet registered. It is possible for this to occur when the regulator registration by itself might be defered due to some dependent interface not yet instantiated. For example: an regulator which uses I2C and GPIO might need both systems available before proceeding, in this case, the regulator might defer it's registration. However, the cpufreq-cpu0 driver assumes that any un-successful return result is equivalent of failure. When the regulator_get returns failure other than -EPROBE_DEFER, it makes sense to assume that supply node is not present and proceed with the assumption that only clock control is necessary in the platform. With this change, we can now handle the following conditions: a) cpu0-supply binding is not present, regulator_get will return appropriate error result, resulting in cpufreq-cpu0 driver controlling just the clock. b) cpu0-supply binding is present, regulator_get returns -EPROBE_DEFER, we retry resulting in cpufreq-cpu0 driver registering later once the regulator is available. c) cpu0-supply binding is present, regulator_get returns -EPROBE_DEFER, however, regulator never registers, we retry until cpufreq-cpu0 driver fails to register pointing at device tree information bug. However, in this case, the fact that cpufreq-cpu0 operates with clock only when the DT binding clearly indicates need of a supply is a bug of it's own. d) cpu0-supply gets an regulator at probe - cpufreq-cpu0 driver controls both the clock and regulator Signed-off-by: Nishanth Menon Acked-by: Shawn Guo Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-cpu0.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c index 3ab8294eab04..ecd8af900432 100644 --- a/drivers/cpufreq/cpufreq-cpu0.c +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -195,6 +195,22 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev) cpu_dev = &pdev->dev; cpu_dev->of_node = np; + cpu_reg = devm_regulator_get(cpu_dev, "cpu0"); + if (IS_ERR(cpu_reg)) { + /* + * If cpu0 regulator supply node is present, but regulator is + * not yet registered, we should try defering probe. + */ + if (PTR_ERR(cpu_reg) == -EPROBE_DEFER) { + dev_err(cpu_dev, "cpu0 regulator not ready, retry\n"); + ret = -EPROBE_DEFER; + goto out_put_node; + } + pr_warn("failed to get cpu0 regulator: %ld\n", + PTR_ERR(cpu_reg)); + cpu_reg = NULL; + } + cpu_clk = devm_clk_get(cpu_dev, NULL); if (IS_ERR(cpu_clk)) { ret = PTR_ERR(cpu_clk); @@ -202,12 +218,6 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev) goto out_put_node; } - cpu_reg = devm_regulator_get(cpu_dev, "cpu0"); - if (IS_ERR(cpu_reg)) { - pr_warn("failed to get cpu0 regulator\n"); - cpu_reg = NULL; - } - ret = of_init_opp_table(cpu_dev); if (ret) { pr_err("failed to init OPP table: %d\n", ret); From 5aaa9cc7ab589893efe8e66bf02f7fc2175a1f5b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 3 May 2013 04:44:25 +0000 Subject: [PATCH 14/30] cpufreq: cpufreq-cpu0: Free parent node for error cases We are freeing parent node in success cases but not in failure cases. Let's do it. Signed-off-by: Viresh Kumar Acked-by: Shawn Guo Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-cpu0.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c index ecd8af900432..a64eb8b70444 100644 --- a/drivers/cpufreq/cpufreq-cpu0.c +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -189,7 +189,8 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev) if (!np) { pr_err("failed to find cpu0 node\n"); - return -ENOENT; + ret = -ENOENT; + goto out_put_parent; } cpu_dev = &pdev->dev; @@ -274,6 +275,8 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev) opp_free_cpufreq_table(cpu_dev, &freq_table); out_put_node: of_node_put(np); +out_put_parent: + of_node_put(parent); return ret; } From 99af771115f7b2f86548ca2f762adb9032096702 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 4 May 2013 12:03:54 +0530 Subject: [PATCH 15/30] cpufreq: ARM big LITTLE: Fix Kconfig entries This fixes usage of "depends on" and "select" options in Kconfig for ARM big LITTLE cpufreq driver. Otherwise we get these warnings: warning: (ARM_DT_BL_CPUFREQ) selects ARM_BIG_LITTLE_CPUFREQ which has unmet direct dependencies (ARCH_HAS_CPUFREQ && CPU_FREQ && ARM && ARM_CPU_TOPOLOGY) Signed-off-by: Arnd Bergmann Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index a78ce4490c61..6e57543fe0b9 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -3,17 +3,17 @@ # config ARM_BIG_LITTLE_CPUFREQ - tristate - depends on ARM_CPU_TOPOLOGY - select PM_OPP + tristate "Generic ARM big LITTLE CPUfreq driver" + depends on ARM_CPU_TOPOLOGY && PM_OPP && HAVE_CLK + help + This enables the Generic CPUfreq driver for ARM big.LITTLE platforms. config ARM_DT_BL_CPUFREQ - tristate "Generic ARM big LITTLE CPUfreq driver probed via DT" - select ARM_BIG_LITTLE_CPUFREQ - depends on OF && HAVE_CLK + tristate "Generic probing via DT for ARM big LITTLE CPUfreq driver" + depends on ARM_BIG_LITTLE_CPUFREQ && OF help - This enables the Generic CPUfreq driver for ARM big.LITTLE platform. - This gets frequency tables from DT. + This enables probing via DT for Generic CPUfreq driver for ARM + big.LITTLE platform. This gets frequency tables from DT. config ARM_EXYNOS_CPUFREQ bool "SAMSUNG EXYNOS SoCs" From 559f56c70fc90bd9da8c9c9c36d86c5e582ac5b3 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Sun, 5 May 2013 12:18:08 +0000 Subject: [PATCH 16/30] cpufreq: Fix incorrect dependecies for ARM SA11xx drivers Kconfig dependecies for ARM SA11xx drivers are incorrect, so fix them. [rjw: Changelog] Signed-off-by: Alexander Shiyan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index a1488f58f6ca..534fcb825153 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -47,7 +47,7 @@ config CPU_FREQ_STAT_DETAILS choice prompt "Default CPUFreq governor" - default CPU_FREQ_DEFAULT_GOV_USERSPACE if CPU_FREQ_SA1100 || CPU_FREQ_SA1110 + default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1100_CPUFREQ || ARM_SA1110_CPUFREQ default CPU_FREQ_DEFAULT_GOV_PERFORMANCE help This option sets which CPUFreq governor shall be loaded at From 1abc4b20b85b42e8573957e54b193385cf48b0d6 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Tue, 7 May 2013 08:20:25 -0700 Subject: [PATCH 17/30] cpufreq / intel_pstate: remove idle time and duration from sample and calculations Idle time is taken into account in the APERF/MPERF ratio calculation there is no reason for the driver to track it seperately. This reduces the work in the driver and makes the code more readable. Removal of the tracking of sample duration removes the possibility of the divide by zero exception when the duration is sub 1us References: https://bugzilla.kernel.org/show_bug.cgi?id=56691 Reported-by: Mike Lothian Cc: 3.9+ Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 43 ++++++---------------------------- 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index cc3a8e6c92be..c6e10d02b795 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -48,12 +48,7 @@ static inline int32_t div_fp(int32_t x, int32_t y) } struct sample { - ktime_t start_time; - ktime_t end_time; int core_pct_busy; - int pstate_pct_busy; - u64 duration_us; - u64 idletime_us; u64 aperf; u64 mperf; int freq; @@ -91,8 +86,6 @@ struct cpudata { int min_pstate_count; int idle_mode; - ktime_t prev_sample; - u64 prev_idle_time_us; u64 prev_aperf; u64 prev_mperf; int sample_ptr; @@ -450,48 +443,26 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu, struct sample *sample) { u64 core_pct; - sample->pstate_pct_busy = 100 - div64_u64( - sample->idletime_us * 100, - sample->duration_us); core_pct = div64_u64(sample->aperf * 100, sample->mperf); sample->freq = cpu->pstate.max_pstate * core_pct * 1000; - sample->core_pct_busy = div_s64((sample->pstate_pct_busy * core_pct), - 100); + sample->core_pct_busy = core_pct; } static inline void intel_pstate_sample(struct cpudata *cpu) { - ktime_t now; - u64 idle_time_us; u64 aperf, mperf; - now = ktime_get(); - idle_time_us = get_cpu_idle_time_us(cpu->cpu, NULL); - rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); - /* for the first sample, don't actually record a sample, just - * set the baseline */ - if (cpu->prev_idle_time_us > 0) { - cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT; - cpu->samples[cpu->sample_ptr].start_time = cpu->prev_sample; - cpu->samples[cpu->sample_ptr].end_time = now; - cpu->samples[cpu->sample_ptr].duration_us = - ktime_us_delta(now, cpu->prev_sample); - cpu->samples[cpu->sample_ptr].idletime_us = - idle_time_us - cpu->prev_idle_time_us; + cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT; + cpu->samples[cpu->sample_ptr].aperf = aperf; + cpu->samples[cpu->sample_ptr].mperf = mperf; + cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf; + cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf; - cpu->samples[cpu->sample_ptr].aperf = aperf; - cpu->samples[cpu->sample_ptr].mperf = mperf; - cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf; - cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf; + intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]); - intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]); - } - - cpu->prev_sample = now; - cpu->prev_idle_time_us = idle_time_us; cpu->prev_aperf = aperf; cpu->prev_mperf = mperf; } From d8f469e9cff3bc4a6317d923e9506be046aa7bdc Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Tue, 7 May 2013 08:20:26 -0700 Subject: [PATCH 18/30] cpufreq / intel_pstate: use lowest requested max performance There are two ways that the maximum p-state can be clamped, via a policy change and via the sysfs file. The acpi-thermal driver adjusts the p-state policy in response to thermal events. These changes override the users settings at the moment. Use the lowest of the two requested values this ensures that we will not exceed the requested pstate from either mechanism. Reported-by: Srinivas Pandruvada Cc: 3.9+ Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index c6e10d02b795..4a437ffc5186 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -117,6 +117,8 @@ struct perf_limits { int min_perf_pct; int32_t max_perf; int32_t min_perf; + int max_policy_pct; + int max_sysfs_pct; }; static struct perf_limits limits = { @@ -125,6 +127,8 @@ static struct perf_limits limits = { .max_perf = int_tofp(1), .min_perf_pct = 0, .min_perf = 0, + .max_policy_pct = 100, + .max_sysfs_pct = 100, }; static inline void pid_reset(struct _pid *pid, int setpoint, int busy, @@ -295,7 +299,8 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, if (ret != 1) return -EINVAL; - limits.max_perf_pct = clamp_t(int, input, 0 , 100); + limits.max_sysfs_pct = clamp_t(int, input, 0 , 100); + limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); return count; } @@ -646,8 +651,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100); limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); - limits.max_perf_pct = policy->max * 100 / policy->cpuinfo.max_freq; - limits.max_perf_pct = clamp_t(int, limits.max_perf_pct, 0 , 100); + limits.max_policy_pct = policy->max * 100 / policy->cpuinfo.max_freq; + limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0 , 100); + limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); return 0; From ca182aee389f8026401510f4c63841cb02c820e8 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Tue, 7 May 2013 08:20:27 -0700 Subject: [PATCH 19/30] cpufreq / intel_pstate: fix ffmpeg regression The ffmpeg benchmark in the phoronix test suite has threads on multiple cores that rely on the progress on of threads on other cores and ping pong back and forth fast enough to make the core appear less busy than it "should" be. If the core has been at minimum p-state for a while bump the pstate up to kick the core to see if it is in this ping pong state. If the core is truly idle the p-state will be reduced at the next sample time. If the core makes more progress it will send more work to the thread bringing both threads out of the ping pong scenario and the p-state will be selected normally. This fixes a performance regression of approximately 30% Cc: 3.9+ Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 4a437ffc5186..a7f1946b3452 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -551,22 +551,16 @@ static void intel_pstate_timer_func(unsigned long __data) struct cpudata *cpu = (struct cpudata *) __data; intel_pstate_sample(cpu); + intel_pstate_adjust_busy_pstate(cpu); - if (!cpu->idle_mode) - intel_pstate_adjust_busy_pstate(cpu); - else - intel_pstate_adjust_idle_pstate(cpu); - -#if defined(XPERF_FIX) if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) { cpu->min_pstate_count++; if (!(cpu->min_pstate_count % 5)) { intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate); - intel_pstate_idle_mode(cpu); } } else cpu->min_pstate_count = 0; -#endif + intel_pstate_set_sample_time(cpu); } From a73108d578559c83e35fa386a4058142a019b8d4 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Tue, 7 May 2013 08:20:28 -0700 Subject: [PATCH 20/30] cpufreq / intel_pstate: Remove idle mode PID Remove dead code from the driver. Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 49 ---------------------------------- 1 file changed, 49 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index a7f1946b3452..b93e3851b5d3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -81,10 +81,8 @@ struct cpudata { struct pstate_adjust_policy *pstate_policy; struct pstate_data pstate; struct _pid pid; - struct _pid idle_pid; int min_pstate_count; - int idle_mode; u64 prev_aperf; u64 prev_mperf; @@ -199,19 +197,6 @@ static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu) 0); } -static inline void intel_pstate_idle_pid_reset(struct cpudata *cpu) -{ - pid_p_gain_set(&cpu->idle_pid, cpu->pstate_policy->p_gain_pct); - pid_d_gain_set(&cpu->idle_pid, cpu->pstate_policy->d_gain_pct); - pid_i_gain_set(&cpu->idle_pid, cpu->pstate_policy->i_gain_pct); - - pid_reset(&cpu->idle_pid, - 75, - 50, - cpu->pstate_policy->deadband, - 0); -} - static inline void intel_pstate_reset_all_pid(void) { unsigned int cpu; @@ -481,16 +466,6 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) mod_timer_pinned(&cpu->timer, jiffies + delay); } -static inline void intel_pstate_idle_mode(struct cpudata *cpu) -{ - cpu->idle_mode = 1; -} - -static inline void intel_pstate_normal_mode(struct cpudata *cpu) -{ - cpu->idle_mode = 0; -} - static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu) { int32_t busy_scaled; @@ -523,29 +498,6 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) intel_pstate_pstate_decrease(cpu, steps); } -static inline void intel_pstate_adjust_idle_pstate(struct cpudata *cpu) -{ - int busy_scaled; - struct _pid *pid; - int ctl = 0; - int steps; - - pid = &cpu->idle_pid; - - busy_scaled = intel_pstate_get_scaled_busy(cpu); - - ctl = pid_calc(pid, 100 - busy_scaled); - - steps = abs(ctl); - if (ctl < 0) - intel_pstate_pstate_decrease(cpu, steps); - else - intel_pstate_pstate_increase(cpu, steps); - - if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) - intel_pstate_normal_mode(cpu); -} - static void intel_pstate_timer_func(unsigned long __data) { struct cpudata *cpu = (struct cpudata *) __data; @@ -601,7 +553,6 @@ static int intel_pstate_init_cpu(unsigned int cpunum) (unsigned long)cpu; cpu->timer.expires = jiffies + HZ/100; intel_pstate_busy_pid_reset(cpu); - intel_pstate_idle_pid_reset(cpu); intel_pstate_sample(cpu); intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate); From 35363e943f2b0aa503e1dd55f894f736563e85a3 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Tue, 7 May 2013 08:20:30 -0700 Subject: [PATCH 21/30] cpufreq / intel_pstate: remove #ifdef MODULE compile fence The driver can no longer be built as a module remove the compile fence around cpufreq tracing call. Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b93e3851b5d3..0cc7d60525ac 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -391,9 +391,8 @@ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) if (pstate == cpu->pstate.current_pstate) return; -#ifndef MODULE trace_cpu_frequency(pstate * 100000, cpu->cpu); -#endif + cpu->pstate.current_pstate = pstate; wrmsrl(MSR_IA32_PERF_CTL, pstate << 8); From d96f7330176d69a9415162fb341b87e724e9ca74 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 10 May 2013 08:16:48 +0000 Subject: [PATCH 22/30] cpufreq / kirkwood: don't check resource with devm_ioremap_resource devm_ioremap_resource does sanity checks on the given resource. No need to duplicate this in the driver. Signed-off-by: Wolfram Sang Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/kirkwood-cpufreq.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c index d36ea8dc96eb..b2644af985ec 100644 --- a/drivers/cpufreq/kirkwood-cpufreq.c +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -171,10 +171,6 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) priv.dev = &pdev->dev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "Cannot get memory resource\n"); - return -ENODEV; - } priv.base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(priv.base)) return PTR_ERR(priv.base); From d5e1670afe0c886d6dd92afb7a1f085f88294dc8 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 8 May 2013 01:14:32 +0200 Subject: [PATCH 23/30] PM: Avoid calling kfree() under spinlock in dev_pm_put_subsys_data() Fix dev_pm_put_subsys_data() so that it doesn't call kfree() under a spinlock and make it return 1 whenever it leaves NULL power.subsys_data (regardless of the reason). Signed-off-by: Shuah Khan Reviewed-by: Pavel Machek Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/common.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index 39c32529b833..5da914041305 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -61,24 +61,24 @@ EXPORT_SYMBOL_GPL(dev_pm_get_subsys_data); int dev_pm_put_subsys_data(struct device *dev) { struct pm_subsys_data *psd; - int ret = 0; + int ret = 1; spin_lock_irq(&dev->power.lock); psd = dev_to_psd(dev); - if (!psd) { - ret = -EINVAL; + if (!psd) goto out; - } if (--psd->refcount == 0) { dev->power.subsys_data = NULL; - kfree(psd); - ret = 1; + } else { + psd = NULL; + ret = 0; } out: spin_unlock_irq(&dev->power.lock); + kfree(psd); return ret; } From 60e6726c7bdec7fedae278ee3bf973ba988f2abf Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 8 May 2013 21:34:48 +0200 Subject: [PATCH 24/30] cpufreq, ondemand: Remove leftover debug line I don't see how the virtual address of the tuners pointer would be of any help to anyone so remove it. Signed-off-by: Borislav Petkov Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_ondemand.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index b0ffef96bf77..4b9bb5def6f1 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -547,7 +547,6 @@ static int od_init(struct dbs_data *dbs_data) tuners->io_is_busy = should_io_be_busy(); dbs_data->tuners = tuners; - pr_info("%s: tuners %p\n", __func__, tuners); mutex_init(&dbs_data->mutex); return 0; } From b57ffac5e57bff33dde3cff35dff5c41876a6d12 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 13 May 2013 08:03:43 +0000 Subject: [PATCH 25/30] cpufreq / intel_pstate: use vzalloc() instead of vmalloc()/memset(0) Use vzalloc() instead of vmalloc() and memset(0). Signed-off-by: Wei Yongjun Acked-by: Viresh Kumar Acked-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 0cc7d60525ac..9c36ace92a39 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -709,10 +709,9 @@ static int __init intel_pstate_init(void) pr_info("Intel P-state driver initializing.\n"); - all_cpu_data = vmalloc(sizeof(void *) * num_possible_cpus()); + all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); if (!all_cpu_data) return -ENOMEM; - memset(all_cpu_data, 0, sizeof(void *) * num_possible_cpus()); rc = cpufreq_register_driver(&intel_pstate_driver); if (rc) From dc5aeae4f961ca8ea9511422236d7076585f149a Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 13 May 2013 02:42:11 +0000 Subject: [PATCH 26/30] PM: Documentation update for freeze state Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- Documentation/power/devices.txt | 15 ++++++++------- Documentation/power/interface.txt | 4 ++-- Documentation/power/states.txt | 20 +++++++++++++++++--- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 504dfe4d52eb..a66c9821b5ce 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -268,7 +268,7 @@ situations. System Power Management Phases ------------------------------ Suspending or resuming the system is done in several phases. Different phases -are used for standby or memory sleep states ("suspend-to-RAM") and the +are used for freeze, standby, and memory sleep states ("suspend-to-RAM") and the hibernation state ("suspend-to-disk"). Each phase involves executing callbacks for every device before the next phase begins. Not all busses or classes support all these callbacks and not all drivers use all the callbacks. The @@ -309,7 +309,8 @@ execute the corresponding method from dev->driver->pm instead if there is one. Entering System Suspend ----------------------- -When the system goes into the standby or memory sleep state, the phases are: +When the system goes into the freeze, standby or memory sleep state, +the phases are: prepare, suspend, suspend_late, suspend_noirq. @@ -368,7 +369,7 @@ the devices that were suspended. Leaving System Suspend ---------------------- -When resuming from standby or memory sleep, the phases are: +When resuming from freeze, standby or memory sleep, the phases are: resume_noirq, resume_early, resume, complete. @@ -433,8 +434,8 @@ the system log. Entering Hibernation -------------------- -Hibernating the system is more complicated than putting it into the standby or -memory sleep state, because it involves creating and saving a system image. +Hibernating the system is more complicated than putting it into the other +sleep states, because it involves creating and saving a system image. Therefore there are more phases for hibernation, with a different set of callbacks. These phases always run after tasks have been frozen and memory has been freed. @@ -485,8 +486,8 @@ image forms an atomic snapshot of the system state. At this point the system image is saved, and the devices then need to be prepared for the upcoming system shutdown. This is much like suspending them -before putting the system into the standby or memory sleep state, and the phases -are similar. +before putting the system into the freeze, standby or memory sleep state, +and the phases are similar. 9. The prepare phase is discussed above. diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt index c537834af005..f1f0f59a7c47 100644 --- a/Documentation/power/interface.txt +++ b/Documentation/power/interface.txt @@ -7,8 +7,8 @@ running. The interface exists in /sys/power/ directory (assuming sysfs is mounted at /sys). /sys/power/state controls system power state. Reading from this file -returns what states are supported, which is hard-coded to 'standby' -(Power-On Suspend), 'mem' (Suspend-to-RAM), and 'disk' +returns what states are supported, which is hard-coded to 'freeze', +'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and 'disk' (Suspend-to-Disk). Writing to this file one of those strings causes the system to diff --git a/Documentation/power/states.txt b/Documentation/power/states.txt index 4416b28630df..42f28b7aaf6b 100644 --- a/Documentation/power/states.txt +++ b/Documentation/power/states.txt @@ -2,12 +2,26 @@ System Power Management States -The kernel supports three power management states generically, though -each is dependent on platform support code to implement the low-level -details for each state. This file describes each state, what they are +The kernel supports four power management states generically, though +one is generic and the other three are dependent on platform support +code to implement the low-level details for each state. +This file describes each state, what they are commonly called, what ACPI state they map to, and what string to write to /sys/power/state to enter that state +state: Freeze / Low-Power Idle +ACPI state: S0 +String: "freeze" + +This state is a generic, pure software, light-weight, low-power state. +It allows more energy to be saved relative to idle by freezing user +space and putting all I/O devices into low-power states (possibly +lower-power than available at run time), such that the processors can +spend more time in their idle states. +This state can be used for platforms without Standby/Suspend-to-RAM +support, or it can be used in addition to Suspend-to-RAM (memory sleep) +to provide reduced resume latency. + State: Standby / Power-On Suspend ACPI State: S1 From 86fd03d1607525af3926ff0a299b16c51fa4221b Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 13 May 2013 02:42:12 +0000 Subject: [PATCH 27/30] PM / Documentation: remove inaccurate suspend/hibernate transition lantency statement The lantency of the transition from suspend and hibernate is platform-dependent. Thus we should not refer the lantency in the documentation. Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- Documentation/power/states.txt | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/Documentation/power/states.txt b/Documentation/power/states.txt index 42f28b7aaf6b..442d43df9b25 100644 --- a/Documentation/power/states.txt +++ b/Documentation/power/states.txt @@ -36,9 +36,6 @@ We try to put devices in a low-power state equivalent to D1, which also offers low power savings, but low resume latency. Not all devices support D1, and those that don't are left on. -A transition from Standby to the On state should take about 1-2 -seconds. - State: Suspend-to-RAM ACPI State: S3 @@ -56,9 +53,6 @@ transition back to the On state. For at least ACPI, STR requires some minimal boot-strapping code to resume the system from STR. This may be true on other platforms. -A transition from Suspend-to-RAM to the On state should take about -3-5 seconds. - State: Suspend-to-disk ACPI State: S4 @@ -88,7 +82,3 @@ low-power state (like ACPI S4), or it may simply power down. Powering down offers greater savings, and allows this mechanism to work on any system. However, entering a real low-power state allows the user to trigger wake up events (e.g. pressing a key or opening a laptop lid). - -A transition from Suspend-to-Disk to the On state should take about 30 -seconds, though it's typically a bit more with the current -implementation. From 6bc08ed02385378d8c84410fb407fe5640834350 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 13 May 2013 12:00:03 +0000 Subject: [PATCH 28/30] PM / hibernate: Correct documentation Correct the meaning of PM_HIBERNATION_PREPARE in the docs. References: http://lkml.kernel.org/r/20130512162717.GA6305@pd.tnic Signed-off-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- Documentation/power/notifiers.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/power/notifiers.txt b/Documentation/power/notifiers.txt index c2a4a346c0d9..a81fa254303d 100644 --- a/Documentation/power/notifiers.txt +++ b/Documentation/power/notifiers.txt @@ -15,8 +15,10 @@ A suspend/hibernation notifier may be used for this purpose. The subsystems or drivers having such needs can register suspend notifiers that will be called upon the following events by the PM core: -PM_HIBERNATION_PREPARE The system is going to hibernate or suspend, tasks will - be frozen immediately. +PM_HIBERNATION_PREPARE The system is going to hibernate, tasks will be frozen + immediately. This is different from PM_SUSPEND_PREPARE + below because here we do additional work between notifiers + and drivers freezing. PM_POST_HIBERNATION The system memory state has been restored from a hibernation image or an error occurred during From 7a26b53070f0099dfcfc9d499458de861c5c4859 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 15 May 2013 16:49:35 +0000 Subject: [PATCH 29/30] ACPI / scan: Fix memory leak on acpi_scan_init_hotplug() error path Following commit 6b772e8f9 (ACPI: Update PNPID match handling for notify), the acpi_scan_init_hotplug() calls acpi_set_pnp_ids() which allocates acpi_hardware_id and copies a few strings (kstrdup). If the devices does not have hardware_id set, the function exits without freeing the previously allocated ids (and kmemleak complains). This patch calls simply changes 'return' on error to a 'goto out' which calls acpi_free_pnp_ids(). Reported-by: Larry Finger Signed-off-by: Catalin Marinas Reviewed-by: Toshi Kani Tested-by: Toshi Kani Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index fe158fd4f1df..c1bc608339a6 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1785,7 +1785,7 @@ static void acpi_scan_init_hotplug(acpi_handle handle, int type) acpi_set_pnp_ids(handle, &pnp, type); if (!pnp.type.hardware_id) - return; + goto out; /* * This relies on the fact that acpi_install_notify_handler() will not @@ -1800,6 +1800,7 @@ static void acpi_scan_init_hotplug(acpi_handle handle, int type) } } +out: acpi_free_pnp_ids(&pnp); } From a66b2e503fc79fff6632d02ef5a0ee47c1d2553d Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Wed, 15 May 2013 21:47:17 +0200 Subject: [PATCH 30/30] cpufreq: Preserve sysfs files across suspend/resume The file permissions of cpufreq per-cpu sysfs files are not preserved across suspend/resume because we internally go through the CPU Hotplug path which reinitializes the file permissions on CPU online. But the user is not supposed to know that we are using CPU hotplug internally within suspend/resume (IOW, the kernel should not silently wreck the user-set file permissions across a suspend cycle). Therefore, we need to preserve the file permissions as they are across suspend/resume. The simplest way to achieve that is to just not touch the sysfs files at all - ie., just ignore the CPU hotplug notifications in the suspend/resume path (_FROZEN) in the cpufreq hotplug callback. Reported-by: Robert Jarzmik Reported-by: Durgadoss R Signed-off-by: Srivatsa S. Bhat Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 4 +--- drivers/cpufreq/cpufreq_stats.c | 7 ++++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b7acfd153bf9..4b8c7f297d74 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1832,15 +1832,13 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb, if (dev) { switch (action) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: cpufreq_add_dev(dev, NULL); break; case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: + case CPU_UP_CANCELED_FROZEN: __cpufreq_remove_dev(dev, NULL); break; case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: cpufreq_add_dev(dev, NULL); break; } diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index bfd6273fd873..fb65decffa28 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -349,15 +349,16 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: cpufreq_update_policy(cpu); break; case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: cpufreq_stats_free_sysfs(cpu); break; case CPU_DEAD: - case CPU_DEAD_FROZEN: + cpufreq_stats_free_table(cpu); + break; + case CPU_UP_CANCELED_FROZEN: + cpufreq_stats_free_sysfs(cpu); cpufreq_stats_free_table(cpu); break; }