From da470db16c703d7f9617c366a36c6670f89a9830 Mon Sep 17 00:00:00 2001 From: Naga Chumbalkar Date: Mon, 29 Jun 2009 19:53:41 +0000 Subject: [PATCH 01/13] [CPUFREQ] update Doc for cpuinfo_cur_freq and scaling_cur_freq I think the way "cpuinfo_cur_info" and "scaling_cur_info" are defined under ./Documentation/cpu-freq/user-guide.txt can be enhanced. Currently, they are both defined the same way: "Current speed/frequency" of the CPU, in KHz". Below is a patch that distinguishes one from the other. Regards, - naga - ----------------------------------------- Update description for "cpuinfo_cur_freq" and "scaling_cur_freq". Some of the wording is drawn from comments found in ./drivers/cpufreq/cpufreq.c: cpufreq_out_of_sync(): * @old_freq: CPU frequency the kernel thinks the CPU runs at * @new_freq: CPU frequency the CPU actually runs at Signed-off-by: Naga Chumbalkar Signed-off-by: Dave Jones --- Documentation/cpu-freq/user-guide.txt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index 5d5f5fadd1c2..2a5b850847c0 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -176,7 +176,9 @@ scaling_governor, and by "echoing" the name of another work on some specific architectures or processors. -cpuinfo_cur_freq : Current speed of the CPU, in KHz. +cpuinfo_cur_freq : Current frequency of the CPU as obtained from + the hardware, in KHz. This is the frequency + the CPU actually runs at. scaling_available_frequencies : List of available frequencies, in KHz. @@ -196,7 +198,10 @@ related_cpus : List of CPUs that need some sort of frequency scaling_driver : Hardware driver for cpufreq. -scaling_cur_freq : Current frequency of the CPU, in KHz. +scaling_cur_freq : Current frequency of the CPU as determined by + the governor and cpufreq core, in KHz. This is + the frequency the kernel thinks the CPU runs + at. If you have selected the "userspace" governor which allows you to set the CPU operating frequency to a specific value, you can read out From 54e6fe167b8787460ee39e7c333b96e3e2e6a196 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 8 Jul 2009 16:28:05 -0400 Subject: [PATCH 02/13] [CPUFREQ] Reduce scope of cpu_sys_dev in cpufreq_add_dev Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 2968ed6a9c49..ce31e6e3d982 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -773,7 +773,6 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) struct cpufreq_policy new_policy; struct cpufreq_policy *policy; struct freq_attr **drv_attr; - struct sys_device *cpu_sys_dev; unsigned long flags; unsigned int j; @@ -936,6 +935,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) /* symlink affected CPUs */ for_each_cpu(j, policy->cpus) { struct cpufreq_policy *managed_policy; + struct sys_device *cpu_sys_dev; if (j == cpu) continue; From 059019a3c3353b15d8efac5301f72036cc408bd4 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 8 Jul 2009 16:30:03 -0400 Subject: [PATCH 03/13] [CPUFREQ] cleanup up -ENOMEM handling in cpufreq_add_dev Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index ce31e6e3d982..06eeff3c822f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -798,19 +798,16 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) goto module_out; } + ret = -ENOMEM; policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL); - if (!policy) { - ret = -ENOMEM; + if (!policy) goto nomem_out; - } - if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) { - ret = -ENOMEM; + + if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) goto err_free_policy; - } - if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) { - ret = -ENOMEM; + + if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) goto err_free_cpumask; - } policy->cpu = cpu; cpumask_copy(policy->cpus, cpumask_of(cpu)); From 19d6f7ec3eb1652fc89dd05ebcc2a21a95c60a5a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 8 Jul 2009 17:35:39 -0400 Subject: [PATCH 04/13] [CPUFREQ] Factor out symlink creation from cpufreq_add_dev Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 51 ++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 06eeff3c822f..7600c10d275a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -756,6 +756,34 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; +/* symlink affected CPUs */ +int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy) +{ + unsigned int j; + int ret = 0; + + for_each_cpu(j, policy->cpus) { + struct cpufreq_policy *managed_policy; + struct sys_device *cpu_sys_dev; + + if (j == cpu) + continue; + if (!cpu_online(j)) + continue; + + dprintk("CPU %u already managed, adding link\n", j); + managed_policy = cpufreq_cpu_get(cpu); + cpu_sys_dev = get_cpu_sysdev(j); + ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, + "cpufreq"); + if (ret) { + cpufreq_cpu_put(managed_policy); + return ret; + } + } + return ret; +} + /** * cpufreq_add_dev - add a CPU device @@ -929,26 +957,9 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) } spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - /* symlink affected CPUs */ - for_each_cpu(j, policy->cpus) { - struct cpufreq_policy *managed_policy; - struct sys_device *cpu_sys_dev; - - if (j == cpu) - continue; - if (!cpu_online(j)) - continue; - - dprintk("CPU %u already managed, adding link\n", j); - managed_policy = cpufreq_cpu_get(cpu); - cpu_sys_dev = get_cpu_sysdev(j); - ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, - "cpufreq"); - if (ret) { - cpufreq_cpu_put(managed_policy); - goto err_out_unregister; - } - } + ret = cpufreq_add_dev_symlink(cpu, policy->cpus, policy); + if (ret) + goto err_out_unregister; policy->governor = NULL; /* to assure that the starting sequence is * run in cpufreq_set_policy */ From 909a694e336bf3a6e48b5b51129887a1c5cae04c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 8 Jul 2009 18:05:42 -0400 Subject: [PATCH 05/13] [CPUFREQ] Factor out interface creation from cpufreq_add_dev Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 89 +++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 37 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7600c10d275a..0ee008da46f2 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -784,6 +784,57 @@ int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy) return ret; } +int cpufreq_add_dev_interface(unsigned int cpu, struct cpufreq_policy *policy, + struct sys_device *sys_dev) +{ + struct freq_attr **drv_attr; + unsigned long flags; + int ret = 0; + unsigned int j; + + /* prepare interface data */ + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, + &sys_dev->kobj, "cpufreq"); + if (ret) + return ret; + + /* set up files for this cpu device */ + drv_attr = cpufreq_driver->attr; + while ((drv_attr) && (*drv_attr)) { + ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); + if (ret) + goto err_out_kobj_put; + drv_attr++; + } + if (cpufreq_driver->get) { + ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); + if (ret) + goto err_out_kobj_put; + } + if (cpufreq_driver->target) { + ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); + if (ret) + goto err_out_kobj_put; + } + + spin_lock_irqsave(&cpufreq_driver_lock, flags); + for_each_cpu(j, policy->cpus) { + if (!cpu_online(j)) + continue; + per_cpu(cpufreq_cpu_data, j) = policy; + per_cpu(policy_cpu, j) = policy->cpu; + } + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + ret = cpufreq_add_dev_symlink(cpu, policy); + return ret; + +err_out_kobj_put: + kobject_put(&policy->kobj); + wait_for_completion(&policy->kobj_unregister); + return ret; +} + /** * cpufreq_add_dev - add a CPU device @@ -800,7 +851,6 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) int ret = 0; struct cpufreq_policy new_policy; struct cpufreq_policy *policy; - struct freq_attr **drv_attr; unsigned long flags; unsigned int j; @@ -923,41 +973,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) #endif memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); - /* prepare interface data */ - ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj, - "cpufreq"); - if (ret) - goto out_driver_exit; - - /* set up files for this cpu device */ - drv_attr = cpufreq_driver->attr; - while ((drv_attr) && (*drv_attr)) { - ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); - if (ret) - goto err_out_kobj_put; - drv_attr++; - } - if (cpufreq_driver->get) { - ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); - if (ret) - goto err_out_kobj_put; - } - if (cpufreq_driver->target) { - ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); - if (ret) - goto err_out_kobj_put; - } - - spin_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu(j, policy->cpus) { - if (!cpu_online(j)) - continue; - per_cpu(cpufreq_cpu_data, j) = policy; - per_cpu(policy_cpu, j) = policy->cpu; - } - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - - ret = cpufreq_add_dev_symlink(cpu, policy->cpus, policy); + ret = cpufreq_add_dev_interface(cpu, policy, sys_dev); if (ret) goto err_out_unregister; @@ -990,7 +1006,6 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) per_cpu(cpufreq_cpu_data, j) = NULL; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); -err_out_kobj_put: kobject_put(&policy->kobj); wait_for_completion(&policy->kobj_unregister); From ecf7e4611c89aba7c7fde1183f7e9357695fbcc5 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 8 Jul 2009 18:48:47 -0400 Subject: [PATCH 06/13] [CPUFREQ] Factor out policy setting from cpufreq_add_dev Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 166 +++++++++++++++++++++----------------- 1 file changed, 90 insertions(+), 76 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0ee008da46f2..845687884c1e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -756,6 +756,75 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; + +int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy, + struct sys_device *sys_dev) +{ + int ret = 0; +#ifdef CONFIG_SMP + unsigned long flags; + unsigned int j; + +#ifdef CONFIG_HOTPLUG_CPU + if (per_cpu(cpufreq_cpu_governor, cpu)) { + policy->governor = per_cpu(cpufreq_cpu_governor, cpu); + dprintk("Restoring governor %s for cpu %d\n", + policy->governor->name, cpu); + } +#endif + + for_each_cpu(j, policy->cpus) { + struct cpufreq_policy *managed_policy; + + if (cpu == j) + continue; + + /* Check for existing affected CPUs. + * They may not be aware of it due to CPU Hotplug. + * cpufreq_cpu_put is called when the device is removed + * in __cpufreq_remove_dev() + */ + managed_policy = cpufreq_cpu_get(j); + if (unlikely(managed_policy)) { + + /* Set proper policy_cpu */ + unlock_policy_rwsem_write(cpu); + per_cpu(policy_cpu, cpu) = managed_policy->cpu; + + if (lock_policy_rwsem_write(cpu) < 0) { + /* Should not go through policy unlock path */ + if (cpufreq_driver->exit) + cpufreq_driver->exit(policy); + cpufreq_cpu_put(managed_policy); + return -EBUSY; + } + + spin_lock_irqsave(&cpufreq_driver_lock, flags); + cpumask_copy(managed_policy->cpus, policy->cpus); + per_cpu(cpufreq_cpu_data, cpu) = managed_policy; + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + dprintk("CPU already managed, adding link\n"); + ret = sysfs_create_link(&sys_dev->kobj, + &managed_policy->kobj, + "cpufreq"); + if (ret) + cpufreq_cpu_put(managed_policy); + /* + * Success. We only needed to be added to the mask. + * Call driver->exit() because only the cpu parent of + * the kobj needed to call init(). + */ + if (cpufreq_driver->exit) + cpufreq_driver->exit(policy); + return ret; + } + } +#endif + return ret; +} + + /* symlink affected CPUs */ int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy) { @@ -787,6 +856,7 @@ int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy) int cpufreq_add_dev_interface(unsigned int cpu, struct cpufreq_policy *policy, struct sys_device *sys_dev) { + struct cpufreq_policy new_policy; struct freq_attr **drv_attr; unsigned long flags; int ret = 0; @@ -827,6 +897,23 @@ int cpufreq_add_dev_interface(unsigned int cpu, struct cpufreq_policy *policy, spin_unlock_irqrestore(&cpufreq_driver_lock, flags); ret = cpufreq_add_dev_symlink(cpu, policy); + if (ret) + goto err_out_kobj_put; + + memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); + /* assure that the starting sequence is run in __cpufreq_set_policy */ + policy->governor = NULL; + + /* set default policy */ + ret = __cpufreq_set_policy(policy, &new_policy); + policy->user_policy.policy = policy->policy; + policy->user_policy.governor = policy->governor; + + if (ret) { + dprintk("setting policy failed\n"); + if (cpufreq_driver->exit) + cpufreq_driver->exit(policy); + } return ret; err_out_kobj_put: @@ -849,7 +936,6 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) { unsigned int cpu = sys_dev->id; int ret = 0; - struct cpufreq_policy new_policy; struct cpufreq_policy *policy; unsigned long flags; unsigned int j; @@ -914,82 +1000,14 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_START, policy); -#ifdef CONFIG_SMP - -#ifdef CONFIG_HOTPLUG_CPU - if (per_cpu(cpufreq_cpu_governor, cpu)) { - policy->governor = per_cpu(cpufreq_cpu_governor, cpu); - dprintk("Restoring governor %s for cpu %d\n", - policy->governor->name, cpu); - } -#endif - - for_each_cpu(j, policy->cpus) { - struct cpufreq_policy *managed_policy; - - if (cpu == j) - continue; - - /* Check for existing affected CPUs. - * They may not be aware of it due to CPU Hotplug. - * cpufreq_cpu_put is called when the device is removed - * in __cpufreq_remove_dev() - */ - managed_policy = cpufreq_cpu_get(j); - if (unlikely(managed_policy)) { - - /* Set proper policy_cpu */ - unlock_policy_rwsem_write(cpu); - per_cpu(policy_cpu, cpu) = managed_policy->cpu; - - if (lock_policy_rwsem_write(cpu) < 0) { - /* Should not go through policy unlock path */ - if (cpufreq_driver->exit) - cpufreq_driver->exit(policy); - ret = -EBUSY; - cpufreq_cpu_put(managed_policy); - goto err_free_cpumask; - } - - spin_lock_irqsave(&cpufreq_driver_lock, flags); - cpumask_copy(managed_policy->cpus, policy->cpus); - per_cpu(cpufreq_cpu_data, cpu) = managed_policy; - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - - dprintk("CPU already managed, adding link\n"); - ret = sysfs_create_link(&sys_dev->kobj, - &managed_policy->kobj, - "cpufreq"); - if (ret) - cpufreq_cpu_put(managed_policy); - /* - * Success. We only needed to be added to the mask. - * Call driver->exit() because only the cpu parent of - * the kobj needed to call init(). - */ - goto out_driver_exit; /* call driver->exit() */ - } - } -#endif - memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); + ret = cpufreq_add_dev_policy(cpu, policy, sys_dev); + if (ret) + goto err_unlock_policy; ret = cpufreq_add_dev_interface(cpu, policy, sys_dev); if (ret) goto err_out_unregister; - policy->governor = NULL; /* to assure that the starting sequence is - * run in cpufreq_set_policy */ - - /* set default policy */ - ret = __cpufreq_set_policy(policy, &new_policy); - policy->user_policy.policy = policy->policy; - policy->user_policy.governor = policy->governor; - - if (ret) { - dprintk("setting policy failed\n"); - goto err_out_unregister; - } - unlock_policy_rwsem_write(cpu); kobject_uevent(&policy->kobj, KOBJ_ADD); @@ -1009,10 +1027,6 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) kobject_put(&policy->kobj); wait_for_completion(&policy->kobj_unregister); -out_driver_exit: - if (cpufreq_driver->exit) - cpufreq_driver->exit(policy); - err_unlock_policy: unlock_policy_rwsem_write(cpu); err_free_cpumask: From 4bfa042cd304aa48cf05cd0a13c2d0794a675c0e Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 24 Jul 2009 15:25:03 +0200 Subject: [PATCH 07/13] [CPUFREQ] Bail out of cpufreq_add_dev if the link for a managed CPU got created Doing: echo 0 >cpu1/online echo 1 >cpu1/online on a managed CPU will result in: Jul 22 15:15:37 linux kernel: [ 80.013864] WARNING: at fs/sysfs/dir.c:487 sysfs_add_one+0xcf/0xe6() Jul 22 15:15:37 linux kernel: [ 80.013866] Hardware name: To Be Filled By O.E.M. Jul 22 15:15:37 linux kernel: [ 80.013868] sysfs: cannot create duplicate filename '/devices/system/cpu/cpu1/cpufreq' Jul 22 15:15:37 linux kernel: [ 80.013870] Modules linked in: powernow_k8 Jul 22 15:15:37 linux kernel: [ 80.013874] Pid: 5750, comm: bash Not tainted 2.6.31-rc2 #40 Jul 22 15:15:37 linux kernel: [ 80.013876] Call Trace: Jul 22 15:15:37 linux kernel: [ 80.013879] [] ? sysfs_add_one+0xcf/0xe6 Jul 22 15:15:37 linux kernel: [ 80.013884] [] warn_slowpath_common+0x77/0xa4 Jul 22 15:15:37 linux kernel: [ 80.013888] [] warn_slowpath_fmt+0x3c/0x3e Jul 22 15:15:37 linux kernel: [ 80.013891] [] sysfs_add_one+0xcf/0xe6 Jul 22 15:15:37 linux kernel: [ 80.013894] [] create_dir+0x58/0x87 Jul 22 15:15:37 linux kernel: [ 80.013898] [] sysfs_create_dir+0x38/0x4f Jul 22 15:15:37 linux kernel: [ 80.013902] [] kobject_add_internal+0x11f/0x1de Jul 22 15:15:37 linux kernel: [ 80.013905] [] kobject_add_varg+0x41/0x4e Jul 22 15:15:37 linux kernel: [ 80.013908] [] kobject_init_and_add+0x4c/0x57 Jul 22 15:15:37 linux kernel: [ 80.013913] [] ? mark_lock+0x22/0x228 Jul 22 15:15:37 linux kernel: [ 80.013918] [] cpufreq_add_dev_interface+0x40/0x1e4 ... This bug slipped in by git commit: 150b06f7f223cfd0f808737a5243cceca8ea47fa When splitting up cpufreq_add_dev, the whole cpufreq_add_dev function is not left anymore, only cpufreq_add_dev_policy. This patch should reconstruct the identical functionality again as it was before the split. CC: Venkatesh Pallipadi Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 845687884c1e..bbd5c2164ab6 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -756,7 +756,12 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; - +/* + * Returns: + * Negative: Failure + * 0: Success + * Positive: When we have a managed CPU and the sysfs got symlinked + */ int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy, struct sys_device *sys_dev) { @@ -817,7 +822,11 @@ int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy, */ if (cpufreq_driver->exit) cpufreq_driver->exit(policy); - return ret; + + if (!ret) + return 1; + else + return ret; } } #endif @@ -1001,8 +1010,13 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) CPUFREQ_START, policy); ret = cpufreq_add_dev_policy(cpu, policy, sys_dev); - if (ret) + if (ret) { + if (ret > 0) + /* This is a managed cpu, symlink created, + exit with 0 */ + ret = 0; goto err_unlock_policy; + } ret = cpufreq_add_dev_interface(cpu, policy, sys_dev); if (ret) From 8aa84ad8d6c740a04386f599694609ee4998e82e Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 24 Jul 2009 15:25:05 +0200 Subject: [PATCH 08/13] [CPUFREQ] Introduce global, not per core: /sys/devices/system/cpu/cpufreq Currently everything in the cpufreq layer is per core based. This does not reflect reality, for example ondemand on conservative governors have global sysfs variables. Introduce a global cpufreq directory and add the kobject to the governor struct, so that governors can easily access it. The directory is initialized in the cpufreq_core_init initcall and thus will always be created if cpufreq is compiled in, even if no cpufreq driver is active later. Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 9 ++++++++- include/linux/cpufreq.h | 10 ++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bbd5c2164ab6..4da28444b235 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -686,6 +686,9 @@ static struct attribute *default_attrs[] = { NULL }; +struct kobject *cpufreq_global_kobject; +EXPORT_SYMBOL(cpufreq_global_kobject); + #define to_policy(k) container_of(k, struct cpufreq_policy, kobj) #define to_attr(a) container_of(a, struct freq_attr, attr) @@ -1935,7 +1938,11 @@ static int __init cpufreq_core_init(void) per_cpu(policy_cpu, cpu) = -1; init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); } + + cpufreq_global_kobject = kobject_create_and_add("cpufreq", + &cpu_sysdev_class.kset.kobj); + BUG_ON(!cpufreq_global_kobject); + return 0; } - core_initcall(cpufreq_core_init); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 161042746afc..44717eb47639 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -65,6 +65,9 @@ static inline int cpufreq_unregister_notifier(struct notifier_block *nb, struct cpufreq_governor; +/* /sys/devices/system/cpu/cpufreq: entry point for global variables */ +extern struct kobject *cpufreq_global_kobject; + #define CPUFREQ_ETERNAL (-1) struct cpufreq_cpuinfo { unsigned int max_freq; @@ -274,6 +277,13 @@ struct freq_attr { ssize_t (*store)(struct cpufreq_policy *, const char *, size_t count); }; +struct global_attr { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, + struct attribute *attr, char *buf); + ssize_t (*store)(struct kobject *a, struct attribute *b, + const char *c, size_t count); +}; /********************************************************************* * CPUFREQ 2.6. INTERFACE * From 0e625ac153126a0a62b7635fa9dc91f87ff39e38 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 24 Jul 2009 15:25:06 +0200 Subject: [PATCH 09/13] [CPUFREQ] ondemand - Use global sysfs dir for tuning settings Ondemand has only global variables for userspace tunings via sysfs. But they were exposed per CPU which wrongly implies to the user that his settings are applied per cpu. Also locking sysfs against concurrent access won't be necessary anymore after deprecation time. This means the ondemand config dir is moved: /sys/devices/system/cpu/cpu*/cpufreq/ondemand -> /sys/devices/system/cpu/cpufreq/ondemand The old files will still exist, but reading or writing to them will result in one (printk_once) deprecation msg to syslog per file. Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 139 +++++++++++++++++++++++------ 1 file changed, 113 insertions(+), 26 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index d6ba14276bb1..1a3e5c7252ff 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -55,6 +55,18 @@ static unsigned int min_sampling_rate; #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) static void do_dbs_timer(struct work_struct *work); +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +static +#endif +struct cpufreq_governor cpufreq_gov_ondemand = { + .name = "ondemand", + .governor = cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; /* Sampling types */ enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; @@ -206,20 +218,23 @@ static void ondemand_powersave_bias_init(void) } /************************** sysfs interface ************************/ -static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) + +static ssize_t show_sampling_rate_max(struct kobject *kobj, + struct attribute *attr, char *buf) { printk_once(KERN_INFO "CPUFREQ: ondemand sampling_rate_max " "sysfs file is deprecated - used by: %s\n", current->comm); return sprintf(buf, "%u\n", -1U); } -static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) +static ssize_t show_sampling_rate_min(struct kobject *kobj, + struct attribute *attr, char *buf) { return sprintf(buf, "%u\n", min_sampling_rate); } #define define_one_ro(_name) \ -static struct freq_attr _name = \ +static struct global_attr _name = \ __ATTR(_name, 0444, show_##_name, NULL) define_one_ro(sampling_rate_max); @@ -228,7 +243,7 @@ define_one_ro(sampling_rate_min); /* cpufreq_ondemand Governor Tunables */ #define show_one(file_name, object) \ static ssize_t show_##file_name \ -(struct cpufreq_policy *unused, char *buf) \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ { \ return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ } @@ -237,8 +252,38 @@ show_one(up_threshold, up_threshold); show_one(ignore_nice_load, ignore_nice); show_one(powersave_bias, powersave_bias); -static ssize_t store_sampling_rate(struct cpufreq_policy *unused, - const char *buf, size_t count) +/*** delete after deprecation time ***/ + +#define DEPRECATION_MSG(file_name) \ + printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs " \ + "interface is deprecated - " #file_name "\n"); + +#define show_one_old(file_name) \ +static ssize_t show_##file_name##_old \ +(struct cpufreq_policy *unused, char *buf) \ +{ \ + printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs " \ + "interface is deprecated - " #file_name "\n"); \ + return show_##file_name(NULL, NULL, buf); \ +} +show_one_old(sampling_rate); +show_one_old(up_threshold); +show_one_old(ignore_nice_load); +show_one_old(powersave_bias); +show_one_old(sampling_rate_min); +show_one_old(sampling_rate_max); + +#define define_one_ro_old(object, _name) \ +static struct freq_attr object = \ +__ATTR(_name, 0444, show_##_name##_old, NULL) + +define_one_ro_old(sampling_rate_min_old, sampling_rate_min); +define_one_ro_old(sampling_rate_max_old, sampling_rate_max); + +/*** delete after deprecation time ***/ + +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -253,8 +298,8 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused, return count; } -static ssize_t store_up_threshold(struct cpufreq_policy *unused, - const char *buf, size_t count) +static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -272,8 +317,8 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused, return count; } -static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, - const char *buf, size_t count) +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -309,8 +354,8 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, return count; } -static ssize_t store_powersave_bias(struct cpufreq_policy *unused, - const char *buf, size_t count) +static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b, + const char *buf, size_t count) { unsigned int input; int ret; @@ -331,7 +376,7 @@ static ssize_t store_powersave_bias(struct cpufreq_policy *unused, } #define define_one_rw(_name) \ -static struct freq_attr _name = \ +static struct global_attr _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) define_one_rw(sampling_rate); @@ -354,6 +399,47 @@ static struct attribute_group dbs_attr_group = { .name = "ondemand", }; +/*** delete after deprecation time ***/ + +#define write_one_old(file_name) \ +static ssize_t store_##file_name##_old \ +(struct cpufreq_policy *unused, const char *buf, size_t count) \ +{ \ + printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs " \ + "interface is deprecated - " #file_name "\n"); \ + return store_##file_name(NULL, NULL, buf, count); \ +} +write_one_old(sampling_rate); +write_one_old(up_threshold); +write_one_old(ignore_nice_load); +write_one_old(powersave_bias); + +#define define_one_rw_old(object, _name) \ +static struct freq_attr object = \ +__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old) + +define_one_rw_old(sampling_rate_old, sampling_rate); +define_one_rw_old(up_threshold_old, up_threshold); +define_one_rw_old(ignore_nice_load_old, ignore_nice_load); +define_one_rw_old(powersave_bias_old, powersave_bias); + +static struct attribute *dbs_attributes_old[] = { + &sampling_rate_max_old.attr, + &sampling_rate_min_old.attr, + &sampling_rate_old.attr, + &up_threshold_old.attr, + &ignore_nice_load_old.attr, + &powersave_bias_old.attr, + NULL +}; + +static struct attribute_group dbs_attr_group_old = { + .attrs = dbs_attributes_old, + .name = "ondemand", +}; + +/*** delete after deprecation time ***/ + /************************** sysfs end ************************/ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) @@ -544,7 +630,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, mutex_lock(&dbs_mutex); - rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); + rc = sysfs_create_group(&policy->kobj, &dbs_attr_group_old); if (rc) { mutex_unlock(&dbs_mutex); return rc; @@ -565,13 +651,20 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, } this_dbs_info->cpu = cpu; ondemand_powersave_bias_init_cpu(cpu); - mutex_init(&this_dbs_info->timer_mutex); /* * Start the timerschedule work, when this governor * is used for first time */ if (dbs_enable == 1) { unsigned int latency; + + rc = sysfs_create_group(cpufreq_global_kobject, + &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + /* policy latency is in nS. Convert it to uS first */ latency = policy->cpuinfo.transition_latency / 1000; if (latency == 0) @@ -585,6 +678,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, } mutex_unlock(&dbs_mutex); + mutex_init(&this_dbs_info->timer_mutex); dbs_timer_init(this_dbs_info); break; @@ -592,10 +686,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, dbs_timer_exit(this_dbs_info); mutex_lock(&dbs_mutex); - sysfs_remove_group(&policy->kobj, &dbs_attr_group); + sysfs_remove_group(&policy->kobj, &dbs_attr_group_old); mutex_destroy(&this_dbs_info->timer_mutex); dbs_enable--; mutex_unlock(&dbs_mutex); + if (!dbs_enable) + sysfs_remove_group(cpufreq_global_kobject, + &dbs_attr_group); break; @@ -613,16 +710,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return 0; } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND -static -#endif -struct cpufreq_governor cpufreq_gov_ondemand = { - .name = "ondemand", - .governor = cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - static int __init cpufreq_gov_dbs_init(void) { int err; From 395913d0b1db37092ea3d9d69b832183b1dd84c5 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 8 Jun 2009 13:17:31 -0400 Subject: [PATCH 10/13] [CPUFREQ] remove rwsem lock from CPUFREQ_GOV_STOP call (second call site) remove rwsem lock from CPUFREQ_GOV_STOP call (second call site) commit 42a06f2166f2f6f7bf04f32b4e823eacdceafdc9 Missed a call site for CPUFREQ_GOV_STOP to remove the rwlock taken around the teardown. To make a long story short, the rwlock write-lock causes a circular dependency with cancel_delayed_work_sync(), because the timer handler takes the read lock. Note that all callers to __cpufreq_set_policy are taking the rwsem. All sysfs callers (writers) hold the write rwsem at the earliest sysfs calling stage. However, the rwlock write-lock is not needed upon governor stop. Signed-off-by: Mathieu Desnoyers Acked-by: Venkatesh Pallipadi CC: rjw@sisk.pl CC: mingo@elte.hu CC: Shaohua Li CC: Pekka Enberg CC: Dave Young CC: "Rafael J. Wysocki" CC: Rusty Russell CC: trenn@suse.de CC: sven.wegener@stealer.net CC: cpufreq@vger.kernel.org Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 4da28444b235..3938c7817095 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -61,6 +61,8 @@ static DEFINE_SPINLOCK(cpufreq_driver_lock); * are concerned with are online after they get the lock. * - Governor routines that can be called in cpufreq hotplug path should not * take this sem as top level hotplug notifier handler takes this. + * - Lock should not be held across + * __cpufreq_governor(data, CPUFREQ_GOV_STOP); */ static DEFINE_PER_CPU(int, policy_cpu); static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); @@ -1707,8 +1709,17 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, dprintk("governor switch\n"); /* end old governor */ - if (data->governor) + if (data->governor) { + /* + * Need to release the rwsem around governor + * stop due to lock dependency between + * cancel_delayed_work_sync and the read lock + * taken in the delayed work handler. + */ + unlock_policy_rwsem_write(data->cpu); __cpufreq_governor(data, CPUFREQ_GOV_STOP); + lock_policy_rwsem_write(data->cpu); + } /* start new governor */ data->governor = policy->governor; From db39d5529d347de5e2eec1a72d67fcfacae6c5a2 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Fri, 21 Aug 2009 19:15:28 -0500 Subject: [PATCH 11/13] [CPUFREQ] Powernow-k8: Enable more than 2 low P-states Remove an obsolete check that used to prevent there being more than 2 low P-states. Now that low-to-low P-states changes are enabled, it prevents otherwise workable configurations with multiple low P-states. Signed-off-by: Mark Langsdorf Tested-by: Krists Krilovs Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 29 ++++------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 2a50ef891000..0cbce0481a54 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -854,6 +854,10 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) goto err_out; } + /* fill in data */ + data->numps = data->acpi_data.state_count; + powernow_k8_acpi_pst_values(data, 0); + if (cpu_family == CPU_HW_PSTATE) ret_val = fill_powernow_table_pstate(data, powernow_table); else @@ -866,11 +870,8 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) powernow_table[data->acpi_data.state_count].index = 0; data->powernow_table = powernow_table; - /* fill in data */ - data->numps = data->acpi_data.state_count; if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) print_basics(data); - powernow_k8_acpi_pst_values(data, 0); /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); @@ -941,7 +942,6 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) { int i; - int cntlofreq = 0; for (i = 0; i < data->acpi_data.state_count; i++) { u32 fid; @@ -982,27 +982,6 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, continue; } - /* verify only 1 entry from the lo frequency table */ - if (fid < HI_FID_TABLE_BOTTOM) { - if (cntlofreq) { - /* if both entries are the same, - * ignore this one ... */ - if ((freq != powernow_table[cntlofreq].frequency) || - (index != powernow_table[cntlofreq].index)) { - printk(KERN_ERR PFX - "Too many lo freq table " - "entries\n"); - return 1; - } - - dprintk("double low frequency table entry, " - "ignoring it.\n"); - invalidate_entry(data, i); - continue; - } else - cntlofreq = i; - } - if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { printk(KERN_INFO PFX "invalid freq entries " "%u kHz vs. %u kHz\n", freq, From 1a8e42fa81e62d47cc471f7764f906bb42b27a54 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 26 Aug 2009 13:19:37 -0400 Subject: [PATCH 12/13] [CPUFREQ] Create a blacklist for processors that should not load the acpi-cpufreq module. Create a blacklist for processors that should not load the acpi-cpufreq module. The initial entry in the blacklist function is the Intel 0f68 processor. It's specification update mentions errata AL30 which implies that cpufreq should not run on this processor. Signed-off-by: Prarit Bhargava Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ae9b503220ca..badce5084060 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -588,6 +588,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = { }, { } }; + +static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) +{ + /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf + * AL30: A Machine Check Exception (MCE) Occurring during an + * Enhanced Intel SpeedStep Technology Ratio Change May Cause + * Both Processor Cores to Lock Up when HT is enabled*/ + if (c->x86_vendor == X86_VENDOR_INTEL) { + if ((c->x86 == 15) && + (c->x86_model == 6) && + (c->x86_mask == 8) && smt_capable()) + return -ENODEV; + } + return 0; +} #endif static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) @@ -602,6 +617,12 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) dprintk("acpi_cpufreq_cpu_init\n"); +#ifdef CONFIG_SMP + result = acpi_cpufreq_blacklist(c); + if (result) + return result; +#endif + data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); if (!data) return -ENOMEM; From f0adb134d8dc9993a9998dc50845ec4f6ff4fadc Mon Sep 17 00:00:00 2001 From: Kurt Roeckx Date: Wed, 16 Sep 2009 11:09:32 -0400 Subject: [PATCH 13/13] [CPUFREQ] Fix NULL ptr regression in powernow-k8 Fixes bugzilla #13780 From: Kurt Roeckx Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 0cbce0481a54..6394aa5c7985 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -605,9 +605,10 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, return 0; } -static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry) +static void invalidate_entry(struct cpufreq_frequency_table *powernow_table, + unsigned int entry) { - data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; + powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; } static void print_basics(struct powernow_k8_data *data) @@ -915,13 +916,13 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, "bad value %d.\n", i, index); printk(KERN_ERR PFX "Please report to BIOS " "manufacturer\n"); - invalidate_entry(data, i); + invalidate_entry(powernow_table, i); continue; } rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); if (!(hi & HW_PSTATE_VALID_MASK)) { dprintk("invalid pstate %d, ignoring\n", index); - invalidate_entry(data, i); + invalidate_entry(powernow_table, i); continue; } @@ -970,7 +971,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, /* verify frequency is OK */ if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { dprintk("invalid freq %u kHz, ignoring\n", freq); - invalidate_entry(data, i); + invalidate_entry(powernow_table, i); continue; } @@ -978,7 +979,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, * BIOSs are using "off" to indicate invalid */ if (vid == VID_OFF) { dprintk("invalid vid %u, ignoring\n", vid); - invalidate_entry(data, i); + invalidate_entry(powernow_table, i); continue; } @@ -988,7 +989,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); - invalidate_entry(data, i); + invalidate_entry(powernow_table, i); continue; } }