diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 608af20a3494..e1312374725b 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -37,6 +37,13 @@ config CPU_FREQ_STAT If in doubt, say N. +config CPU_FREQ_TIMES + bool "CPU frequency time-in-state statistics" + help + Export CPU time-in-state information through procfs. + + If in doubt, say N. + choice prompt "Default CPUFreq governor" default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1100_CPUFREQ || ARM_SA1110_CPUFREQ diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index c1ffeabe4ecf..648beca8ad41 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -5,7 +5,10 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq.o freq_table.o # CPUfreq stats obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o -# CPUfreq governors +# CPUfreq times +obj-$(CONFIG_CPU_FREQ_TIMES) += cpufreq_times.o + +# CPUfreq governors obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c233e9c7bc5e..cca97888c739 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -355,6 +356,7 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, } cpufreq_stats_record_transition(policy, freqs->new); + cpufreq_times_record_transition(freqs); policy->cur = freqs->new; } } @@ -1301,6 +1303,7 @@ static int cpufreq_online(unsigned int cpu) goto out_destroy_policy; cpufreq_stats_create_table(policy); + cpufreq_times_create_policy(policy); write_lock_irqsave(&cpufreq_driver_lock, flags); list_add(&policy->policy_list, &cpufreq_policy_list); diff --git a/drivers/cpufreq/cpufreq_times.c b/drivers/cpufreq/cpufreq_times.c new file mode 100644 index 000000000000..339a3e9cf082 --- /dev/null +++ b/drivers/cpufreq/cpufreq_times.c @@ -0,0 +1,207 @@ +/* drivers/cpufreq/cpufreq_times.c + * + * Copyright (C) 2018 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(task_time_in_state_lock); /* task->time_in_state */ + +/** + * struct cpu_freqs - per-cpu frequency information + * @offset: start of these freqs' stats in task time_in_state array + * @max_state: number of entries in freq_table + * @last_index: index in freq_table of last frequency switched to + * @freq_table: list of available frequencies + */ +struct cpu_freqs { + unsigned int offset; + unsigned int max_state; + unsigned int last_index; + unsigned int freq_table[0]; +}; + +static struct cpu_freqs *all_freqs[NR_CPUS]; + +static unsigned int next_offset; + +void cpufreq_task_times_init(struct task_struct *p) +{ + unsigned long flags; + + spin_lock_irqsave(&task_time_in_state_lock, flags); + p->time_in_state = NULL; + spin_unlock_irqrestore(&task_time_in_state_lock, flags); + p->max_state = 0; +} + +void cpufreq_task_times_alloc(struct task_struct *p) +{ + void *temp; + unsigned long flags; + unsigned int max_state = READ_ONCE(next_offset); + + /* We use one array to avoid multiple allocs per task */ + temp = kcalloc(max_state, sizeof(p->time_in_state[0]), GFP_ATOMIC); + if (!temp) + return; + + spin_lock_irqsave(&task_time_in_state_lock, flags); + p->time_in_state = temp; + spin_unlock_irqrestore(&task_time_in_state_lock, flags); + p->max_state = max_state; +} + +/* Caller must hold task_time_in_state_lock */ +static int cpufreq_task_times_realloc_locked(struct task_struct *p) +{ + void *temp; + unsigned int max_state = READ_ONCE(next_offset); + + temp = krealloc(p->time_in_state, max_state * sizeof(u64), GFP_ATOMIC); + if (!temp) + return -ENOMEM; + p->time_in_state = temp; + memset(p->time_in_state + p->max_state, 0, + (max_state - p->max_state) * sizeof(u64)); + p->max_state = max_state; + return 0; +} + +void cpufreq_task_times_exit(struct task_struct *p) +{ + unsigned long flags; + void *temp; + + spin_lock_irqsave(&task_time_in_state_lock, flags); + temp = p->time_in_state; + p->time_in_state = NULL; + spin_unlock_irqrestore(&task_time_in_state_lock, flags); + kfree(temp); +} + +int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *p) +{ + unsigned int cpu, i; + u64 cputime; + unsigned long flags; + struct cpu_freqs *freqs; + struct cpu_freqs *last_freqs = NULL; + + spin_lock_irqsave(&task_time_in_state_lock, flags); + for_each_possible_cpu(cpu) { + freqs = all_freqs[cpu]; + if (!freqs || freqs == last_freqs) + continue; + last_freqs = freqs; + + seq_printf(m, "cpu%u\n", cpu); + for (i = 0; i < freqs->max_state; i++) { + if (freqs->freq_table[i] == CPUFREQ_ENTRY_INVALID) + continue; + cputime = 0; + if (freqs->offset + i < p->max_state && + p->time_in_state) + cputime = p->time_in_state[freqs->offset + i]; + seq_printf(m, "%u %lu\n", freqs->freq_table[i], + (unsigned long)nsec_to_clock_t(cputime)); + } + } + spin_unlock_irqrestore(&task_time_in_state_lock, flags); + return 0; +} + +void cpufreq_acct_update_power(struct task_struct *p, u64 cputime) +{ + unsigned long flags; + unsigned int state; + struct cpu_freqs *freqs = all_freqs[task_cpu(p)]; + + if (!freqs || p->flags & PF_EXITING) + return; + + state = freqs->offset + READ_ONCE(freqs->last_index); + + spin_lock_irqsave(&task_time_in_state_lock, flags); + if ((state < p->max_state || !cpufreq_task_times_realloc_locked(p)) && + p->time_in_state) + p->time_in_state[state] += cputime; + spin_unlock_irqrestore(&task_time_in_state_lock, flags); +} + +void cpufreq_times_create_policy(struct cpufreq_policy *policy) +{ + int cpu, index; + unsigned int count = 0; + struct cpufreq_frequency_table *pos, *table; + struct cpu_freqs *freqs; + void *tmp; + + if (all_freqs[policy->cpu]) + return; + + table = policy->freq_table; + if (!table) + return; + + cpufreq_for_each_entry(pos, table) + count++; + + tmp = kzalloc(sizeof(*freqs) + sizeof(freqs->freq_table[0]) * count, + GFP_KERNEL); + if (!tmp) + return; + + freqs = tmp; + freqs->max_state = count; + + index = cpufreq_frequency_table_get_index(policy, policy->cur); + if (index >= 0) + WRITE_ONCE(freqs->last_index, index); + + cpufreq_for_each_entry(pos, table) + freqs->freq_table[pos - table] = pos->frequency; + + freqs->offset = next_offset; + WRITE_ONCE(next_offset, freqs->offset + count); + for_each_cpu(cpu, policy->related_cpus) + all_freqs[cpu] = freqs; +} + +void cpufreq_times_record_transition(struct cpufreq_freqs *freq) +{ + int index; + struct cpu_freqs *freqs = all_freqs[freq->cpu]; + struct cpufreq_policy *policy; + + if (!freqs) + return; + + policy = cpufreq_cpu_get(freq->cpu); + if (!policy) + return; + + index = cpufreq_frequency_table_get_index(policy, freq->new); + if (index >= 0) + WRITE_ONCE(freqs->last_index, index); + + cpufreq_cpu_put(policy); +} diff --git a/fs/proc/base.c b/fs/proc/base.c index 81d77b15b347..b3ae1d0378f2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -94,6 +94,7 @@ #include #include #include +#include #include #include "internal.h" #include "fd.h" @@ -3002,6 +3003,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif +#ifdef CONFIG_CPU_FREQ_TIMES + ONE("time_in_state", 0444, proc_time_in_state_show), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3380,6 +3384,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif +#ifdef CONFIG_CPU_FREQ_TIMES + ONE("time_in_state", 0444, proc_time_in_state_show), +#endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/cpufreq_times.h b/include/linux/cpufreq_times.h new file mode 100644 index 000000000000..bfef6e62c68a --- /dev/null +++ b/include/linux/cpufreq_times.h @@ -0,0 +1,41 @@ +/* drivers/cpufreq/cpufreq_times.c + * + * Copyright (C) 2018 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_CPUFREQ_TIMES_H +#define _LINUX_CPUFREQ_TIMES_H + +#include +#include + +#ifdef CONFIG_CPU_FREQ_TIMES +void cpufreq_task_times_init(struct task_struct *p); +void cpufreq_task_times_alloc(struct task_struct *p); +void cpufreq_task_times_exit(struct task_struct *p); +int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *p); +void cpufreq_acct_update_power(struct task_struct *p, u64 cputime); +void cpufreq_times_create_policy(struct cpufreq_policy *policy); +void cpufreq_times_record_transition(struct cpufreq_freqs *freq); +#else +static inline void cpufreq_task_times_init(struct task_struct *p) {} +static inline void cpufreq_task_times_alloc(struct task_struct *p) {} +static inline void cpufreq_task_times_exit(struct task_struct *p) {} +static inline void cpufreq_acct_update_power(struct task_struct *p, + u64 cputime) {} +static inline void cpufreq_times_create_policy(struct cpufreq_policy *policy) {} +static inline void cpufreq_times_record_transition( + struct cpufreq_freqs *freq) {} +#endif /* CONFIG_CPU_FREQ_TIMES */ +#endif /* _LINUX_CPUFREQ_TIMES_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 4abb5bd74b04..061b6063ba73 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -799,6 +799,10 @@ struct task_struct { u64 stimescaled; #endif u64 gtime; +#ifdef CONFIG_CPU_FREQ_TIMES + u64 *time_in_state; + unsigned int max_state; +#endif struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN struct vtime vtime; diff --git a/kernel/fork.c b/kernel/fork.c index 64ef113e387e..d4f31de622be 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -91,6 +91,7 @@ #include #include #include +#include #include #include @@ -394,6 +395,8 @@ void put_task_stack(struct task_struct *tsk) void free_task(struct task_struct *tsk) { + cpufreq_task_times_exit(tsk); + #ifndef CONFIG_THREAD_INFO_IN_TASK /* * The task is finally done with both the stack and thread_info, @@ -1708,6 +1711,8 @@ static __latent_entropy struct task_struct *copy_process( if (!p) goto fork_out; + cpufreq_task_times_init(p); + /* * This _must_ happen before we call free_task(), i.e. before we jump * to any of the bad_fork_* labels. This is to avoid freeing @@ -2179,6 +2184,8 @@ long _do_fork(unsigned long clone_flags, if (IS_ERR(p)) return PTR_ERR(p); + cpufreq_task_times_alloc(p); + /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 0796f938c4f0..ca4208d46872 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -1,6 +1,7 @@ /* * Simple CPU accounting cgroup controller */ +#include #include "sched.h" #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -128,6 +129,9 @@ void account_user_time(struct task_struct *p, u64 cputime) /* Account for user time used */ acct_account_cputime(p); + + /* Account power usage for user time */ + cpufreq_acct_update_power(p, cputime); } /* @@ -172,6 +176,9 @@ void account_system_index_time(struct task_struct *p, /* Account for system time used */ acct_account_cputime(p); + + /* Account power usage for system time */ + cpufreq_acct_update_power(p, cputime); } /*