ANDROID: cpufreq: track per-task time in state

Add time in state data to task structs, and create
/proc/<pid>/time_in_state files to show how long each individual task
has run at each frequency.
Create a CONFIG_CPU_FREQ_TIMES option to enable/disable this tracking.

Bug: 72339335
Bug: 127641090
Test: Read /proc/<pid>/time_in_state
Change-Id: Ia6456754f4cb1e83b2bc35efa8fbe9f8696febc8
Signed-off-by: Connor O'Brien <connoro@google.com>
[astrachan: Folded the following changes into this patch:
            a6d3de6a7fba ("ANDROID: Reduce use of #ifdef CONFIG_CPU_FREQ_TIMES")
            b89ada5d9c09 ("ANDROID: Fix massive cpufreq_times memory leaks")]
Signed-off-by: Alistair Strachan <astrachan@google.com>
This commit is contained in:
Connor O'Brien 2018-01-31 18:11:57 -08:00 committed by Alistair Strachan
parent ccaa9b9b37
commit 406d53f0c7
9 changed files with 287 additions and 1 deletions

View file

@ -37,6 +37,13 @@ config CPU_FREQ_STAT
If in doubt, say N.
config CPU_FREQ_TIMES
bool "CPU frequency time-in-state statistics"
help
Export CPU time-in-state information through procfs.
If in doubt, say N.
choice
prompt "Default CPUFreq governor"
default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1100_CPUFREQ || ARM_SA1110_CPUFREQ

View file

@ -5,7 +5,10 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq.o freq_table.o
# CPUfreq stats
obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o
# CPUfreq governors
# CPUfreq times
obj-$(CONFIG_CPU_FREQ_TIMES) += cpufreq_times.o
# CPUfreq governors
obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o
obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o
obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o

View file

@ -19,6 +19,7 @@
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpufreq_times.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/init.h>
@ -355,6 +356,7 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy,
}
cpufreq_stats_record_transition(policy, freqs->new);
cpufreq_times_record_transition(freqs);
policy->cur = freqs->new;
}
}
@ -1301,6 +1303,7 @@ static int cpufreq_online(unsigned int cpu)
goto out_destroy_policy;
cpufreq_stats_create_table(policy);
cpufreq_times_create_policy(policy);
write_lock_irqsave(&cpufreq_driver_lock, flags);
list_add(&policy->policy_list, &cpufreq_policy_list);

View file

@ -0,0 +1,207 @@
/* drivers/cpufreq/cpufreq_times.c
*
* Copyright (C) 2018 Google, Inc.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <linux/cpufreq.h>
#include <linux/cpufreq_times.h>
#include <linux/jiffies.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
static DEFINE_SPINLOCK(task_time_in_state_lock); /* task->time_in_state */
/**
* struct cpu_freqs - per-cpu frequency information
* @offset: start of these freqs' stats in task time_in_state array
* @max_state: number of entries in freq_table
* @last_index: index in freq_table of last frequency switched to
* @freq_table: list of available frequencies
*/
struct cpu_freqs {
unsigned int offset;
unsigned int max_state;
unsigned int last_index;
unsigned int freq_table[0];
};
static struct cpu_freqs *all_freqs[NR_CPUS];
static unsigned int next_offset;
void cpufreq_task_times_init(struct task_struct *p)
{
unsigned long flags;
spin_lock_irqsave(&task_time_in_state_lock, flags);
p->time_in_state = NULL;
spin_unlock_irqrestore(&task_time_in_state_lock, flags);
p->max_state = 0;
}
void cpufreq_task_times_alloc(struct task_struct *p)
{
void *temp;
unsigned long flags;
unsigned int max_state = READ_ONCE(next_offset);
/* We use one array to avoid multiple allocs per task */
temp = kcalloc(max_state, sizeof(p->time_in_state[0]), GFP_ATOMIC);
if (!temp)
return;
spin_lock_irqsave(&task_time_in_state_lock, flags);
p->time_in_state = temp;
spin_unlock_irqrestore(&task_time_in_state_lock, flags);
p->max_state = max_state;
}
/* Caller must hold task_time_in_state_lock */
static int cpufreq_task_times_realloc_locked(struct task_struct *p)
{
void *temp;
unsigned int max_state = READ_ONCE(next_offset);
temp = krealloc(p->time_in_state, max_state * sizeof(u64), GFP_ATOMIC);
if (!temp)
return -ENOMEM;
p->time_in_state = temp;
memset(p->time_in_state + p->max_state, 0,
(max_state - p->max_state) * sizeof(u64));
p->max_state = max_state;
return 0;
}
void cpufreq_task_times_exit(struct task_struct *p)
{
unsigned long flags;
void *temp;
spin_lock_irqsave(&task_time_in_state_lock, flags);
temp = p->time_in_state;
p->time_in_state = NULL;
spin_unlock_irqrestore(&task_time_in_state_lock, flags);
kfree(temp);
}
int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *p)
{
unsigned int cpu, i;
u64 cputime;
unsigned long flags;
struct cpu_freqs *freqs;
struct cpu_freqs *last_freqs = NULL;
spin_lock_irqsave(&task_time_in_state_lock, flags);
for_each_possible_cpu(cpu) {
freqs = all_freqs[cpu];
if (!freqs || freqs == last_freqs)
continue;
last_freqs = freqs;
seq_printf(m, "cpu%u\n", cpu);
for (i = 0; i < freqs->max_state; i++) {
if (freqs->freq_table[i] == CPUFREQ_ENTRY_INVALID)
continue;
cputime = 0;
if (freqs->offset + i < p->max_state &&
p->time_in_state)
cputime = p->time_in_state[freqs->offset + i];
seq_printf(m, "%u %lu\n", freqs->freq_table[i],
(unsigned long)nsec_to_clock_t(cputime));
}
}
spin_unlock_irqrestore(&task_time_in_state_lock, flags);
return 0;
}
void cpufreq_acct_update_power(struct task_struct *p, u64 cputime)
{
unsigned long flags;
unsigned int state;
struct cpu_freqs *freqs = all_freqs[task_cpu(p)];
if (!freqs || p->flags & PF_EXITING)
return;
state = freqs->offset + READ_ONCE(freqs->last_index);
spin_lock_irqsave(&task_time_in_state_lock, flags);
if ((state < p->max_state || !cpufreq_task_times_realloc_locked(p)) &&
p->time_in_state)
p->time_in_state[state] += cputime;
spin_unlock_irqrestore(&task_time_in_state_lock, flags);
}
void cpufreq_times_create_policy(struct cpufreq_policy *policy)
{
int cpu, index;
unsigned int count = 0;
struct cpufreq_frequency_table *pos, *table;
struct cpu_freqs *freqs;
void *tmp;
if (all_freqs[policy->cpu])
return;
table = policy->freq_table;
if (!table)
return;
cpufreq_for_each_entry(pos, table)
count++;
tmp = kzalloc(sizeof(*freqs) + sizeof(freqs->freq_table[0]) * count,
GFP_KERNEL);
if (!tmp)
return;
freqs = tmp;
freqs->max_state = count;
index = cpufreq_frequency_table_get_index(policy, policy->cur);
if (index >= 0)
WRITE_ONCE(freqs->last_index, index);
cpufreq_for_each_entry(pos, table)
freqs->freq_table[pos - table] = pos->frequency;
freqs->offset = next_offset;
WRITE_ONCE(next_offset, freqs->offset + count);
for_each_cpu(cpu, policy->related_cpus)
all_freqs[cpu] = freqs;
}
void cpufreq_times_record_transition(struct cpufreq_freqs *freq)
{
int index;
struct cpu_freqs *freqs = all_freqs[freq->cpu];
struct cpufreq_policy *policy;
if (!freqs)
return;
policy = cpufreq_cpu_get(freq->cpu);
if (!policy)
return;
index = cpufreq_frequency_table_get_index(policy, freq->new);
if (index >= 0)
WRITE_ONCE(freqs->last_index, index);
cpufreq_cpu_put(policy);
}

View file

@ -94,6 +94,7 @@
#include <linux/sched/stat.h>
#include <linux/flex_array.h>
#include <linux/posix-timers.h>
#include <linux/cpufreq_times.h>
#include <trace/events/oom.h>
#include "internal.h"
#include "fd.h"
@ -3002,6 +3003,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_LIVEPATCH
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
#endif
#ifdef CONFIG_CPU_FREQ_TIMES
ONE("time_in_state", 0444, proc_time_in_state_show),
#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@ -3380,6 +3384,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_LIVEPATCH
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
#endif
#ifdef CONFIG_CPU_FREQ_TIMES
ONE("time_in_state", 0444, proc_time_in_state_show),
#endif
};
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)

View file

@ -0,0 +1,41 @@
/* drivers/cpufreq/cpufreq_times.c
*
* Copyright (C) 2018 Google, Inc.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef _LINUX_CPUFREQ_TIMES_H
#define _LINUX_CPUFREQ_TIMES_H
#include <linux/cpufreq.h>
#include <linux/pid.h>
#ifdef CONFIG_CPU_FREQ_TIMES
void cpufreq_task_times_init(struct task_struct *p);
void cpufreq_task_times_alloc(struct task_struct *p);
void cpufreq_task_times_exit(struct task_struct *p);
int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *p);
void cpufreq_acct_update_power(struct task_struct *p, u64 cputime);
void cpufreq_times_create_policy(struct cpufreq_policy *policy);
void cpufreq_times_record_transition(struct cpufreq_freqs *freq);
#else
static inline void cpufreq_task_times_init(struct task_struct *p) {}
static inline void cpufreq_task_times_alloc(struct task_struct *p) {}
static inline void cpufreq_task_times_exit(struct task_struct *p) {}
static inline void cpufreq_acct_update_power(struct task_struct *p,
u64 cputime) {}
static inline void cpufreq_times_create_policy(struct cpufreq_policy *policy) {}
static inline void cpufreq_times_record_transition(
struct cpufreq_freqs *freq) {}
#endif /* CONFIG_CPU_FREQ_TIMES */
#endif /* _LINUX_CPUFREQ_TIMES_H */

View file

@ -799,6 +799,10 @@ struct task_struct {
u64 stimescaled;
#endif
u64 gtime;
#ifdef CONFIG_CPU_FREQ_TIMES
u64 *time_in_state;
unsigned int max_state;
#endif
struct prev_cputime prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
struct vtime vtime;

View file

@ -91,6 +91,7 @@
#include <linux/kcov.h>
#include <linux/livepatch.h>
#include <linux/thread_info.h>
#include <linux/cpufreq_times.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@ -394,6 +395,8 @@ void put_task_stack(struct task_struct *tsk)
void free_task(struct task_struct *tsk)
{
cpufreq_task_times_exit(tsk);
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@ -1708,6 +1711,8 @@ static __latent_entropy struct task_struct *copy_process(
if (!p)
goto fork_out;
cpufreq_task_times_init(p);
/*
* This _must_ happen before we call free_task(), i.e. before we jump
* to any of the bad_fork_* labels. This is to avoid freeing
@ -2179,6 +2184,8 @@ long _do_fork(unsigned long clone_flags,
if (IS_ERR(p))
return PTR_ERR(p);
cpufreq_task_times_alloc(p);
/*
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.

View file

@ -1,6 +1,7 @@
/*
* Simple CPU accounting cgroup controller
*/
#include <linux/cpufreq_times.h>
#include "sched.h"
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@ -128,6 +129,9 @@ void account_user_time(struct task_struct *p, u64 cputime)
/* Account for user time used */
acct_account_cputime(p);
/* Account power usage for user time */
cpufreq_acct_update_power(p, cputime);
}
/*
@ -172,6 +176,9 @@ void account_system_index_time(struct task_struct *p,
/* Account for system time used */
acct_account_cputime(p);
/* Account power usage for system time */
cpufreq_acct_update_power(p, cputime);
}
/*