580f26b93e
Delay accounting already measures the time a task spends in direct reclaim and waiting for swapin, but in low memory situations tasks can spend a significant amount of their time waiting on thrashing page cache. This isn't tracked right now. To know the full impact of memory contention on an individual task, measure the delay when waiting for a recently evicted active cache page to read back into memory. Also update tools/accounting/getdelays.c: [hannes@computer accounting]$ sudo ./getdelays -d -p 1 print delayacct stats ON PID 1 CPU count real total virtual total delay total delay average 50318 745000000 847346785 400533713 0.008ms IO count delay total delay average 435 122601218 0ms SWAP count delay total delay average 0 0 0ms RECLAIM count delay total delay average 0 0 0ms THRASHING count delay total delay average 19 12621439 0ms Link: http://lkml.kernel.org/r/20180828172258.3185-4-hannes@cmpxchg.org Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Tested-by: Daniel Drake <drake@endlessm.com> Tested-by: Suren Baghdasaryan <surenb@google.com> Cc: Christopher Lameter <cl@linux.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Johannes Weiner <jweiner@fb.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Enderborg <peter.enderborg@sony.com> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Shakeel Butt <shakeelb@google.com> Cc: Tejun Heo <tj@kernel.org> Cc: Vinayak Menon <vinmenon@codeaurora.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> (cherry picked from commit b1d29ba82cf2bc784f4c963ddd6a2cf29e229b33) Bug: 127712811 Test: lmkd in PSI mode Change-Id: I259f693987cf04e6a52ee7e8accf55a17e0de005 Signed-off-by: Suren Baghdasaryan <surenb@google.com>
186 lines
5.1 KiB
C
186 lines
5.1 KiB
C
/* delayacct.c - per-task delay accounting
|
|
*
|
|
* Copyright (C) Shailabh Nagar, IBM Corp. 2006
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it would be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
|
|
* the GNU General Public License for more details.
|
|
*/
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/sched/task.h>
|
|
#include <linux/sched/cputime.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/taskstats.h>
|
|
#include <linux/time.h>
|
|
#include <linux/sysctl.h>
|
|
#include <linux/delayacct.h>
|
|
#include <linux/module.h>
|
|
|
|
/* Global switch for delay accounting; non-zero (the default) means on. */
int delayacct_on __read_mostly = 1;
EXPORT_SYMBOL_GPL(delayacct_on);

/* Slab cache that per-task struct task_delay_info objects are allocated from. */
struct kmem_cache *delayacct_cache;
|
|
|
|
static int __init delayacct_setup_disable(char *str)
|
|
{
|
|
delayacct_on = 0;
|
|
return 1;
|
|
}
|
|
__setup("nodelayacct", delayacct_setup_disable);
|
|
|
|
void delayacct_init(void)
|
|
{
|
|
delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT);
|
|
delayacct_tsk_init(&init_task);
|
|
}
|
|
|
|
void __delayacct_tsk_init(struct task_struct *tsk)
|
|
{
|
|
tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
|
|
if (tsk->delays)
|
|
raw_spin_lock_init(&tsk->delays->lock);
|
|
}
|
|
|
|
/*
|
|
* Finish delay accounting for a statistic using its timestamps (@start),
|
|
* accumalator (@total) and @count
|
|
*/
|
|
static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total,
|
|
u32 *count)
|
|
{
|
|
s64 ns = ktime_get_ns() - *start;
|
|
unsigned long flags;
|
|
|
|
if (ns > 0) {
|
|
raw_spin_lock_irqsave(lock, flags);
|
|
*total += ns;
|
|
(*count)++;
|
|
raw_spin_unlock_irqrestore(lock, flags);
|
|
}
|
|
}
|
|
|
|
void __delayacct_blkio_start(void)
|
|
{
|
|
current->delays->blkio_start = ktime_get_ns();
|
|
}
|
|
|
|
/*
|
|
* We cannot rely on the `current` macro, as we haven't yet switched back to
|
|
* the process being woken.
|
|
*/
|
|
void __delayacct_blkio_end(struct task_struct *p)
|
|
{
|
|
struct task_delay_info *delays = p->delays;
|
|
u64 *total;
|
|
u32 *count;
|
|
|
|
if (p->delays->flags & DELAYACCT_PF_SWAPIN) {
|
|
total = &delays->swapin_delay;
|
|
count = &delays->swapin_count;
|
|
} else {
|
|
total = &delays->blkio_delay;
|
|
count = &delays->blkio_count;
|
|
}
|
|
|
|
delayacct_end(&delays->lock, &delays->blkio_start, total, count);
|
|
}
|
|
|
|
int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
|
|
{
|
|
u64 utime, stime, stimescaled, utimescaled;
|
|
unsigned long long t2, t3;
|
|
unsigned long flags, t1;
|
|
s64 tmp;
|
|
|
|
task_cputime(tsk, &utime, &stime);
|
|
tmp = (s64)d->cpu_run_real_total;
|
|
tmp += utime + stime;
|
|
d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
|
|
|
|
task_cputime_scaled(tsk, &utimescaled, &stimescaled);
|
|
tmp = (s64)d->cpu_scaled_run_real_total;
|
|
tmp += utimescaled + stimescaled;
|
|
d->cpu_scaled_run_real_total =
|
|
(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;
|
|
|
|
/*
|
|
* No locking available for sched_info (and too expensive to add one)
|
|
* Mitigate by taking snapshot of values
|
|
*/
|
|
t1 = tsk->sched_info.pcount;
|
|
t2 = tsk->sched_info.run_delay;
|
|
t3 = tsk->se.sum_exec_runtime;
|
|
|
|
d->cpu_count += t1;
|
|
|
|
tmp = (s64)d->cpu_delay_total + t2;
|
|
d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
|
|
|
|
tmp = (s64)d->cpu_run_virtual_total + t3;
|
|
d->cpu_run_virtual_total =
|
|
(tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
|
|
|
|
/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
|
|
|
|
raw_spin_lock_irqsave(&tsk->delays->lock, flags);
|
|
tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
|
|
d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
|
|
tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
|
|
d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
|
|
tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
|
|
d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
|
|
tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
|
|
d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
|
|
d->blkio_count += tsk->delays->blkio_count;
|
|
d->swapin_count += tsk->delays->swapin_count;
|
|
d->freepages_count += tsk->delays->freepages_count;
|
|
d->thrashing_count += tsk->delays->thrashing_count;
|
|
raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
|
|
|
|
return 0;
|
|
}
|
|
|
|
__u64 __delayacct_blkio_ticks(struct task_struct *tsk)
|
|
{
|
|
__u64 ret;
|
|
unsigned long flags;
|
|
|
|
raw_spin_lock_irqsave(&tsk->delays->lock, flags);
|
|
ret = nsec_to_clock_t(tsk->delays->blkio_delay +
|
|
tsk->delays->swapin_delay);
|
|
raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
|
|
return ret;
|
|
}
|
|
|
|
void __delayacct_freepages_start(void)
|
|
{
|
|
current->delays->freepages_start = ktime_get_ns();
|
|
}
|
|
|
|
void __delayacct_freepages_end(void)
|
|
{
|
|
delayacct_end(
|
|
¤t->delays->lock,
|
|
¤t->delays->freepages_start,
|
|
¤t->delays->freepages_delay,
|
|
¤t->delays->freepages_count);
|
|
}
|
|
|
|
void __delayacct_thrashing_start(void)
|
|
{
|
|
current->delays->thrashing_start = ktime_get_ns();
|
|
}
|
|
|
|
void __delayacct_thrashing_end(void)
|
|
{
|
|
delayacct_end(¤t->delays->lock,
|
|
¤t->delays->thrashing_start,
|
|
¤t->delays->thrashing_delay,
|
|
¤t->delays->thrashing_count);
|
|
}
|