dm stats: support precise timestamps
Make it possible to use precise timestamps with nanosecond granularity in dm statistics. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
This commit is contained in:
parent
dd4c1b7d0c
commit
c96aec344d
3 changed files with 127 additions and 43 deletions
|
@ -13,9 +13,13 @@ the range specified.
|
|||
The I/O statistics counters for each step-sized area of a region are
|
||||
in the same format as /sys/block/*/stat or /proc/diskstats (see:
|
||||
Documentation/iostats.txt). But two extra counters (12 and 13) are
|
||||
provided: total time spent reading and writing in milliseconds. All
|
||||
these counters may be accessed by sending the @stats_print message to
|
||||
the appropriate DM device via dmsetup.
|
||||
provided: total time spent reading and writing. All these counters may
|
||||
be accessed by sending the @stats_print message to the appropriate DM
|
||||
device via dmsetup.
|
||||
|
||||
The reported times are in milliseconds and the granularity depends on
|
||||
the kernel ticks. When the option precise_timestamps is used, the
|
||||
reported times are in nanoseconds.
|
||||
|
||||
Each region has a corresponding unique identifier, which we call a
|
||||
region_id, that is assigned when the region is created. The region_id
|
||||
|
@ -33,7 +37,9 @@ memory is used by reading
|
|||
Messages
|
||||
========
|
||||
|
||||
@stats_create <range> <step> [<program_id> [<aux_data>]]
|
||||
@stats_create <range> <step>
|
||||
[<number_of_optional_arguments> <optional_arguments>...]
|
||||
[<program_id> [<aux_data>]]
|
||||
|
||||
Create a new region and return the region_id.
|
||||
|
||||
|
@ -48,6 +54,17 @@ Messages
|
|||
"/<number_of_areas>" - the range is subdivided into the specified
|
||||
number of areas.
|
||||
|
||||
<number_of_optional_arguments>
|
||||
The number of optional arguments
|
||||
|
||||
<optional_arguments>
|
||||
The following optional arguments are supported:
|
||||
precise_timestamps - use precise timer with nanosecond resolution
|
||||
instead of the "jiffies" variable. When this argument is
|
||||
used, the resulting times are in nanoseconds instead of
|
||||
milliseconds. Precise timestamps are a little bit slower
|
||||
to obtain than jiffies-based timestamps.
|
||||
|
||||
<program_id>
|
||||
An optional parameter. A name that uniquely identifies
|
||||
the userspace owner of the range. This groups ranges together
|
||||
|
@ -55,6 +72,9 @@ Messages
|
|||
created and ignore those created by others.
|
||||
The kernel returns this string back in the output of
|
||||
@stats_list message, but it doesn't use it for anything else.
|
||||
If the number of optional arguments is omitted, program_id must not
|
||||
be a number; otherwise it would be interpreted as the number of
|
||||
optional arguments.
|
||||
|
||||
<aux_data>
|
||||
An optional parameter. A word that provides auxiliary data
|
||||
|
|
|
@ -33,13 +33,14 @@ struct dm_stat_percpu {
|
|||
|
||||
struct dm_stat_shared {
|
||||
atomic_t in_flight[2];
|
||||
unsigned long stamp;
|
||||
unsigned long long stamp;
|
||||
struct dm_stat_percpu tmp;
|
||||
};
|
||||
|
||||
struct dm_stat {
|
||||
struct list_head list_entry;
|
||||
int id;
|
||||
unsigned stat_flags;
|
||||
size_t n_entries;
|
||||
sector_t start;
|
||||
sector_t end;
|
||||
|
@ -53,6 +54,8 @@ struct dm_stat {
|
|||
struct dm_stat_shared stat_shared[0];
|
||||
};
|
||||
|
||||
#define STAT_PRECISE_TIMESTAMPS 1
|
||||
|
||||
struct dm_stats_last_position {
|
||||
sector_t last_sector;
|
||||
unsigned last_rw;
|
||||
|
@ -224,7 +227,8 @@ void dm_stats_cleanup(struct dm_stats *stats)
|
|||
}
|
||||
|
||||
static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
|
||||
sector_t step, const char *program_id, const char *aux_data,
|
||||
sector_t step, unsigned stat_flags,
|
||||
const char *program_id, const char *aux_data,
|
||||
void (*suspend_callback)(struct mapped_device *),
|
||||
void (*resume_callback)(struct mapped_device *),
|
||||
struct mapped_device *md)
|
||||
|
@ -265,6 +269,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
|
|||
if (!s)
|
||||
return -ENOMEM;
|
||||
|
||||
s->stat_flags = stat_flags;
|
||||
s->n_entries = n_entries;
|
||||
s->start = start;
|
||||
s->end = end;
|
||||
|
@ -414,18 +419,24 @@ static int dm_stats_list(struct dm_stats *stats, const char *program,
|
|||
return 1;
|
||||
}
|
||||
|
||||
static void dm_stat_round(struct dm_stat_shared *shared, struct dm_stat_percpu *p)
|
||||
static void dm_stat_round(struct dm_stat *s, struct dm_stat_shared *shared,
|
||||
struct dm_stat_percpu *p)
|
||||
{
|
||||
/*
|
||||
* This is racy, but so is part_round_stats_single.
|
||||
*/
|
||||
unsigned long now = jiffies;
|
||||
unsigned in_flight_read;
|
||||
unsigned in_flight_write;
|
||||
unsigned long difference = now - shared->stamp;
|
||||
unsigned long long now, difference;
|
||||
unsigned in_flight_read, in_flight_write;
|
||||
|
||||
if (likely(!(s->stat_flags & STAT_PRECISE_TIMESTAMPS)))
|
||||
now = jiffies;
|
||||
else
|
||||
now = ktime_to_ns(ktime_get());
|
||||
|
||||
difference = now - shared->stamp;
|
||||
if (!difference)
|
||||
return;
|
||||
|
||||
in_flight_read = (unsigned)atomic_read(&shared->in_flight[READ]);
|
||||
in_flight_write = (unsigned)atomic_read(&shared->in_flight[WRITE]);
|
||||
if (in_flight_read)
|
||||
|
@ -440,8 +451,9 @@ static void dm_stat_round(struct dm_stat_shared *shared, struct dm_stat_percpu *
|
|||
}
|
||||
|
||||
static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
|
||||
unsigned long bi_rw, sector_t len, bool merged,
|
||||
bool end, unsigned long duration)
|
||||
unsigned long bi_rw, sector_t len,
|
||||
struct dm_stats_aux *stats_aux, bool end,
|
||||
unsigned long duration_jiffies)
|
||||
{
|
||||
unsigned long idx = bi_rw & REQ_WRITE;
|
||||
struct dm_stat_shared *shared = &s->stat_shared[entry];
|
||||
|
@ -471,15 +483,18 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
|
|||
p = &s->stat_percpu[smp_processor_id()][entry];
|
||||
|
||||
if (!end) {
|
||||
dm_stat_round(shared, p);
|
||||
dm_stat_round(s, shared, p);
|
||||
atomic_inc(&shared->in_flight[idx]);
|
||||
} else {
|
||||
dm_stat_round(shared, p);
|
||||
dm_stat_round(s, shared, p);
|
||||
atomic_dec(&shared->in_flight[idx]);
|
||||
p->sectors[idx] += len;
|
||||
p->ios[idx] += 1;
|
||||
p->merges[idx] += merged;
|
||||
p->ticks[idx] += duration;
|
||||
p->merges[idx] += stats_aux->merged;
|
||||
if (!(s->stat_flags & STAT_PRECISE_TIMESTAMPS))
|
||||
p->ticks[idx] += duration_jiffies;
|
||||
else
|
||||
p->ticks[idx] += stats_aux->duration_ns;
|
||||
}
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
|
@ -491,7 +506,7 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
|
|||
|
||||
static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
|
||||
sector_t bi_sector, sector_t end_sector,
|
||||
bool end, unsigned long duration,
|
||||
bool end, unsigned long duration_jiffies,
|
||||
struct dm_stats_aux *stats_aux)
|
||||
{
|
||||
sector_t rel_sector, offset, todo, fragment_len;
|
||||
|
@ -520,7 +535,7 @@ static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
|
|||
if (fragment_len > s->step - offset)
|
||||
fragment_len = s->step - offset;
|
||||
dm_stat_for_entry(s, entry, bi_rw, fragment_len,
|
||||
stats_aux->merged, end, duration);
|
||||
stats_aux, end, duration_jiffies);
|
||||
todo -= fragment_len;
|
||||
entry++;
|
||||
offset = 0;
|
||||
|
@ -529,11 +544,13 @@ static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
|
|||
|
||||
void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
|
||||
sector_t bi_sector, unsigned bi_sectors, bool end,
|
||||
unsigned long duration, struct dm_stats_aux *stats_aux)
|
||||
unsigned long duration_jiffies,
|
||||
struct dm_stats_aux *stats_aux)
|
||||
{
|
||||
struct dm_stat *s;
|
||||
sector_t end_sector;
|
||||
struct dm_stats_last_position *last;
|
||||
bool got_precise_time;
|
||||
|
||||
if (unlikely(!bi_sectors))
|
||||
return;
|
||||
|
@ -557,8 +574,17 @@ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
|
|||
|
||||
rcu_read_lock();
|
||||
|
||||
list_for_each_entry_rcu(s, &stats->list, list_entry)
|
||||
__dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration, stats_aux);
|
||||
got_precise_time = false;
|
||||
list_for_each_entry_rcu(s, &stats->list, list_entry) {
|
||||
if (s->stat_flags & STAT_PRECISE_TIMESTAMPS && !got_precise_time) {
|
||||
if (!end)
|
||||
stats_aux->duration_ns = ktime_to_ns(ktime_get());
|
||||
else
|
||||
stats_aux->duration_ns = ktime_to_ns(ktime_get()) - stats_aux->duration_ns;
|
||||
got_precise_time = true;
|
||||
}
|
||||
__dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration_jiffies, stats_aux);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
@ -571,7 +597,7 @@ static void __dm_stat_init_temporary_percpu_totals(struct dm_stat_shared *shared
|
|||
|
||||
local_irq_disable();
|
||||
p = &s->stat_percpu[smp_processor_id()][x];
|
||||
dm_stat_round(shared, p);
|
||||
dm_stat_round(s, shared, p);
|
||||
local_irq_enable();
|
||||
|
||||
memset(&shared->tmp, 0, sizeof(shared->tmp));
|
||||
|
@ -643,11 +669,15 @@ static int dm_stats_clear(struct dm_stats *stats, int id)
|
|||
/*
|
||||
* This is like jiffies_to_msecs, but works for 64-bit values.
|
||||
*/
|
||||
static unsigned long long dm_jiffies_to_msec64(unsigned long long j)
|
||||
static unsigned long long dm_jiffies_to_msec64(struct dm_stat *s, unsigned long long j)
|
||||
{
|
||||
unsigned long long result = 0;
|
||||
unsigned long long result;
|
||||
unsigned mult;
|
||||
|
||||
if (s->stat_flags & STAT_PRECISE_TIMESTAMPS)
|
||||
return j;
|
||||
|
||||
result = 0;
|
||||
if (j)
|
||||
result = jiffies_to_msecs(j & 0x3fffff);
|
||||
if (j >= 1 << 22) {
|
||||
|
@ -709,16 +739,16 @@ static int dm_stats_print(struct dm_stats *stats, int id,
|
|||
shared->tmp.ios[READ],
|
||||
shared->tmp.merges[READ],
|
||||
shared->tmp.sectors[READ],
|
||||
dm_jiffies_to_msec64(shared->tmp.ticks[READ]),
|
||||
dm_jiffies_to_msec64(s, shared->tmp.ticks[READ]),
|
||||
shared->tmp.ios[WRITE],
|
||||
shared->tmp.merges[WRITE],
|
||||
shared->tmp.sectors[WRITE],
|
||||
dm_jiffies_to_msec64(shared->tmp.ticks[WRITE]),
|
||||
dm_jiffies_to_msec64(s, shared->tmp.ticks[WRITE]),
|
||||
dm_stat_in_flight(shared),
|
||||
dm_jiffies_to_msec64(shared->tmp.io_ticks_total),
|
||||
dm_jiffies_to_msec64(shared->tmp.time_in_queue),
|
||||
dm_jiffies_to_msec64(shared->tmp.io_ticks[READ]),
|
||||
dm_jiffies_to_msec64(shared->tmp.io_ticks[WRITE]));
|
||||
dm_jiffies_to_msec64(s, shared->tmp.io_ticks_total),
|
||||
dm_jiffies_to_msec64(s, shared->tmp.time_in_queue),
|
||||
dm_jiffies_to_msec64(s, shared->tmp.io_ticks[READ]),
|
||||
dm_jiffies_to_msec64(s, shared->tmp.io_ticks[WRITE]));
|
||||
|
||||
if (unlikely(sz + 1 >= maxlen))
|
||||
goto buffer_overflow;
|
||||
|
@ -769,21 +799,31 @@ static int message_stats_create(struct mapped_device *md,
|
|||
unsigned long long start, end, len, step;
|
||||
unsigned divisor;
|
||||
const char *program_id, *aux_data;
|
||||
unsigned stat_flags = 0;
|
||||
|
||||
struct dm_arg_set as, as_backup;
|
||||
const char *a;
|
||||
unsigned feature_args;
|
||||
|
||||
/*
|
||||
* Input format:
|
||||
* <range> <step> [<program_id> [<aux_data>]]
|
||||
* <range> <step> [<number_of_optional_arguments> <optional_arguments>...] [<program_id> [<aux_data>]]
|
||||
*/
|
||||
|
||||
if (argc < 3 || argc > 5)
|
||||
if (argc < 3)
|
||||
return -EINVAL;
|
||||
|
||||
if (!strcmp(argv[1], "-")) {
|
||||
as.argc = argc;
|
||||
as.argv = argv;
|
||||
dm_consume_args(&as, 1);
|
||||
|
||||
a = dm_shift_arg(&as);
|
||||
if (!strcmp(a, "-")) {
|
||||
start = 0;
|
||||
len = dm_get_size(md);
|
||||
if (!len)
|
||||
len = 1;
|
||||
} else if (sscanf(argv[1], "%llu+%llu%c", &start, &len, &dummy) != 2 ||
|
||||
} else if (sscanf(a, "%llu+%llu%c", &start, &len, &dummy) != 2 ||
|
||||
start != (sector_t)start || len != (sector_t)len)
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -791,7 +831,8 @@ static int message_stats_create(struct mapped_device *md,
|
|||
if (start >= end)
|
||||
return -EINVAL;
|
||||
|
||||
if (sscanf(argv[2], "/%u%c", &divisor, &dummy) == 1) {
|
||||
a = dm_shift_arg(&as);
|
||||
if (sscanf(a, "/%u%c", &divisor, &dummy) == 1) {
|
||||
if (!divisor)
|
||||
return -EINVAL;
|
||||
step = end - start;
|
||||
|
@ -799,18 +840,39 @@ static int message_stats_create(struct mapped_device *md,
|
|||
step++;
|
||||
if (!step)
|
||||
step = 1;
|
||||
} else if (sscanf(argv[2], "%llu%c", &step, &dummy) != 1 ||
|
||||
} else if (sscanf(a, "%llu%c", &step, &dummy) != 1 ||
|
||||
step != (sector_t)step || !step)
|
||||
return -EINVAL;
|
||||
|
||||
as_backup = as;
|
||||
a = dm_shift_arg(&as);
|
||||
if (a && sscanf(a, "%u%c", &feature_args, &dummy) == 1) {
|
||||
while (feature_args--) {
|
||||
a = dm_shift_arg(&as);
|
||||
if (!a)
|
||||
return -EINVAL;
|
||||
if (!strcasecmp(a, "precise_timestamps"))
|
||||
stat_flags |= STAT_PRECISE_TIMESTAMPS;
|
||||
else
|
||||
return -EINVAL;
|
||||
}
|
||||
} else {
|
||||
as = as_backup;
|
||||
}
|
||||
|
||||
program_id = "-";
|
||||
aux_data = "-";
|
||||
|
||||
if (argc > 3)
|
||||
program_id = argv[3];
|
||||
a = dm_shift_arg(&as);
|
||||
if (a)
|
||||
program_id = a;
|
||||
|
||||
if (argc > 4)
|
||||
aux_data = argv[4];
|
||||
a = dm_shift_arg(&as);
|
||||
if (a)
|
||||
aux_data = a;
|
||||
|
||||
if (as.argc)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* If a buffer overflow happens after we created the region,
|
||||
|
@ -822,7 +884,7 @@ static int message_stats_create(struct mapped_device *md,
|
|||
if (dm_message_test_buffer_overflow(result, maxlen))
|
||||
return 1;
|
||||
|
||||
id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data,
|
||||
id = dm_stats_create(dm_get_stats(md), start, end, step, stat_flags, program_id, aux_data,
|
||||
dm_internal_suspend_fast, dm_internal_resume_fast, md);
|
||||
if (id < 0)
|
||||
return id;
|
||||
|
|
|
@ -18,6 +18,7 @@ struct dm_stats {
|
|||
|
||||
struct dm_stats_aux {
|
||||
bool merged;
|
||||
unsigned long long duration_ns;
|
||||
};
|
||||
|
||||
void dm_stats_init(struct dm_stats *st);
|
||||
|
@ -30,7 +31,8 @@ int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv,
|
|||
|
||||
void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
|
||||
sector_t bi_sector, unsigned bi_sectors, bool end,
|
||||
unsigned long duration, struct dm_stats_aux *aux);
|
||||
unsigned long duration_jiffies,
|
||||
struct dm_stats_aux *aux);
|
||||
|
||||
static inline bool dm_stats_used(struct dm_stats *st)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue