Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  Enhanced partition statistics: documentation update
  Enhanced partition statistics: remove old partition statistics
  Enhanced partition statistics: procfs
  Enhanced partition statistics: sysfs
  Enhanced partition statistics: aoe fix
  Enhanced partition statistics: update partition statitics
  Enhanced partition statistics: core statistics
  block: fixup rq_init() a bit

Manually fixed conflict in drivers/block/aoe/aoecmd.c due to statistics
support.
This commit is contained in:
Linus Torvalds 2008-02-08 09:42:46 -08:00
commit 03054de1e0
10 changed files with 320 additions and 48 deletions

View file

@ -0,0 +1,22 @@
What: /proc/diskstats
Date: February 2008
Contact: Jerome Marchand <jmarchan@redhat.com>
Description:
The /proc/diskstats file displays the I/O statistics
of block devices. Each line contains the following 14
fields:
1 - major number
2 - minor number
3 - device name
4 - reads completed successfully
5 - reads merged
6 - sectors read
7 - time spent reading (ms)
8 - writes completed
9 - writes merged
10 - sectors written
11 - time spent writing (ms)
12 - I/Os currently in progress
13 - time spent doing I/Os (ms)
14 - weighted time spent doing I/Os (ms)
For more details refer to Documentation/iostats.txt

View file

@ -0,0 +1,28 @@
What: /sys/block/<disk>/stat
Date: February 2008
Contact: Jerome Marchand <jmarchan@redhat.com>
Description:
The /sys/block/<disk>/stat files display the I/O
statistics of disk <disk>. They contain 11 fields:
1 - reads completed successfully
2 - reads merged
3 - sectors read
4 - time spent reading (ms)
5 - writes completed
6 - writes merged
7 - sectors written
8 - time spent writing (ms)
9 - I/Os currently in progress
10 - time spent doing I/Os (ms)
11 - weighted time spent doing I/Os (ms)
For more details refer to Documentation/iostats.txt
What: /sys/block/<disk>/<part>/stat
Date: February 2008
Contact: Jerome Marchand <jmarchan@redhat.com>
Description:
The /sys/block/<disk>/<part>/stat files display the
I/O statistics of partition <part>. The format is the
same as the above-written /sys/block/<disk>/stat
format.

View file

@ -58,7 +58,7 @@ they should not wrap twice before you notice them.
Each set of stats only applies to the indicated device; if you want
system-wide stats you'll have to find all the devices and sum them all up.
Field 1 -- # of reads issued
Field 1 -- # of reads completed
This is the total number of reads completed successfully.
Field 2 -- # of reads merged, field 6 -- # of writes merged
Reads and writes which are adjacent to each other may be merged for
@ -132,6 +132,19 @@ words, the number of reads for partitions is counted slightly before time
of queuing for partitions, and at completion for whole disks. This is
a subtle distinction that is probably uninteresting for most cases.
More significant is the error induced by counting the numbers of
reads/writes before merges for partitions and after for disks. Since a
typical workload usually contains a lot of successive and adjacent requests,
the number of reads/writes issued can be several times higher than the
number of reads/writes completed.
In 2.6.25, the full statistic set is again available for partitions and
disk and partition statistics are consistent again. Since we still don't
keep record of the partition-relative address, an operation is attributed to
the partition which contains the first sector of the request after the
eventual merges. As requests can be merged across partitions, this could lead
to some (probably insignificant) inaccuracy.
Additional notes
----------------

View file

@ -60,10 +60,15 @@ static void drive_stat_acct(struct request *rq, int new_io)
return;
if (!new_io) {
__disk_stat_inc(rq->rq_disk, merges[rw]);
__all_stat_inc(rq->rq_disk, merges[rw], rq->sector);
} else {
struct hd_struct *part = get_part(rq->rq_disk, rq->sector);
disk_round_stats(rq->rq_disk);
rq->rq_disk->in_flight++;
if (part) {
part_round_stats(part);
part->in_flight++;
}
}
}
@ -102,27 +107,38 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
}
EXPORT_SYMBOL(blk_get_backing_dev_info);
/*
 * rq_init - put a request into a known initial state.
 * @q: request queue, stored in rq->q
 * @rq: request whose fields are reset
 *
 * We can't just memset() the structure, since the allocation path
 * already stored some information in the request. Every field that
 * has to be reset is therefore assigned individually below.
 */
void rq_init(struct request_queue *q, struct request *rq)
{
INIT_LIST_HEAD(&rq->queuelist);
INIT_LIST_HEAD(&rq->donelist);
rq->errors = 0;
rq->q = q;
/* (sector_t) -1: presumably marks "no sector assigned yet" -- TODO confirm */
rq->sector = rq->hard_sector = (sector_t) -1;
rq->nr_sectors = rq->hard_nr_sectors = 0;
rq->current_nr_sectors = rq->hard_cur_sectors = 0;
rq->bio = rq->biotail = NULL;
INIT_HLIST_NODE(&rq->hash);
RB_CLEAR_NODE(&rq->rb_node);
rq->ioprio = 0;
rq->buffer = NULL;
rq->ref_count = 1;
rq->q = q;
rq->special = NULL;
rq->data_len = 0;
rq->data = NULL;
rq->rq_disk = NULL;
rq->nr_phys_segments = 0;
rq->nr_hw_segments = 0;
rq->ioprio = 0;
rq->special = NULL;
rq->buffer = NULL;
rq->tag = -1;
rq->errors = 0;
rq->ref_count = 1;
rq->cmd_len = 0;
memset(rq->cmd, 0, sizeof(rq->cmd));
rq->data_len = 0;
rq->sense_len = 0;
rq->data = NULL;
rq->sense = NULL;
rq->end_io = NULL;
rq->end_io_data = NULL;
rq->completion_data = NULL;
rq->next_rq = NULL;
}
@ -986,6 +1002,21 @@ void disk_round_stats(struct gendisk *disk)
}
EXPORT_SYMBOL_GPL(disk_round_stats);
/*
 * part_round_stats - bring a partition's time-based counters up to date.
 * @part: partition whose statistics are refreshed
 *
 * Does nothing when part->stamp already equals the current jiffies value.
 * Otherwise, if requests are in flight, the jiffies elapsed since
 * part->stamp are added to io_ticks and, multiplied by the in-flight
 * count, to time_in_queue. part->stamp is then set to the current jiffies.
 *
 * Uses the __part_stat_add() variant, so the caller provides the
 * critical section.
 */
void part_round_stats(struct hd_struct *part)
{
	const unsigned long now = jiffies;
	unsigned long elapsed;

	if (part->stamp == now)
		return;

	elapsed = now - part->stamp;
	if (part->in_flight) {
		__part_stat_add(part, time_in_queue, part->in_flight * elapsed);
		__part_stat_add(part, io_ticks, elapsed);
	}

	part->stamp = now;
}
/*
* queue lock must be held
*/
@ -1188,10 +1219,6 @@ static inline void blk_partition_remap(struct bio *bio)
if (bio_sectors(bio) && bdev != bdev->bd_contains) {
struct hd_struct *p = bdev->bd_part;
const int rw = bio_data_dir(bio);
p->sectors[rw] += bio_sectors(bio);
p->ios[rw]++;
bio->bi_sector += p->start_sect;
bio->bi_bdev = bdev->bd_contains;
@ -1519,7 +1546,8 @@ static int __end_that_request_first(struct request *req, int error,
if (blk_fs_request(req) && req->rq_disk) {
const int rw = rq_data_dir(req);
disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
all_stat_add(req->rq_disk, sectors[rw],
nr_bytes >> 9, req->sector);
}
total_bytes = bio_nbytes = 0;
@ -1704,11 +1732,16 @@ static void end_that_request_last(struct request *req, int error)
if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
unsigned long duration = jiffies - req->start_time;
const int rw = rq_data_dir(req);
struct hd_struct *part = get_part(disk, req->sector);
__disk_stat_inc(disk, ios[rw]);
__disk_stat_add(disk, ticks[rw], duration);
__all_stat_inc(disk, ios[rw], req->sector);
__all_stat_add(disk, ticks[rw], duration, req->sector);
disk_round_stats(disk);
disk->in_flight--;
if (part) {
part_round_stats(part);
part->in_flight--;
}
}
if (req->end_io)

View file

@ -454,8 +454,14 @@ static int attempt_merge(struct request_queue *q, struct request *req,
elv_merge_requests(q, req, next);
if (req->rq_disk) {
struct hd_struct *part
= get_part(req->rq_disk, req->sector);
disk_round_stats(req->rq_disk);
req->rq_disk->in_flight--;
if (part) {
part_round_stats(part);
part->in_flight--;
}
}
req->ioprio = ioprio_best(req->ioprio, next->ioprio);

View file

@ -584,12 +584,28 @@ static int diskstats_show(struct seq_file *s, void *v)
for (n = 0; n < gp->minors - 1; n++) {
struct hd_struct *hd = gp->part[n];
if (hd && hd->nr_sects)
seq_printf(s, "%4d %4d %s %u %u %u %u\n",
gp->major, n + gp->first_minor + 1,
disk_name(gp, n + 1, buf),
hd->ios[0], hd->sectors[0],
hd->ios[1], hd->sectors[1]);
if (!hd || !hd->nr_sects)
continue;
preempt_disable();
part_round_stats(hd);
preempt_enable();
seq_printf(s, "%4d %4d %s %lu %lu %llu "
"%u %lu %lu %llu %u %u %u %u\n",
gp->major, n + gp->first_minor + 1,
disk_name(gp, n + 1, buf),
part_stat_read(hd, ios[0]),
part_stat_read(hd, merges[0]),
(unsigned long long)part_stat_read(hd, sectors[0]),
jiffies_to_msecs(part_stat_read(hd, ticks[0])),
part_stat_read(hd, ios[1]),
part_stat_read(hd, merges[1]),
(unsigned long long)part_stat_read(hd, sectors[1]),
jiffies_to_msecs(part_stat_read(hd, ticks[1])),
hd->in_flight,
jiffies_to_msecs(part_stat_read(hd, io_ticks)),
jiffies_to_msecs(part_stat_read(hd, time_in_queue))
);
}
return 0;

View file

@ -751,15 +751,15 @@ gettgt(struct aoedev *d, char *addr)
}
static inline void
diskstats(struct gendisk *disk, struct bio *bio, ulong duration)
diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
{
unsigned long n_sect = bio->bi_size >> 9;
const int rw = bio_data_dir(bio);
disk_stat_inc(disk, ios[rw]);
disk_stat_add(disk, ticks[rw], duration);
disk_stat_add(disk, sectors[rw], n_sect);
disk_stat_add(disk, io_ticks, duration);
all_stat_inc(disk, ios[rw], sector);
all_stat_add(disk, ticks[rw], duration, sector);
all_stat_add(disk, sectors[rw], n_sect, sector);
all_stat_add(disk, io_ticks, duration, sector);
}
void
@ -879,7 +879,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
}
if (buf && --buf->nframesout == 0 && buf->resid == 0) {
diskstats(d->gd, buf->bio, jiffies - buf->stime);
diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector);
n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
bio_endio(buf->bio, n);
mempool_free(buf, d->bufpool);

View file

@ -18,6 +18,7 @@
#include <linux/fs.h>
#include <linux/kmod.h>
#include <linux/ctype.h>
#include <linux/genhd.h>
#include "check.h"
@ -215,9 +216,25 @@ static ssize_t part_stat_show(struct device *dev,
{
struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%8u %8llu %8u %8llu\n",
p->ios[0], (unsigned long long)p->sectors[0],
p->ios[1], (unsigned long long)p->sectors[1]);
preempt_disable();
part_round_stats(p);
preempt_enable();
return sprintf(buf,
"%8lu %8lu %8llu %8u "
"%8lu %8lu %8llu %8u "
"%8u %8u %8u"
"\n",
part_stat_read(p, ios[READ]),
part_stat_read(p, merges[READ]),
(unsigned long long)part_stat_read(p, sectors[READ]),
jiffies_to_msecs(part_stat_read(p, ticks[READ])),
part_stat_read(p, ios[WRITE]),
part_stat_read(p, merges[WRITE]),
(unsigned long long)part_stat_read(p, sectors[WRITE]),
jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
p->in_flight,
jiffies_to_msecs(part_stat_read(p, io_ticks)),
jiffies_to_msecs(part_stat_read(p, time_in_queue)));
}
#ifdef CONFIG_FAIL_MAKE_REQUEST
@ -273,6 +290,7 @@ static struct attribute_group *part_attr_groups[] = {
/*
 * part_release - release callback for a partition device.
 * @dev: device embedded in the struct hd_struct being released
 *
 * Frees the partition's statistics storage first, then the hd_struct
 * itself.
 */
static void part_release(struct device *dev)
{
	struct hd_struct *part = dev_to_part(dev);

	/* The counters hang off the hd_struct, so they must go first. */
	free_part_stats(part);
	kfree(part);
}
@ -312,8 +330,7 @@ void delete_partition(struct gendisk *disk, int part)
disk->part[part-1] = NULL;
p->start_sect = 0;
p->nr_sects = 0;
p->ios[0] = p->ios[1] = 0;
p->sectors[0] = p->sectors[1] = 0;
part_stat_set_all(p, 0);
kobject_put(p->holder_dir);
device_del(&p->dev);
put_device(&p->dev);
@ -336,6 +353,10 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
if (!p)
return;
if (!init_part_stats(p)) {
kfree(p);
return;
}
p->start_sect = start;
p->nr_sects = len;
p->partno = part;

View file

@ -137,7 +137,9 @@ enum rq_flag_bits {
#define BLK_MAX_CDB 16
/*
* try to put the fields that are referenced together in the same cacheline
* try to put the fields that are referenced together in the same cacheline.
* if you modify this structure, be sure to check block/blk-core.c:rq_init()
* as well!
*/
struct request {
struct list_head queuelist;

View file

@ -91,15 +91,30 @@ struct partition {
__le32 nr_sects; /* nr of sectors in partition */
} __attribute__((packed));
/*
 * I/O counters, kept once per disk (struct gendisk) and once per partition
 * (struct hd_struct). Two-element arrays are indexed by data direction.
 */
struct disk_stats {
unsigned long sectors[2]; /* READs and WRITEs */
unsigned long ios[2]; /* requests completed, bumped at request completion */
unsigned long merges[2]; /* bumped when drive_stat_acct() sees a merge (!new_io) */
unsigned long ticks[2]; /* summed request durations, in jiffies */
unsigned long io_ticks; /* jiffies during which at least one request was in flight */
unsigned long time_in_queue; /* in-flight count times elapsed jiffies, accumulated */
};
/*
 * Per-partition state. get_part() treats
 * [start_sect, start_sect + nr_sects) as the sector range of the partition.
 */
struct hd_struct {
sector_t start_sect; /* first sector of the partition on the whole disk */
sector_t nr_sects; /* length of the partition in sectors */
struct device dev;
struct kobject *holder_dir;
unsigned ios[2], sectors[2]; /* READs and WRITEs */
int policy, partno;
#ifdef CONFIG_FAIL_MAKE_REQUEST
int make_it_fail;
#endif
unsigned long stamp; /* jiffies value of the last part_round_stats() update */
int in_flight; /* requests currently accounted to this partition */
#ifdef CONFIG_SMP
struct disk_stats *dkstats; /* per-CPU counters, see init_part_stats() */
#else
struct disk_stats dkstats; /* single embedded copy of the counters */
#endif
};
@ -111,15 +126,7 @@ struct hd_struct {
#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
#define GENHD_FL_FAIL 64
struct disk_stats {
unsigned long sectors[2]; /* READs and WRITEs */
unsigned long ios[2];
unsigned long merges[2];
unsigned long ticks[2];
unsigned long io_ticks;
unsigned long time_in_queue;
};
struct gendisk {
int major; /* major number of driver */
int first_minor;
@ -158,6 +165,20 @@ struct gendisk {
* The __ variants should only be called in critical sections. The full
* variants disable/enable preemption.
*/
/*
 * get_part - find the partition that contains a sector.
 * @gendiskp: disk whose part[] table is searched
 * @sector: sector number relative to the whole disk
 *
 * Scans the first (minors - 1) slots of gendiskp->part[] in order and
 * returns the first populated entry whose
 * [start_sect, start_sect + nr_sects) range covers @sector, or NULL when
 * no such entry exists.
 */
static inline struct hd_struct *get_part(struct gendisk *gendiskp,
					 sector_t sector)
{
	int slot;

	for (slot = 0; slot < gendiskp->minors - 1; slot++) {
		struct hd_struct *candidate = gendiskp->part[slot];

		if (!candidate)
			continue;
		if (sector < candidate->start_sect)
			continue;
		if (sector >= candidate->start_sect + candidate->nr_sects)
			continue;

		return candidate;
	}

	return NULL;
}
#ifdef CONFIG_SMP
#define __disk_stat_add(gendiskp, field, addnd) \
(per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd)
@ -177,15 +198,62 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) {
memset(per_cpu_ptr(gendiskp->dkstats, i), value,
sizeof (struct disk_stats));
}
/*
 * Per-partition statistics helpers, SMP flavour: part->dkstats points to
 * per-CPU storage.
 *
 * Macro arguments are parenthesised and the temporaries declared inside
 * the statement expressions carry a leading underscore, so that caller
 * expressions such as "gp->part[i]" or a caller variable named "part" are
 * not captured by the expansion.
 */

/* Add @addnd to @field in the current CPU's copy of the counters. */
#define __part_stat_add(part, field, addnd)				\
	(per_cpu_ptr((part)->dkstats, smp_processor_id())->field += (addnd))

/*
 * Add @addnd to @field of the disk and, when get_part() finds one, of the
 * partition that contains @sector.
 */
#define __all_stat_add(gendiskp, field, addnd, sector)			\
({									\
	struct hd_struct *_all_part = get_part((gendiskp), (sector));	\
									\
	if (_all_part)							\
		__part_stat_add(_all_part, field, addnd);		\
	__disk_stat_add((gendiskp), field, addnd);			\
})

/* Sum @field over the counters of every possible CPU. */
#define part_stat_read(part, field)					\
({									\
	typeof((part)->dkstats->field) _res = 0;			\
	int _cpu;							\
									\
	for_each_possible_cpu(_cpu)					\
		_res += per_cpu_ptr((part)->dkstats, _cpu)->field;	\
	_res;								\
})
static inline void part_stat_set_all(struct hd_struct *part, int value) {
int i;
for_each_possible_cpu(i)
memset(per_cpu_ptr(part->dkstats, i), value,
sizeof(struct disk_stats));
}
#else
#define __disk_stat_add(gendiskp, field, addnd) \
(gendiskp->dkstats.field += addnd)
#define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field)
static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) {
static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)
{
memset(&gendiskp->dkstats, value, sizeof (struct disk_stats));
}
/*
 * Per-partition statistics helpers, uniprocessor flavour: the counters are
 * embedded directly in struct hd_struct.
 *
 * Macro arguments are parenthesised and the temporary declared inside the
 * statement expression carries a leading underscore, so that a caller
 * variable named "part" is not captured by the expansion.
 */

/* Add @addnd to @field of the partition's counters. */
#define __part_stat_add(part, field, addnd)				\
	((part)->dkstats.field += (addnd))

/*
 * Add @addnd to @field of the disk and, when get_part() finds one, of the
 * partition that contains @sector.
 */
#define __all_stat_add(gendiskp, field, addnd, sector)			\
({									\
	struct hd_struct *_all_part = get_part((gendiskp), (sector));	\
									\
	if (_all_part)							\
		__part_stat_add(_all_part, field, addnd);		\
	__disk_stat_add((gendiskp), field, addnd);			\
})

/* Read @field of the partition's counters. */
#define part_stat_read(part, field)	((part)->dkstats.field)
/*
 * part_stat_set_all - fill a partition's counters with one byte value.
 * @part: partition whose embedded counters are overwritten
 * @value: byte handed to memset()
 */
static inline void part_stat_set_all(struct hd_struct *part, int value)
{
	struct disk_stats *stats = &part->dkstats;

	memset(stats, value, sizeof(*stats));
}
#endif
#define disk_stat_add(gendiskp, field, addnd) \
@ -206,6 +274,45 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) {
/*
 * Subtract @subnd from a disk counter. The argument is negated as a whole,
 * so a compound expression such as "a - b" is subtracted correctly.
 */
#define disk_stat_sub(gendiskp, field, subnd) \
	disk_stat_add(gendiskp, field, -(subnd))
/*
 * Wrappers around __part_stat_add(): the first argument is the partition
 * (struct hd_struct pointer). The plain variants bracket the update with
 * preempt_disable()/preempt_enable(); the __ variants leave that to the
 * caller.
 */
#define part_stat_add(part, field, addnd)		\
	do {						\
		preempt_disable();			\
		__part_stat_add(part, field, addnd);	\
		preempt_enable();			\
	} while (0)

#define __part_stat_dec(part, field) __part_stat_add(part, field, -1)
#define part_stat_dec(part, field) part_stat_add(part, field, -1)

#define __part_stat_inc(part, field) __part_stat_add(part, field, 1)
#define part_stat_inc(part, field) part_stat_add(part, field, 1)

/* @subnd is negated as a whole so that "a - b" is subtracted correctly. */
#define __part_stat_sub(part, field, subnd)		\
	__part_stat_add(part, field, -(subnd))
#define part_stat_sub(part, field, subnd)		\
	part_stat_add(part, field, -(subnd))

/*
 * Wrappers around __all_stat_add(), which updates the disk and the
 * partition containing @sector. Same plain / __ split as above.
 */
#define all_stat_add(gendiskp, field, addnd, sector)		\
	do {							\
		preempt_disable();				\
		__all_stat_add(gendiskp, field, addnd, sector);	\
		preempt_enable();				\
	} while (0)

#define __all_stat_dec(gendiskp, field, sector) \
	__all_stat_add(gendiskp, field, -1, sector)
#define all_stat_dec(gendiskp, field, sector) \
	all_stat_add(gendiskp, field, -1, sector)

#define __all_stat_inc(gendiskp, field, sector) \
	__all_stat_add(gendiskp, field, 1, sector)
#define all_stat_inc(gendiskp, field, sector) \
	all_stat_add(gendiskp, field, 1, sector)

/* @subnd is negated as a whole so that "a - b" is subtracted correctly. */
#define __all_stat_sub(gendiskp, field, subnd, sector) \
	__all_stat_add(gendiskp, field, -(subnd), sector)
#define all_stat_sub(gendiskp, field, subnd, sector) \
	all_stat_add(gendiskp, field, -(subnd), sector)
/* Inlines to alloc and free disk stats in struct gendisk */
#ifdef CONFIG_SMP
@ -221,6 +328,20 @@ static inline void free_disk_stats(struct gendisk *disk)
{
free_percpu(disk->dkstats);
}
/*
 * init_part_stats - allocate the per-CPU counters of a partition.
 * @part: partition whose dkstats pointer is set
 *
 * Returns 1 on success and 0 when alloc_percpu() returned NULL.
 */
static inline int init_part_stats(struct hd_struct *part)
{
	part->dkstats = alloc_percpu(struct disk_stats);

	return part->dkstats ? 1 : 0;
}
/*
 * free_part_stats - release the per-CPU counters of a partition.
 * @part: partition whose dkstats allocation is handed to free_percpu()
 */
static inline void free_part_stats(struct hd_struct *part)
{
free_percpu(part->dkstats);
}
#else /* CONFIG_SMP */
static inline int init_disk_stats(struct gendisk *disk)
{
@ -230,10 +351,20 @@ static inline int init_disk_stats(struct gendisk *disk)
/*
 * Uniprocessor build: the disk counters are accessed as an embedded member
 * (gendiskp->dkstats.field), so there is nothing to release here.
 */
static inline void free_disk_stats(struct gendisk *disk)
{
}
/*
 * Uniprocessor build: the partition counters are embedded in
 * struct hd_struct, so no allocation is needed and this always reports
 * success (1), matching the SMP variant's return convention.
 */
static inline int init_part_stats(struct hd_struct *part)
{
return 1;
}
/*
 * Uniprocessor build: the partition counters are embedded in
 * struct hd_struct, so there is nothing to release here.
 */
static inline void free_part_stats(struct hd_struct *part)
{
}
#endif /* CONFIG_SMP */
/* drivers/block/ll_rw_blk.c */
extern void disk_round_stats(struct gendisk *disk);
extern void part_round_stats(struct hd_struct *part);
/* drivers/block/genhd.c */
extern int get_blkdev_list(char *, int);