md: add proper merge_bvec handling to RAID0 and Linear.
These personalities currently set a max request size of one page when any member device has a merge_bvec_fn because they don't bother to call that function. This causes extra work in splitting and combining requests. So make the extra effort to call the merge_bvec_fn when it exists, so that we end up with larger requests going out the bottom. Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
parent
dafb20fa34
commit
ba13da47ff
3 changed files with 107 additions and 88 deletions
|
@ -68,10 +68,19 @@ static int linear_mergeable_bvec(struct request_queue *q,
|
||||||
struct dev_info *dev0;
|
struct dev_info *dev0;
|
||||||
unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
|
unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
|
||||||
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
|
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
|
||||||
|
int maxbytes = biovec->bv_len;
|
||||||
|
struct request_queue *subq;
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
dev0 = which_dev(mddev, sector);
|
dev0 = which_dev(mddev, sector);
|
||||||
maxsectors = dev0->end_sector - sector;
|
maxsectors = dev0->end_sector - sector;
|
||||||
|
subq = bdev_get_queue(dev0->rdev->bdev);
|
||||||
|
if (subq->merge_bvec_fn) {
|
||||||
|
bvm->bi_bdev = dev0->rdev->bdev;
|
||||||
|
bvm->bi_sector -= dev0->end_sector - dev0->rdev->sectors;
|
||||||
|
maxbytes = min(maxbytes, subq->merge_bvec_fn(subq, bvm,
|
||||||
|
biovec));
|
||||||
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
if (maxsectors < bio_sectors)
|
if (maxsectors < bio_sectors)
|
||||||
|
@ -80,12 +89,12 @@ static int linear_mergeable_bvec(struct request_queue *q,
|
||||||
maxsectors -= bio_sectors;
|
maxsectors -= bio_sectors;
|
||||||
|
|
||||||
if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0)
|
if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0)
|
||||||
return biovec->bv_len;
|
return maxbytes;
|
||||||
/* The bytes available at this offset could be really big,
|
|
||||||
* so we cap at 2^31 to avoid overflow */
|
if (maxsectors > (maxbytes >> 9))
|
||||||
if (maxsectors > (1 << (31-9)))
|
return maxbytes;
|
||||||
return 1<<31;
|
else
|
||||||
return maxsectors << 9;
|
return maxsectors << 9;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int linear_congested(void *data, int bits)
|
static int linear_congested(void *data, int bits)
|
||||||
|
@ -158,15 +167,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
|
||||||
|
|
||||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||||
rdev->data_offset << 9);
|
rdev->data_offset << 9);
|
||||||
/* as we don't honour merge_bvec_fn, we must never risk
|
|
||||||
* violating it, so limit max_segments to 1 lying within
|
|
||||||
* a single page.
|
|
||||||
*/
|
|
||||||
if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
|
|
||||||
blk_queue_max_segments(mddev->queue, 1);
|
|
||||||
blk_queue_segment_boundary(mddev->queue,
|
|
||||||
PAGE_CACHE_SIZE - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
conf->array_sectors += rdev->sectors;
|
conf->array_sectors += rdev->sectors;
|
||||||
cnt++;
|
cnt++;
|
||||||
|
|
|
@ -188,16 +188,10 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
||||||
|
|
||||||
disk_stack_limits(mddev->gendisk, rdev1->bdev,
|
disk_stack_limits(mddev->gendisk, rdev1->bdev,
|
||||||
rdev1->data_offset << 9);
|
rdev1->data_offset << 9);
|
||||||
/* as we don't honour merge_bvec_fn, we must never risk
|
|
||||||
* violating it, so limit ->max_segments to 1, lying within
|
|
||||||
* a single page.
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) {
|
if (rdev1->bdev->bd_disk->queue->merge_bvec_fn)
|
||||||
blk_queue_max_segments(mddev->queue, 1);
|
conf->has_merge_bvec = 1;
|
||||||
blk_queue_segment_boundary(mddev->queue,
|
|
||||||
PAGE_CACHE_SIZE - 1);
|
|
||||||
}
|
|
||||||
if (!smallest || (rdev1->sectors < smallest->sectors))
|
if (!smallest || (rdev1->sectors < smallest->sectors))
|
||||||
smallest = rdev1;
|
smallest = rdev1;
|
||||||
cnt++;
|
cnt++;
|
||||||
|
@ -290,8 +284,64 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Find the zone which holds a particular offset
|
||||||
|
* Update *sectorp to be an offset in that zone
|
||||||
|
*/
|
||||||
|
static struct strip_zone *find_zone(struct r0conf *conf,
|
||||||
|
sector_t *sectorp)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
struct strip_zone *z = conf->strip_zone;
|
||||||
|
sector_t sector = *sectorp;
|
||||||
|
|
||||||
|
for (i = 0; i < conf->nr_strip_zones; i++)
|
||||||
|
if (sector < z[i].zone_end) {
|
||||||
|
if (i)
|
||||||
|
*sectorp = sector - z[i-1].zone_end;
|
||||||
|
return z + i;
|
||||||
|
}
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* remaps the bio to the target device. we separate two flows.
|
||||||
|
* power 2 flow and a general flow for the sake of perfromance
|
||||||
|
*/
|
||||||
|
static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone,
|
||||||
|
sector_t sector, sector_t *sector_offset)
|
||||||
|
{
|
||||||
|
unsigned int sect_in_chunk;
|
||||||
|
sector_t chunk;
|
||||||
|
struct r0conf *conf = mddev->private;
|
||||||
|
int raid_disks = conf->strip_zone[0].nb_dev;
|
||||||
|
unsigned int chunk_sects = mddev->chunk_sectors;
|
||||||
|
|
||||||
|
if (is_power_of_2(chunk_sects)) {
|
||||||
|
int chunksect_bits = ffz(~chunk_sects);
|
||||||
|
/* find the sector offset inside the chunk */
|
||||||
|
sect_in_chunk = sector & (chunk_sects - 1);
|
||||||
|
sector >>= chunksect_bits;
|
||||||
|
/* chunk in zone */
|
||||||
|
chunk = *sector_offset;
|
||||||
|
/* quotient is the chunk in real device*/
|
||||||
|
sector_div(chunk, zone->nb_dev << chunksect_bits);
|
||||||
|
} else{
|
||||||
|
sect_in_chunk = sector_div(sector, chunk_sects);
|
||||||
|
chunk = *sector_offset;
|
||||||
|
sector_div(chunk, chunk_sects * zone->nb_dev);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* position the bio over the real device
|
||||||
|
* real sector = chunk in device + starting of zone
|
||||||
|
* + the position in the chunk
|
||||||
|
*/
|
||||||
|
*sector_offset = (chunk * chunk_sects) + sect_in_chunk;
|
||||||
|
return conf->devlist[(zone - conf->strip_zone)*raid_disks
|
||||||
|
+ sector_div(sector, zone->nb_dev)];
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* raid0_mergeable_bvec -- tell bio layer if a two requests can be merged
|
* raid0_mergeable_bvec -- tell bio layer if two requests can be merged
|
||||||
* @q: request queue
|
* @q: request queue
|
||||||
* @bvm: properties of new bio
|
* @bvm: properties of new bio
|
||||||
* @biovec: the request that could be merged to it.
|
* @biovec: the request that could be merged to it.
|
||||||
|
@ -303,10 +353,15 @@ static int raid0_mergeable_bvec(struct request_queue *q,
|
||||||
struct bio_vec *biovec)
|
struct bio_vec *biovec)
|
||||||
{
|
{
|
||||||
struct mddev *mddev = q->queuedata;
|
struct mddev *mddev = q->queuedata;
|
||||||
|
struct r0conf *conf = mddev->private;
|
||||||
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
|
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
|
||||||
|
sector_t sector_offset = sector;
|
||||||
int max;
|
int max;
|
||||||
unsigned int chunk_sectors = mddev->chunk_sectors;
|
unsigned int chunk_sectors = mddev->chunk_sectors;
|
||||||
unsigned int bio_sectors = bvm->bi_size >> 9;
|
unsigned int bio_sectors = bvm->bi_size >> 9;
|
||||||
|
struct strip_zone *zone;
|
||||||
|
struct md_rdev *rdev;
|
||||||
|
struct request_queue *subq;
|
||||||
|
|
||||||
if (is_power_of_2(chunk_sectors))
|
if (is_power_of_2(chunk_sectors))
|
||||||
max = (chunk_sectors - ((sector & (chunk_sectors-1))
|
max = (chunk_sectors - ((sector & (chunk_sectors-1))
|
||||||
|
@ -314,10 +369,27 @@ static int raid0_mergeable_bvec(struct request_queue *q,
|
||||||
else
|
else
|
||||||
max = (chunk_sectors - (sector_div(sector, chunk_sectors)
|
max = (chunk_sectors - (sector_div(sector, chunk_sectors)
|
||||||
+ bio_sectors)) << 9;
|
+ bio_sectors)) << 9;
|
||||||
if (max < 0) max = 0; /* bio_add cannot handle a negative return */
|
if (max < 0)
|
||||||
|
max = 0; /* bio_add cannot handle a negative return */
|
||||||
if (max <= biovec->bv_len && bio_sectors == 0)
|
if (max <= biovec->bv_len && bio_sectors == 0)
|
||||||
return biovec->bv_len;
|
return biovec->bv_len;
|
||||||
else
|
if (max < biovec->bv_len)
|
||||||
|
/* too small already, no need to check further */
|
||||||
|
return max;
|
||||||
|
if (!conf->has_merge_bvec)
|
||||||
|
return max;
|
||||||
|
|
||||||
|
/* May need to check subordinate device */
|
||||||
|
sector = sector_offset;
|
||||||
|
zone = find_zone(mddev->private, §or_offset);
|
||||||
|
rdev = map_sector(mddev, zone, sector, §or_offset);
|
||||||
|
subq = bdev_get_queue(rdev->bdev);
|
||||||
|
if (subq->merge_bvec_fn) {
|
||||||
|
bvm->bi_bdev = rdev->bdev;
|
||||||
|
bvm->bi_sector = sector_offset + zone->dev_start +
|
||||||
|
rdev->data_offset;
|
||||||
|
return min(max, subq->merge_bvec_fn(subq, bvm, biovec));
|
||||||
|
} else
|
||||||
return max;
|
return max;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -397,62 +469,6 @@ static int raid0_stop(struct mddev *mddev)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Find the zone which holds a particular offset
|
|
||||||
* Update *sectorp to be an offset in that zone
|
|
||||||
*/
|
|
||||||
static struct strip_zone *find_zone(struct r0conf *conf,
|
|
||||||
sector_t *sectorp)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
struct strip_zone *z = conf->strip_zone;
|
|
||||||
sector_t sector = *sectorp;
|
|
||||||
|
|
||||||
for (i = 0; i < conf->nr_strip_zones; i++)
|
|
||||||
if (sector < z[i].zone_end) {
|
|
||||||
if (i)
|
|
||||||
*sectorp = sector - z[i-1].zone_end;
|
|
||||||
return z + i;
|
|
||||||
}
|
|
||||||
BUG();
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* remaps the bio to the target device. we separate two flows.
|
|
||||||
* power 2 flow and a general flow for the sake of perfromance
|
|
||||||
*/
|
|
||||||
static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone,
|
|
||||||
sector_t sector, sector_t *sector_offset)
|
|
||||||
{
|
|
||||||
unsigned int sect_in_chunk;
|
|
||||||
sector_t chunk;
|
|
||||||
struct r0conf *conf = mddev->private;
|
|
||||||
int raid_disks = conf->strip_zone[0].nb_dev;
|
|
||||||
unsigned int chunk_sects = mddev->chunk_sectors;
|
|
||||||
|
|
||||||
if (is_power_of_2(chunk_sects)) {
|
|
||||||
int chunksect_bits = ffz(~chunk_sects);
|
|
||||||
/* find the sector offset inside the chunk */
|
|
||||||
sect_in_chunk = sector & (chunk_sects - 1);
|
|
||||||
sector >>= chunksect_bits;
|
|
||||||
/* chunk in zone */
|
|
||||||
chunk = *sector_offset;
|
|
||||||
/* quotient is the chunk in real device*/
|
|
||||||
sector_div(chunk, zone->nb_dev << chunksect_bits);
|
|
||||||
} else{
|
|
||||||
sect_in_chunk = sector_div(sector, chunk_sects);
|
|
||||||
chunk = *sector_offset;
|
|
||||||
sector_div(chunk, chunk_sects * zone->nb_dev);
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* position the bio over the real device
|
|
||||||
* real sector = chunk in device + starting of zone
|
|
||||||
* + the position in the chunk
|
|
||||||
*/
|
|
||||||
*sector_offset = (chunk * chunk_sects) + sect_in_chunk;
|
|
||||||
return conf->devlist[(zone - conf->strip_zone)*raid_disks
|
|
||||||
+ sector_div(sector, zone->nb_dev)];
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Is io distribute over 1 or more chunks ?
|
* Is io distribute over 1 or more chunks ?
|
||||||
*/
|
*/
|
||||||
|
@ -505,7 +521,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
|
||||||
}
|
}
|
||||||
|
|
||||||
sector_offset = bio->bi_sector;
|
sector_offset = bio->bi_sector;
|
||||||
zone = find_zone(mddev->private, §or_offset);
|
zone = find_zone(mddev->private, §or_offset);
|
||||||
tmp_dev = map_sector(mddev, zone, bio->bi_sector,
|
tmp_dev = map_sector(mddev, zone, bio->bi_sector,
|
||||||
§or_offset);
|
§or_offset);
|
||||||
bio->bi_bdev = tmp_dev->bdev;
|
bio->bi_bdev = tmp_dev->bdev;
|
||||||
|
|
|
@ -4,13 +4,16 @@
|
||||||
struct strip_zone {
|
struct strip_zone {
|
||||||
sector_t zone_end; /* Start of the next zone (in sectors) */
|
sector_t zone_end; /* Start of the next zone (in sectors) */
|
||||||
sector_t dev_start; /* Zone offset in real dev (in sectors) */
|
sector_t dev_start; /* Zone offset in real dev (in sectors) */
|
||||||
int nb_dev; /* # of devices attached to the zone */
|
int nb_dev; /* # of devices attached to the zone */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct r0conf {
|
struct r0conf {
|
||||||
struct strip_zone *strip_zone;
|
struct strip_zone *strip_zone;
|
||||||
struct md_rdev **devlist; /* lists of rdevs, pointed to by strip_zone->dev */
|
struct md_rdev **devlist; /* lists of rdevs, pointed to
|
||||||
int nr_strip_zones;
|
* by strip_zone->dev */
|
||||||
|
int nr_strip_zones;
|
||||||
|
int has_merge_bvec; /* at least one member has
|
||||||
|
* a merge_bvec_fn */
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Reference in a new issue