Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md: Use revalidate_disk to effect changes in size of device.
  md: allow raid5_quiesce to work properly when reshape is happening.
  md/raid5: set reshape_position correctly when reshape starts.
  md: Handle growth of v1.x metadata correctly.
  md: avoid array overflow with bad v1.x metadata
  md: when a level change reduces the number of devices, remove the excess.
  md: Push down data integrity code to personalities.
  md/raid6: release spare page at ->stop()
This commit is contained in:
Linus Torvalds 2009-08-02 21:31:40 -07:00
commit a33a052f19
8 changed files with 133 additions and 85 deletions

View file

@ -220,6 +220,7 @@ static int linear_run (mddev_t *mddev)
mddev->queue->unplug_fn = linear_unplug;
mddev->queue->backing_dev_info.congested_fn = linear_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
md_integrity_register(mddev);
return 0;
}
@ -256,6 +257,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
rcu_assign_pointer(mddev->private, newconf);
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
call_rcu(&oldconf->rcu, free_conf);
return 0;
}

View file

@ -1308,7 +1308,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
}
if (mddev->level != LEVEL_MULTIPATH) {
int role;
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
if (rdev->desc_nr < 0 ||
rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
role = 0xffff;
rdev->desc_nr = -1;
} else
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
switch(role) {
case 0xffff: /* spare */
break;
@ -1394,8 +1399,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
if (rdev2->desc_nr+1 > max_dev)
max_dev = rdev2->desc_nr+1;
if (max_dev > le32_to_cpu(sb->max_dev))
if (max_dev > le32_to_cpu(sb->max_dev)) {
int bmask;
sb->max_dev = cpu_to_le32(max_dev);
rdev->sb_size = max_dev * 2 + 256;
bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
if (rdev->sb_size & bmask)
rdev->sb_size = (rdev->sb_size | bmask) + 1;
}
for (i=0; i<max_dev;i++)
sb->dev_roles[i] = cpu_to_le16(0xfffe);
@ -1487,37 +1498,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
static LIST_HEAD(pending_raid_disks);
static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev)
/*
* Try to register data integrity profile for an mddev
*
* This is called when an array is started and after a disk has been kicked
* from the array. It only succeeds if all working and active component devices
* are integrity capable with matching profiles.
*/
int md_integrity_register(mddev_t *mddev)
{
struct mdk_personality *pers = mddev->pers;
struct gendisk *disk = mddev->gendisk;
struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
struct blk_integrity *bi_mddev = blk_get_integrity(disk);
mdk_rdev_t *rdev, *reference = NULL;
/* Data integrity passthrough not supported on RAID 4, 5 and 6 */
if (pers && pers->level >= 4 && pers->level <= 6)
return;
/* If rdev is integrity capable, register profile for mddev */
if (!bi_mddev && bi_rdev) {
if (blk_integrity_register(disk, bi_rdev))
printk(KERN_ERR "%s: %s Could not register integrity!\n",
__func__, disk->disk_name);
else
printk(KERN_NOTICE "Enabling data integrity on %s\n",
disk->disk_name);
return;
if (list_empty(&mddev->disks))
return 0; /* nothing to do */
if (blk_get_integrity(mddev->gendisk))
return 0; /* already registered */
list_for_each_entry(rdev, &mddev->disks, same_set) {
/* skip spares and non-functional disks */
if (test_bit(Faulty, &rdev->flags))
continue;
if (rdev->raid_disk < 0)
continue;
/*
* If at least one rdev is not integrity capable, we can not
* enable data integrity for the md device.
*/
if (!bdev_get_integrity(rdev->bdev))
return -EINVAL;
if (!reference) {
/* Use the first rdev as the reference */
reference = rdev;
continue;
}
/* does this rdev's profile match the reference profile? */
if (blk_integrity_compare(reference->bdev->bd_disk,
rdev->bdev->bd_disk) < 0)
return -EINVAL;
}
/* Check that mddev and rdev have matching profiles */
if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) {
printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__,
disk->disk_name, rdev->bdev->bd_disk->disk_name);
printk(KERN_NOTICE "Disabling data integrity on %s\n",
disk->disk_name);
blk_integrity_unregister(disk);
/*
* All component devices are integrity capable and have matching
* profiles, register the common profile for the md device.
*/
if (blk_integrity_register(mddev->gendisk,
bdev_get_integrity(reference->bdev)) != 0) {
printk(KERN_ERR "md: failed to register integrity for %s\n",
mdname(mddev));
return -EINVAL;
}
printk(KERN_NOTICE "md: data integrity on %s enabled\n",
mdname(mddev));
return 0;
}
EXPORT_SYMBOL(md_integrity_register);
/* Disable data integrity if non-capable/non-matching disk is being added */
void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
{
struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk);
if (!bi_mddev) /* nothing to do */
return;
if (rdev->raid_disk < 0) /* skip spares */
return;
if (bi_rdev && blk_integrity_compare(mddev->gendisk,
rdev->bdev->bd_disk) >= 0)
return;
printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
blk_integrity_unregister(mddev->gendisk);
}
EXPORT_SYMBOL(md_integrity_add_rdev);
static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
{
@ -1591,7 +1641,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
/* May as well allow recovery to be retried once */
mddev->recovery_disabled = 0;
md_integrity_check(rdev, mddev);
return 0;
fail:
@ -2657,6 +2706,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
ssize_t rv = len;
struct mdk_personality *pers;
void *priv;
mdk_rdev_t *rdev;
if (mddev->pers == NULL) {
if (len == 0)
@ -2736,6 +2786,12 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
mddev_suspend(mddev);
mddev->pers->stop(mddev);
module_put(mddev->pers->owner);
/* Invalidate devices that are now superfluous */
list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= mddev->raid_disks) {
rdev->raid_disk = -1;
clear_bit(In_sync, &rdev->flags);
}
mddev->pers = pers;
mddev->private = priv;
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
@ -3685,17 +3741,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
mddev->array_sectors = sectors;
set_capacity(mddev->gendisk, mddev->array_sectors);
if (mddev->pers) {
struct block_device *bdev = bdget_disk(mddev->gendisk, 0);
if (bdev) {
mutex_lock(&bdev->bd_inode->i_mutex);
i_size_write(bdev->bd_inode,
(loff_t)mddev->array_sectors << 9);
mutex_unlock(&bdev->bd_inode->i_mutex);
bdput(bdev);
}
}
if (mddev->pers)
revalidate_disk(mddev->gendisk);
return len;
}
@ -4048,10 +4095,6 @@ static int do_md_run(mddev_t * mddev)
}
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
if (pers->level >= 4 && pers->level <= 6)
/* Cannot support integrity (yet) */
blk_integrity_unregister(mddev->gendisk);
if (mddev->reshape_position != MaxSector &&
pers->start_reshape == NULL) {
/* This personality cannot handle reshaping... */
@ -4189,6 +4232,7 @@ static int do_md_run(mddev_t * mddev)
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
revalidate_disk(mddev->gendisk);
mddev->changed = 1;
md_new_event(mddev);
sysfs_notify_dirent(mddev->sysfs_state);
@ -5087,18 +5131,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
return -ENOSPC;
}
rv = mddev->pers->resize(mddev, num_sectors);
if (!rv) {
struct block_device *bdev;
bdev = bdget_disk(mddev->gendisk, 0);
if (bdev) {
mutex_lock(&bdev->bd_inode->i_mutex);
i_size_write(bdev->bd_inode,
(loff_t)mddev->array_sectors << 9);
mutex_unlock(&bdev->bd_inode->i_mutex);
bdput(bdev);
}
}
if (!rv)
revalidate_disk(mddev->gendisk);
return rv;
}

View file

@ -431,5 +431,7 @@ extern int md_allow_write(mddev_t *mddev);
extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
extern int md_check_no_bitmap(mddev_t *mddev);
extern int md_integrity_register(mddev_t *mddev);
void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
#endif /* _MD_MD_H */

View file

@ -313,6 +313,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
set_bit(In_sync, &rdev->flags);
rcu_assign_pointer(p->rdev, rdev);
err = 0;
md_integrity_add_rdev(rdev, mddev);
break;
}
@ -345,7 +346,9 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
/* lost the race, try later */
err = -EBUSY;
p->rdev = rdev;
goto abort;
}
md_integrity_register(mddev);
}
abort:
@ -519,7 +522,7 @@ static int multipath_run (mddev_t *mddev)
mddev->queue->unplug_fn = multipath_unplug;
mddev->queue->backing_dev_info.congested_fn = multipath_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
md_integrity_register(mddev);
return 0;
out_free_conf:

View file

@ -351,6 +351,7 @@ static int raid0_run(mddev_t *mddev)
blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
dump_zones(mddev);
md_integrity_register(mddev);
return 0;
}

View file

@ -1144,7 +1144,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
rcu_assign_pointer(p->rdev, rdev);
break;
}
md_integrity_add_rdev(rdev, mddev);
print_conf(conf);
return err;
}
@ -1178,7 +1178,9 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
/* lost the race, try later */
err = -EBUSY;
p->rdev = rdev;
goto abort;
}
md_integrity_register(mddev);
}
abort:
@ -2067,7 +2069,7 @@ static int run(mddev_t *mddev)
mddev->queue->unplug_fn = raid1_unplug;
mddev->queue->backing_dev_info.congested_fn = raid1_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
md_integrity_register(mddev);
return 0;
out_no_mem:
@ -2132,6 +2134,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
return -EINVAL;
set_capacity(mddev->gendisk, mddev->array_sectors);
mddev->changed = 1;
revalidate_disk(mddev->gendisk);
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp == MaxSector) {
mddev->recovery_cp = mddev->dev_sectors;

View file

@ -1170,6 +1170,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
break;
}
md_integrity_add_rdev(rdev, mddev);
print_conf(conf);
return err;
}
@ -1203,7 +1204,9 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
/* lost the race, try later */
err = -EBUSY;
p->rdev = rdev;
goto abort;
}
md_integrity_register(mddev);
}
abort:
@ -2225,6 +2228,7 @@ static int run(mddev_t *mddev)
if (conf->near_copies < mddev->raid_disks)
blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
md_integrity_register(mddev);
return 0;
out_free_conf:

View file

@ -3999,6 +3999,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
return 0;
}
/* Allow raid5_quiesce to complete */
wait_event(conf->wait_for_overlap, conf->quiesce != 2);
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
return reshape_request(mddev, sector_nr, skipped);
@ -4316,6 +4319,15 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
return sectors * (raid_disks - conf->max_degraded);
}
static void free_conf(raid5_conf_t *conf)
{
shrink_stripes(conf);
safe_put_page(conf->spare_page);
kfree(conf->disks);
kfree(conf->stripe_hashtbl);
kfree(conf);
}
static raid5_conf_t *setup_conf(mddev_t *mddev)
{
raid5_conf_t *conf;
@ -4447,11 +4459,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
abort:
if (conf) {
shrink_stripes(conf);
safe_put_page(conf->spare_page);
kfree(conf->disks);
kfree(conf->stripe_hashtbl);
kfree(conf);
free_conf(conf);
return ERR_PTR(-EIO);
} else
return ERR_PTR(-ENOMEM);
@ -4629,12 +4637,8 @@ static int run(mddev_t *mddev)
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
if (conf) {
shrink_stripes(conf);
print_raid5_conf(conf);
safe_put_page(conf->spare_page);
kfree(conf->disks);
kfree(conf->stripe_hashtbl);
kfree(conf);
free_conf(conf);
}
mddev->private = NULL;
printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev));
@ -4649,13 +4653,10 @@ static int stop(mddev_t *mddev)
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
shrink_stripes(conf);
kfree(conf->stripe_hashtbl);
mddev->queue->backing_dev_info.congested_fn = NULL;
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
kfree(conf->disks);
kfree(conf);
free_conf(conf);
mddev->private = NULL;
return 0;
}
@ -4857,6 +4858,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
return -EINVAL;
set_capacity(mddev->gendisk, mddev->array_sectors);
mddev->changed = 1;
revalidate_disk(mddev->gendisk);
if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) {
mddev->recovery_cp = mddev->dev_sectors;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@ -5002,7 +5004,7 @@ static int raid5_start_reshape(mddev_t *mddev)
spin_unlock_irqrestore(&conf->device_lock, flags);
}
mddev->raid_disks = conf->raid_disks;
mddev->reshape_position = 0;
mddev->reshape_position = conf->reshape_progress;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
@ -5057,7 +5059,6 @@ static void end_reshape(raid5_conf_t *conf)
*/
static void raid5_finish_reshape(mddev_t *mddev)
{
struct block_device *bdev;
raid5_conf_t *conf = mddev->private;
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@ -5066,15 +5067,7 @@ static void raid5_finish_reshape(mddev_t *mddev)
md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
set_capacity(mddev->gendisk, mddev->array_sectors);
mddev->changed = 1;
bdev = bdget_disk(mddev->gendisk, 0);
if (bdev) {
mutex_lock(&bdev->bd_inode->i_mutex);
i_size_write(bdev->bd_inode,
(loff_t)mddev->array_sectors << 9);
mutex_unlock(&bdev->bd_inode->i_mutex);
bdput(bdev);
}
revalidate_disk(mddev->gendisk);
} else {
int d;
mddev->degraded = conf->raid_disks;
@ -5106,12 +5099,18 @@ static void raid5_quiesce(mddev_t *mddev, int state)
case 1: /* stop all writes */
spin_lock_irq(&conf->device_lock);
conf->quiesce = 1;
/* '2' tells resync/reshape to pause so that all
* active stripes can drain
*/
conf->quiesce = 2;
wait_event_lock_irq(conf->wait_for_stripe,
atomic_read(&conf->active_stripes) == 0 &&
atomic_read(&conf->active_aligned_reads) == 0,
conf->device_lock, /* nothing */);
conf->quiesce = 1;
spin_unlock_irq(&conf->device_lock);
/* allow reshape to continue */
wake_up(&conf->wait_for_overlap);
break;
case 0: /* re-enable writes */