[PATCH] md: optimise reconstruction when re-adding a recently failed drive.
When an array is degraded, bits in the intent-bitmap are never cleared. So if a recently failed drive is re-added, we only need to reconstruct the blocks that are still reflected in the bitmap. This patch adds support for this re-adding. Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
289e99e8ed
commit
41158c7eb2
3 changed files with 65 additions and 17 deletions
|
@ -577,6 +577,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
mdp_disk_t *desc;
|
mdp_disk_t *desc;
|
||||||
mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
|
mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
|
||||||
|
|
||||||
|
rdev->raid_disk = -1;
|
||||||
|
rdev->in_sync = 0;
|
||||||
if (mddev->raid_disks == 0) {
|
if (mddev->raid_disks == 0) {
|
||||||
mddev->major_version = 0;
|
mddev->major_version = 0;
|
||||||
mddev->minor_version = sb->minor_version;
|
mddev->minor_version = sb->minor_version;
|
||||||
|
@ -607,16 +609,24 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
|
memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
|
||||||
|
|
||||||
mddev->max_disks = MD_SB_DISKS;
|
mddev->max_disks = MD_SB_DISKS;
|
||||||
} else {
|
} else if (mddev->pers == NULL) {
|
||||||
__u64 ev1;
|
/* Insist on good event counter while assembling */
|
||||||
ev1 = md_event(sb);
|
__u64 ev1 = md_event(sb);
|
||||||
++ev1;
|
++ev1;
|
||||||
if (ev1 < mddev->events)
|
if (ev1 < mddev->events)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
} else if (mddev->bitmap) {
|
||||||
|
/* if adding to array with a bitmap, then we can accept an
|
||||||
|
* older device ... but not too old.
|
||||||
|
*/
|
||||||
|
__u64 ev1 = md_event(sb);
|
||||||
|
if (ev1 < mddev->bitmap->events_cleared)
|
||||||
|
return 0;
|
||||||
|
} else /* just a hot-add of a new device, leave raid_disk at -1 */
|
||||||
|
return 0;
|
||||||
|
|
||||||
if (mddev->level != LEVEL_MULTIPATH) {
|
if (mddev->level != LEVEL_MULTIPATH) {
|
||||||
rdev->raid_disk = -1;
|
rdev->faulty = 0;
|
||||||
rdev->in_sync = rdev->faulty = 0;
|
|
||||||
desc = sb->disks + rdev->desc_nr;
|
desc = sb->disks + rdev->desc_nr;
|
||||||
|
|
||||||
if (desc->state & (1<<MD_DISK_FAULTY))
|
if (desc->state & (1<<MD_DISK_FAULTY))
|
||||||
|
@ -626,7 +636,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
rdev->in_sync = 1;
|
rdev->in_sync = 1;
|
||||||
rdev->raid_disk = desc->raid_disk;
|
rdev->raid_disk = desc->raid_disk;
|
||||||
}
|
}
|
||||||
}
|
} else /* MULTIPATH are always insync */
|
||||||
|
rdev->in_sync = 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -868,6 +879,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
{
|
{
|
||||||
struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
|
struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
|
||||||
|
|
||||||
|
rdev->raid_disk = -1;
|
||||||
|
rdev->in_sync = 0;
|
||||||
if (mddev->raid_disks == 0) {
|
if (mddev->raid_disks == 0) {
|
||||||
mddev->major_version = 1;
|
mddev->major_version = 1;
|
||||||
mddev->patch_version = 0;
|
mddev->patch_version = 0;
|
||||||
|
@ -885,13 +898,21 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
memcpy(mddev->uuid, sb->set_uuid, 16);
|
memcpy(mddev->uuid, sb->set_uuid, 16);
|
||||||
|
|
||||||
mddev->max_disks = (4096-256)/2;
|
mddev->max_disks = (4096-256)/2;
|
||||||
} else {
|
} else if (mddev->pers == NULL) {
|
||||||
__u64 ev1;
|
/* Insist on good event counter while assembling */
|
||||||
ev1 = le64_to_cpu(sb->events);
|
__u64 ev1 = le64_to_cpu(sb->events);
|
||||||
++ev1;
|
++ev1;
|
||||||
if (ev1 < mddev->events)
|
if (ev1 < mddev->events)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
} else if (mddev->bitmap) {
|
||||||
|
/* If adding to array with a bitmap, then we can accept an
|
||||||
|
* older device, but not too old.
|
||||||
|
*/
|
||||||
|
__u64 ev1 = le64_to_cpu(sb->events);
|
||||||
|
if (ev1 < mddev->bitmap->events_cleared)
|
||||||
|
return 0;
|
||||||
|
} else /* just a hot-add of a new device, leave raid_disk at -1 */
|
||||||
|
return 0;
|
||||||
|
|
||||||
if (mddev->level != LEVEL_MULTIPATH) {
|
if (mddev->level != LEVEL_MULTIPATH) {
|
||||||
int role;
|
int role;
|
||||||
|
@ -899,14 +920,10 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
|
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
|
||||||
switch(role) {
|
switch(role) {
|
||||||
case 0xffff: /* spare */
|
case 0xffff: /* spare */
|
||||||
rdev->in_sync = 0;
|
|
||||||
rdev->faulty = 0;
|
rdev->faulty = 0;
|
||||||
rdev->raid_disk = -1;
|
|
||||||
break;
|
break;
|
||||||
case 0xfffe: /* faulty */
|
case 0xfffe: /* faulty */
|
||||||
rdev->in_sync = 0;
|
|
||||||
rdev->faulty = 1;
|
rdev->faulty = 1;
|
||||||
rdev->raid_disk = -1;
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
rdev->in_sync = 1;
|
rdev->in_sync = 1;
|
||||||
|
@ -914,7 +931,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
rdev->raid_disk = role;
|
rdev->raid_disk = role;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
} else /* MULTIPATH are always insync */
|
||||||
|
rdev->in_sync = 1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2155,6 +2174,18 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
|
||||||
PTR_ERR(rdev));
|
PTR_ERR(rdev));
|
||||||
return PTR_ERR(rdev);
|
return PTR_ERR(rdev);
|
||||||
}
|
}
|
||||||
|
/* set save_raid_disk if appropriate */
|
||||||
|
if (!mddev->persistent) {
|
||||||
|
if (info->state & (1<<MD_DISK_SYNC) &&
|
||||||
|
info->raid_disk < mddev->raid_disks)
|
||||||
|
rdev->raid_disk = info->raid_disk;
|
||||||
|
else
|
||||||
|
rdev->raid_disk = -1;
|
||||||
|
} else
|
||||||
|
super_types[mddev->major_version].
|
||||||
|
validate_super(mddev, rdev);
|
||||||
|
rdev->saved_raid_disk = rdev->raid_disk;
|
||||||
|
|
||||||
rdev->in_sync = 0; /* just to be sure */
|
rdev->in_sync = 0; /* just to be sure */
|
||||||
rdev->raid_disk = -1;
|
rdev->raid_disk = -1;
|
||||||
err = bind_rdev_to_array(rdev, mddev);
|
err = bind_rdev_to_array(rdev, mddev);
|
||||||
|
@ -3706,6 +3737,14 @@ void md_check_recovery(mddev_t *mddev)
|
||||||
mddev->pers->spare_active(mddev);
|
mddev->pers->spare_active(mddev);
|
||||||
}
|
}
|
||||||
md_update_sb(mddev);
|
md_update_sb(mddev);
|
||||||
|
|
||||||
|
/* if array is no-longer degraded, then any saved_raid_disk
|
||||||
|
* information must be scrapped
|
||||||
|
*/
|
||||||
|
if (!mddev->degraded)
|
||||||
|
ITERATE_RDEV(mddev,rdev,rtmp)
|
||||||
|
rdev->saved_raid_disk = -1;
|
||||||
|
|
||||||
mddev->recovery = 0;
|
mddev->recovery = 0;
|
||||||
/* flag recovery needed just to double check */
|
/* flag recovery needed just to double check */
|
||||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||||
|
|
|
@ -811,9 +811,12 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
{
|
{
|
||||||
conf_t *conf = mddev->private;
|
conf_t *conf = mddev->private;
|
||||||
int found = 0;
|
int found = 0;
|
||||||
int mirror;
|
int mirror = 0;
|
||||||
mirror_info_t *p;
|
mirror_info_t *p;
|
||||||
|
|
||||||
|
if (rdev->saved_raid_disk >= 0 &&
|
||||||
|
conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
|
||||||
|
mirror = rdev->saved_raid_disk;
|
||||||
for (mirror=0; mirror < mddev->raid_disks; mirror++)
|
for (mirror=0; mirror < mddev->raid_disks; mirror++)
|
||||||
if ( !(p=conf->mirrors+mirror)->rdev) {
|
if ( !(p=conf->mirrors+mirror)->rdev) {
|
||||||
|
|
||||||
|
@ -830,6 +833,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
p->head_position = 0;
|
p->head_position = 0;
|
||||||
rdev->raid_disk = mirror;
|
rdev->raid_disk = mirror;
|
||||||
found = 1;
|
found = 1;
|
||||||
|
if (rdev->saved_raid_disk != mirror)
|
||||||
|
conf->fullsync = 1;
|
||||||
p->rdev = rdev;
|
p->rdev = rdev;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -183,6 +183,10 @@ struct mdk_rdev_s
|
||||||
|
|
||||||
int desc_nr; /* descriptor index in the superblock */
|
int desc_nr; /* descriptor index in the superblock */
|
||||||
int raid_disk; /* role of device in array */
|
int raid_disk; /* role of device in array */
|
||||||
|
int saved_raid_disk; /* role that device used to have in the
|
||||||
|
* array and could again if we did a partial
|
||||||
|
* resync from the bitmap
|
||||||
|
*/
|
||||||
|
|
||||||
atomic_t nr_pending; /* number of pending requests.
|
atomic_t nr_pending; /* number of pending requests.
|
||||||
* only maintained for arrays that
|
* only maintained for arrays that
|
||||||
|
|
Loading…
Reference in a new issue