Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (52 commits) md: Protect access to mddev->disks list using RCU md: only count actual openers as access which prevent a 'stop' md: linear: Make array_size sector-based and rename it to array_sectors. md: Make mddev->array_size sector-based. md: Make super_type->rdev_size_change() take sector-based sizes. md: Fix check for overlapping devices. md: Tidy up rdev_size_store a bit: md: Remove some unused macros. md: Turn rdev->sb_offset into a sector-based quantity. md: Make calc_dev_sboffset() return a sector count. md: Replace calc_dev_size() by calc_num_sectors(). md: Make update_size() take the number of sectors. md: Better control of when do_md_stop is allowed to stop the array. md: get_disk_info(): Don't convert between signed and unsigned and back. md: Simplify restart_array(). md: alloc_disk_sb(): Return proper error value. md: Simplify sb_equal(). md: Simplify uuid_equal(). md: sb_equal(): Fix misleading printk. md: Fix a typo in the comment to cmd_match(). ...
This commit is contained in:
commit
8a392625b6
16 changed files with 843 additions and 791 deletions
|
@ -236,6 +236,11 @@ All md devices contain:
|
|||
writing the word for the desired state, however some states
|
||||
cannot be explicitly set, and some transitions are not allowed.
|
||||
|
||||
Select/poll works on this file. All changes except between
|
||||
active_idle and active (which can be frequent and are not
|
||||
very interesting) are notified. active->active_idle is
|
||||
reported if the metadata is externally managed.
|
||||
|
||||
clear
|
||||
No devices, no size, no level
|
||||
Writing is equivalent to STOP_ARRAY ioctl
|
||||
|
@ -292,6 +297,10 @@ Each directory contains:
|
|||
writemostly - device will only be subject to read
|
||||
requests if there are no other options.
|
||||
This applies only to raid1 arrays.
|
||||
blocked - device has failed, metadata is "external",
|
||||
and the failure hasn't been acknowledged yet.
|
||||
Writes that would write to this device if
|
||||
it were not faulty are blocked.
|
||||
spare - device is working, but not a full member.
|
||||
This includes spares that are in the process
|
||||
of being recovered to
|
||||
|
@ -301,6 +310,12 @@ Each directory contains:
|
|||
Writing "remove" removes the device from the array.
|
||||
Writing "writemostly" sets the writemostly flag.
|
||||
Writing "-writemostly" clears the writemostly flag.
|
||||
Writing "blocked" sets the "blocked" flag.
|
||||
Writing "-blocked" clears the "blocked" flag and allows writes
|
||||
to complete.
|
||||
|
||||
This file responds to select/poll. Any change to 'faulty'
|
||||
or 'blocked' causes an event.
|
||||
|
||||
errors
|
||||
An approximate count of read errors that have been detected on
|
||||
|
@ -332,7 +347,7 @@ Each directory contains:
|
|||
for storage of data. This will normally be the same as the
|
||||
component_size. This can be written while assembling an
|
||||
array. If a value less than the current component_size is
|
||||
written, component_size will be reduced to this value.
|
||||
written, it will be rejected.
|
||||
|
||||
|
||||
An active md device will also contain an entry for each active device
|
||||
|
@ -381,6 +396,19 @@ also have
|
|||
'check' and 'repair' will start the appropriate process
|
||||
providing the current state is 'idle'.
|
||||
|
||||
This file responds to select/poll. Any important change in the value
|
||||
triggers a poll event. Sometimes the value will briefly be
|
||||
"recover" if a recovery seems to be needed, but cannot be
|
||||
achieved. In that case, the transition to "recover" isn't
|
||||
notified, but the transition away is.
|
||||
|
||||
degraded
|
||||
This contains a count of the number of devices by which the
|
||||
array is degraded. So an optimal array will show '0'. A
|
||||
single failed/missing drive will show '1', etc.
|
||||
This file responds to select/poll, any increase or decrease
|
||||
in the count of missing devices will trigger an event.
|
||||
|
||||
mismatch_count
|
||||
When performing 'check' and 'repair', and possibly when
|
||||
performing 'resync', md will count the number of errors that are
|
||||
|
|
|
@ -225,7 +225,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde
|
|||
|| test_bit(Faulty, &rdev->flags))
|
||||
continue;
|
||||
|
||||
target = (rdev->sb_offset << 1) + offset + index * (PAGE_SIZE/512);
|
||||
target = rdev->sb_start + offset + index * (PAGE_SIZE/512);
|
||||
|
||||
if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) {
|
||||
page->index = index;
|
||||
|
@ -241,10 +241,10 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde
|
|||
static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
|
||||
{
|
||||
mdk_rdev_t *rdev;
|
||||
struct list_head *tmp;
|
||||
mddev_t *mddev = bitmap->mddev;
|
||||
|
||||
rdev_for_each(rdev, tmp, mddev)
|
||||
rcu_read_lock();
|
||||
rdev_for_each_rcu(rdev, mddev)
|
||||
if (test_bit(In_sync, &rdev->flags)
|
||||
&& !test_bit(Faulty, &rdev->flags)) {
|
||||
int size = PAGE_SIZE;
|
||||
|
@ -260,32 +260,37 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
|
|||
+ (long)(page->index * (PAGE_SIZE/512))
|
||||
+ size/512 > 0)
|
||||
/* bitmap runs in to metadata */
|
||||
return -EINVAL;
|
||||
goto bad_alignment;
|
||||
if (rdev->data_offset + mddev->size*2
|
||||
> rdev->sb_offset*2 + bitmap->offset)
|
||||
> rdev->sb_start + bitmap->offset)
|
||||
/* data runs in to bitmap */
|
||||
return -EINVAL;
|
||||
} else if (rdev->sb_offset*2 < rdev->data_offset) {
|
||||
goto bad_alignment;
|
||||
} else if (rdev->sb_start < rdev->data_offset) {
|
||||
/* METADATA BITMAP DATA */
|
||||
if (rdev->sb_offset*2
|
||||
if (rdev->sb_start
|
||||
+ bitmap->offset
|
||||
+ page->index*(PAGE_SIZE/512) + size/512
|
||||
> rdev->data_offset)
|
||||
/* bitmap runs in to data */
|
||||
return -EINVAL;
|
||||
goto bad_alignment;
|
||||
} else {
|
||||
/* DATA METADATA BITMAP - no problems */
|
||||
}
|
||||
md_super_write(mddev, rdev,
|
||||
(rdev->sb_offset<<1) + bitmap->offset
|
||||
rdev->sb_start + bitmap->offset
|
||||
+ page->index * (PAGE_SIZE/512),
|
||||
size,
|
||||
page);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (wait)
|
||||
md_super_wait(mddev);
|
||||
return 0;
|
||||
|
||||
bad_alignment:
|
||||
rcu_read_unlock();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static void bitmap_file_kick(struct bitmap *bitmap);
|
||||
|
@ -454,8 +459,11 @@ void bitmap_update_sb(struct bitmap *bitmap)
|
|||
spin_unlock_irqrestore(&bitmap->lock, flags);
|
||||
sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
|
||||
sb->events = cpu_to_le64(bitmap->mddev->events);
|
||||
if (!bitmap->mddev->degraded)
|
||||
sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
|
||||
if (bitmap->mddev->events < bitmap->events_cleared) {
|
||||
/* rolling back to read-only */
|
||||
bitmap->events_cleared = bitmap->mddev->events;
|
||||
sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
|
||||
}
|
||||
kunmap_atomic(sb, KM_USER0);
|
||||
write_page(bitmap, bitmap->sb_page, 1);
|
||||
}
|
||||
|
@ -1085,9 +1093,19 @@ void bitmap_daemon_work(struct bitmap *bitmap)
|
|||
} else
|
||||
spin_unlock_irqrestore(&bitmap->lock, flags);
|
||||
lastpage = page;
|
||||
/*
|
||||
printk("bitmap clean at page %lu\n", j);
|
||||
*/
|
||||
|
||||
/* We are possibly going to clear some bits, so make
|
||||
* sure that events_cleared is up-to-date.
|
||||
*/
|
||||
if (bitmap->need_sync) {
|
||||
bitmap_super_t *sb;
|
||||
bitmap->need_sync = 0;
|
||||
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
|
||||
sb->events_cleared =
|
||||
cpu_to_le64(bitmap->events_cleared);
|
||||
kunmap_atomic(sb, KM_USER0);
|
||||
write_page(bitmap, bitmap->sb_page, 1);
|
||||
}
|
||||
spin_lock_irqsave(&bitmap->lock, flags);
|
||||
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
|
||||
}
|
||||
|
@ -1257,6 +1275,12 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
|
|||
return;
|
||||
}
|
||||
|
||||
if (success &&
|
||||
bitmap->events_cleared < bitmap->mddev->events) {
|
||||
bitmap->events_cleared = bitmap->mddev->events;
|
||||
bitmap->need_sync = 1;
|
||||
}
|
||||
|
||||
if (!success && ! (*bmc & NEEDED_MASK))
|
||||
*bmc |= NEEDED_MASK;
|
||||
|
||||
|
|
|
@ -297,7 +297,7 @@ static int run(mddev_t *mddev)
|
|||
rdev_for_each(rdev, tmp, mddev)
|
||||
conf->rdev = rdev;
|
||||
|
||||
mddev->array_size = mddev->size;
|
||||
mddev->array_sectors = mddev->size * 2;
|
||||
mddev->private = conf;
|
||||
|
||||
reconfig(mddev, mddev->layout, -1);
|
||||
|
|
|
@ -122,13 +122,13 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
|
|||
return NULL;
|
||||
|
||||
cnt = 0;
|
||||
conf->array_size = 0;
|
||||
conf->array_sectors = 0;
|
||||
|
||||
rdev_for_each(rdev, tmp, mddev) {
|
||||
int j = rdev->raid_disk;
|
||||
dev_info_t *disk = conf->disks + j;
|
||||
|
||||
if (j < 0 || j > raid_disks || disk->rdev) {
|
||||
if (j < 0 || j >= raid_disks || disk->rdev) {
|
||||
printk("linear: disk numbering problem. Aborting!\n");
|
||||
goto out;
|
||||
}
|
||||
|
@ -146,7 +146,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
|
|||
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
|
||||
|
||||
disk->size = rdev->size;
|
||||
conf->array_size += rdev->size;
|
||||
conf->array_sectors += rdev->size * 2;
|
||||
|
||||
cnt++;
|
||||
}
|
||||
|
@ -155,7 +155,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
|
|||
goto out;
|
||||
}
|
||||
|
||||
min_spacing = conf->array_size;
|
||||
min_spacing = conf->array_sectors / 2;
|
||||
sector_div(min_spacing, PAGE_SIZE/sizeof(struct dev_info *));
|
||||
|
||||
/* min_spacing is the minimum spacing that will fit the hash
|
||||
|
@ -164,7 +164,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
|
|||
* that is larger than min_spacing and use the size of that as
|
||||
* the actual spacing
|
||||
*/
|
||||
conf->hash_spacing = conf->array_size;
|
||||
conf->hash_spacing = conf->array_sectors / 2;
|
||||
for (i=0; i < cnt-1 ; i++) {
|
||||
sector_t sz = 0;
|
||||
int j;
|
||||
|
@ -194,7 +194,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
|
|||
unsigned round;
|
||||
unsigned long base;
|
||||
|
||||
sz = conf->array_size >> conf->preshift;
|
||||
sz = conf->array_sectors >> (conf->preshift + 1);
|
||||
sz += 1; /* force round-up */
|
||||
base = conf->hash_spacing >> conf->preshift;
|
||||
round = sector_div(sz, base);
|
||||
|
@ -221,7 +221,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
|
|||
curr_offset = 0;
|
||||
i = 0;
|
||||
for (curr_offset = 0;
|
||||
curr_offset < conf->array_size;
|
||||
curr_offset < conf->array_sectors / 2;
|
||||
curr_offset += conf->hash_spacing) {
|
||||
|
||||
while (i < raid_disks-1 &&
|
||||
|
@ -258,7 +258,7 @@ static int linear_run (mddev_t *mddev)
|
|||
if (!conf)
|
||||
return 1;
|
||||
mddev->private = conf;
|
||||
mddev->array_size = conf->array_size;
|
||||
mddev->array_sectors = conf->array_sectors;
|
||||
|
||||
blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
|
||||
mddev->queue->unplug_fn = linear_unplug;
|
||||
|
@ -292,8 +292,8 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
newconf->prev = mddev_to_conf(mddev);
|
||||
mddev->private = newconf;
|
||||
mddev->raid_disks++;
|
||||
mddev->array_size = newconf->array_size;
|
||||
set_capacity(mddev->gendisk, mddev->array_size << 1);
|
||||
mddev->array_sectors = newconf->array_sectors;
|
||||
set_capacity(mddev->gendisk, mddev->array_sectors);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
615
drivers/md/md.c
615
drivers/md/md.c
File diff suppressed because it is too large
Load diff
|
@ -281,13 +281,18 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
{
|
||||
multipath_conf_t *conf = mddev->private;
|
||||
struct request_queue *q;
|
||||
int found = 0;
|
||||
int err = -EEXIST;
|
||||
int path;
|
||||
struct multipath_info *p;
|
||||
int first = 0;
|
||||
int last = mddev->raid_disks - 1;
|
||||
|
||||
if (rdev->raid_disk >= 0)
|
||||
first = last = rdev->raid_disk;
|
||||
|
||||
print_multipath_conf(conf);
|
||||
|
||||
for (path=0; path<mddev->raid_disks; path++)
|
||||
for (path = first; path <= last; path++)
|
||||
if ((p=conf->multipaths+path)->rdev == NULL) {
|
||||
q = rdev->bdev->bd_disk->queue;
|
||||
blk_queue_stack_limits(mddev->queue, q);
|
||||
|
@ -307,11 +312,13 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
rdev->raid_disk = path;
|
||||
set_bit(In_sync, &rdev->flags);
|
||||
rcu_assign_pointer(p->rdev, rdev);
|
||||
found = 1;
|
||||
err = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
print_multipath_conf(conf);
|
||||
return found;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int multipath_remove_disk(mddev_t *mddev, int number)
|
||||
|
@ -497,7 +504,7 @@ static int multipath_run (mddev_t *mddev)
|
|||
/*
|
||||
* Ok, everything is just fine now
|
||||
*/
|
||||
mddev->array_size = mddev->size;
|
||||
mddev->array_sectors = mddev->size * 2;
|
||||
|
||||
mddev->queue->unplug_fn = multipath_unplug;
|
||||
mddev->queue->backing_dev_info.congested_fn = multipath_congested;
|
||||
|
|
|
@ -295,16 +295,16 @@ static int raid0_run (mddev_t *mddev)
|
|||
goto out_free_conf;
|
||||
|
||||
/* calculate array device size */
|
||||
mddev->array_size = 0;
|
||||
mddev->array_sectors = 0;
|
||||
rdev_for_each(rdev, tmp, mddev)
|
||||
mddev->array_size += rdev->size;
|
||||
mddev->array_sectors += rdev->size * 2;
|
||||
|
||||
printk("raid0 : md_size is %llu blocks.\n",
|
||||
(unsigned long long)mddev->array_size);
|
||||
(unsigned long long)mddev->array_sectors / 2);
|
||||
printk("raid0 : conf->hash_spacing is %llu blocks.\n",
|
||||
(unsigned long long)conf->hash_spacing);
|
||||
{
|
||||
sector_t s = mddev->array_size;
|
||||
sector_t s = mddev->array_sectors / 2;
|
||||
sector_t space = conf->hash_spacing;
|
||||
int round;
|
||||
conf->preshift = 0;
|
||||
|
|
|
@ -1100,11 +1100,16 @@ static int raid1_spare_active(mddev_t *mddev)
|
|||
static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||
{
|
||||
conf_t *conf = mddev->private;
|
||||
int found = 0;
|
||||
int err = -EEXIST;
|
||||
int mirror = 0;
|
||||
mirror_info_t *p;
|
||||
int first = 0;
|
||||
int last = mddev->raid_disks - 1;
|
||||
|
||||
for (mirror=0; mirror < mddev->raid_disks; mirror++)
|
||||
if (rdev->raid_disk >= 0)
|
||||
first = last = rdev->raid_disk;
|
||||
|
||||
for (mirror = first; mirror <= last; mirror++)
|
||||
if ( !(p=conf->mirrors+mirror)->rdev) {
|
||||
|
||||
blk_queue_stack_limits(mddev->queue,
|
||||
|
@ -1119,7 +1124,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
|
||||
p->head_position = 0;
|
||||
rdev->raid_disk = mirror;
|
||||
found = 1;
|
||||
err = 0;
|
||||
/* As all devices are equivalent, we don't need a full recovery
|
||||
* if this was recently any drive of the array
|
||||
*/
|
||||
|
@ -1130,7 +1135,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
}
|
||||
|
||||
print_conf(conf);
|
||||
return found;
|
||||
return err;
|
||||
}
|
||||
|
||||
static int raid1_remove_disk(mddev_t *mddev, int number)
|
||||
|
@ -2038,7 +2043,7 @@ static int run(mddev_t *mddev)
|
|||
/*
|
||||
* Ok, everything is just fine now
|
||||
*/
|
||||
mddev->array_size = mddev->size;
|
||||
mddev->array_sectors = mddev->size * 2;
|
||||
|
||||
mddev->queue->unplug_fn = raid1_unplug;
|
||||
mddev->queue->backing_dev_info.congested_fn = raid1_congested;
|
||||
|
@ -2100,14 +2105,15 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
|
|||
* any io in the removed space completes, but it hardly seems
|
||||
* worth it.
|
||||
*/
|
||||
mddev->array_size = sectors>>1;
|
||||
set_capacity(mddev->gendisk, mddev->array_size << 1);
|
||||
mddev->array_sectors = sectors;
|
||||
set_capacity(mddev->gendisk, mddev->array_sectors);
|
||||
mddev->changed = 1;
|
||||
if (mddev->array_size > mddev->size && mddev->recovery_cp == MaxSector) {
|
||||
if (mddev->array_sectors / 2 > mddev->size &&
|
||||
mddev->recovery_cp == MaxSector) {
|
||||
mddev->recovery_cp = mddev->size << 1;
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
}
|
||||
mddev->size = mddev->array_size;
|
||||
mddev->size = mddev->array_sectors / 2;
|
||||
mddev->resync_max_sectors = sectors;
|
||||
return 0;
|
||||
}
|
||||
|
@ -2131,7 +2137,7 @@ static int raid1_reshape(mddev_t *mddev)
|
|||
conf_t *conf = mddev_to_conf(mddev);
|
||||
int cnt, raid_disks;
|
||||
unsigned long flags;
|
||||
int d, d2;
|
||||
int d, d2, err;
|
||||
|
||||
/* Cannot change chunk_size, layout, or level */
|
||||
if (mddev->chunk_size != mddev->new_chunk ||
|
||||
|
@ -2143,7 +2149,9 @@ static int raid1_reshape(mddev_t *mddev)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
md_allow_write(mddev);
|
||||
err = md_allow_write(mddev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
raid_disks = mddev->raid_disks + mddev->delta_disks;
|
||||
|
||||
|
|
|
@ -1114,24 +1114,30 @@ static int raid10_spare_active(mddev_t *mddev)
|
|||
static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||
{
|
||||
conf_t *conf = mddev->private;
|
||||
int found = 0;
|
||||
int err = -EEXIST;
|
||||
int mirror;
|
||||
mirror_info_t *p;
|
||||
int first = 0;
|
||||
int last = mddev->raid_disks - 1;
|
||||
|
||||
if (mddev->recovery_cp < MaxSector)
|
||||
/* only hot-add to in-sync arrays, as recovery is
|
||||
* very different from resync
|
||||
*/
|
||||
return 0;
|
||||
return -EBUSY;
|
||||
if (!enough(conf))
|
||||
return 0;
|
||||
return -EINVAL;
|
||||
|
||||
if (rdev->raid_disk)
|
||||
first = last = rdev->raid_disk;
|
||||
|
||||
if (rdev->saved_raid_disk >= 0 &&
|
||||
rdev->saved_raid_disk >= first &&
|
||||
conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
|
||||
mirror = rdev->saved_raid_disk;
|
||||
else
|
||||
mirror = 0;
|
||||
for ( ; mirror < mddev->raid_disks; mirror++)
|
||||
mirror = first;
|
||||
for ( ; mirror <= last ; mirror++)
|
||||
if ( !(p=conf->mirrors+mirror)->rdev) {
|
||||
|
||||
blk_queue_stack_limits(mddev->queue,
|
||||
|
@ -1146,7 +1152,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
|
||||
p->head_position = 0;
|
||||
rdev->raid_disk = mirror;
|
||||
found = 1;
|
||||
err = 0;
|
||||
if (rdev->saved_raid_disk != mirror)
|
||||
conf->fullsync = 1;
|
||||
rcu_assign_pointer(p->rdev, rdev);
|
||||
|
@ -1154,7 +1160,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
}
|
||||
|
||||
print_conf(conf);
|
||||
return found;
|
||||
return err;
|
||||
}
|
||||
|
||||
static int raid10_remove_disk(mddev_t *mddev, int number)
|
||||
|
@ -2159,7 +2165,7 @@ static int run(mddev_t *mddev)
|
|||
/*
|
||||
* Ok, everything is just fine now
|
||||
*/
|
||||
mddev->array_size = size << (conf->chunk_shift-1);
|
||||
mddev->array_sectors = size << conf->chunk_shift;
|
||||
mddev->resync_max_sectors = size << conf->chunk_shift;
|
||||
|
||||
mddev->queue->unplug_fn = raid10_unplug;
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -221,6 +221,7 @@ struct bitmap {
|
|||
unsigned long syncchunk;
|
||||
|
||||
__u64 events_cleared;
|
||||
int need_sync;
|
||||
|
||||
/* bitmap spinlock */
|
||||
spinlock_t lock;
|
||||
|
|
|
@ -16,7 +16,7 @@ struct linear_private_data
|
|||
struct linear_private_data *prev; /* earlier version */
|
||||
dev_info_t **hash_table;
|
||||
sector_t hash_spacing;
|
||||
sector_t array_size;
|
||||
sector_t array_sectors;
|
||||
int preshift; /* shift before dividing by hash_spacing */
|
||||
dev_info_t disks[0];
|
||||
};
|
||||
|
|
|
@ -95,7 +95,7 @@ extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
|
|||
struct page *page, int rw);
|
||||
extern void md_do_sync(mddev_t *mddev);
|
||||
extern void md_new_event(mddev_t *mddev);
|
||||
extern void md_allow_write(mddev_t *mddev);
|
||||
extern int md_allow_write(mddev_t *mddev);
|
||||
extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
|
||||
|
||||
#endif /* CONFIG_MD */
|
||||
|
|
|
@ -59,7 +59,7 @@ struct mdk_rdev_s
|
|||
int sb_loaded;
|
||||
__u64 sb_events;
|
||||
sector_t data_offset; /* start of data in array */
|
||||
sector_t sb_offset;
|
||||
sector_t sb_start; /* offset of the super block (in 512byte sectors) */
|
||||
int sb_size; /* bytes in the superblock */
|
||||
int preferred_minor; /* autorun support */
|
||||
|
||||
|
@ -87,6 +87,9 @@ struct mdk_rdev_s
|
|||
#define Blocked 8 /* An error occurred on an externally
|
||||
* managed array, don't allow writes
|
||||
* until it is cleared */
|
||||
#define StateChanged 9 /* Faulty or Blocked has changed during
|
||||
* interrupt, so it needs to be
|
||||
* notified by the thread */
|
||||
wait_queue_head_t blocked_wait;
|
||||
|
||||
int desc_nr; /* descriptor index in the superblock */
|
||||
|
@ -147,7 +150,7 @@ struct mddev_s
|
|||
int raid_disks;
|
||||
int max_disks;
|
||||
sector_t size; /* used size of component devices */
|
||||
sector_t array_size; /* exported array size */
|
||||
sector_t array_sectors; /* exported array size */
|
||||
__u64 events;
|
||||
|
||||
char uuid[16];
|
||||
|
@ -188,6 +191,7 @@ struct mddev_s
|
|||
* NEEDED: we might need to start a resync/recover
|
||||
* RUNNING: a thread is running, or about to be started
|
||||
* SYNC: actually doing a resync, not a recovery
|
||||
* RECOVER: doing recovery, or need to try it.
|
||||
* INTR: resync needs to be aborted for some reason
|
||||
* DONE: thread is done and is waiting to be reaped
|
||||
* REQUEST: user-space has requested a sync (used with SYNC)
|
||||
|
@ -198,6 +202,7 @@ struct mddev_s
|
|||
*/
|
||||
#define MD_RECOVERY_RUNNING 0
|
||||
#define MD_RECOVERY_SYNC 1
|
||||
#define MD_RECOVERY_RECOVER 2
|
||||
#define MD_RECOVERY_INTR 3
|
||||
#define MD_RECOVERY_DONE 4
|
||||
#define MD_RECOVERY_NEEDED 5
|
||||
|
@ -210,7 +215,8 @@ struct mddev_s
|
|||
|
||||
int in_sync; /* known to not need resync */
|
||||
struct mutex reconfig_mutex;
|
||||
atomic_t active;
|
||||
atomic_t active; /* general refcount */
|
||||
atomic_t openers; /* number of active opens */
|
||||
|
||||
int changed; /* true if we might need to reread partition info */
|
||||
int degraded; /* whether md should consider
|
||||
|
@ -227,6 +233,8 @@ struct mddev_s
|
|||
atomic_t recovery_active; /* blocks scheduled, but not written */
|
||||
wait_queue_head_t recovery_wait;
|
||||
sector_t recovery_cp;
|
||||
sector_t resync_min; /* user requested sync
|
||||
* starts here */
|
||||
sector_t resync_max; /* resync should pause
|
||||
* when it gets here */
|
||||
|
||||
|
@ -331,6 +339,9 @@ static inline char * mdname (mddev_t * mddev)
|
|||
#define rdev_for_each(rdev, tmp, mddev) \
|
||||
rdev_for_each_list(rdev, tmp, (mddev)->disks)
|
||||
|
||||
#define rdev_for_each_rcu(rdev, mddev) \
|
||||
list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
|
||||
|
||||
typedef struct mdk_thread_s {
|
||||
void (*run) (mddev_t *mddev);
|
||||
mddev_t *mddev;
|
||||
|
|
|
@ -43,14 +43,11 @@
|
|||
*/
|
||||
#define MD_RESERVED_BYTES (64 * 1024)
|
||||
#define MD_RESERVED_SECTORS (MD_RESERVED_BYTES / 512)
|
||||
#define MD_RESERVED_BLOCKS (MD_RESERVED_BYTES / BLOCK_SIZE)
|
||||
|
||||
#define MD_NEW_SIZE_SECTORS(x) ((x & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS)
|
||||
#define MD_NEW_SIZE_BLOCKS(x) ((x & ~(MD_RESERVED_BLOCKS - 1)) - MD_RESERVED_BLOCKS)
|
||||
|
||||
#define MD_SB_BYTES 4096
|
||||
#define MD_SB_WORDS (MD_SB_BYTES / 4)
|
||||
#define MD_SB_BLOCKS (MD_SB_BYTES / BLOCK_SIZE)
|
||||
#define MD_SB_SECTORS (MD_SB_BYTES / 512)
|
||||
|
||||
/*
|
||||
|
|
|
@ -158,6 +158,43 @@
|
|||
* the compute block completes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Operations state - intermediate states that are visible outside of sh->lock
|
||||
* In general _idle indicates nothing is running, _run indicates a data
|
||||
* processing operation is active, and _result means the data processing result
|
||||
* is stable and can be acted upon. For simple operations like biofill and
|
||||
* compute that only have an _idle and _run state they are indicated with
|
||||
* sh->state flags (STRIPE_BIOFILL_RUN and STRIPE_COMPUTE_RUN)
|
||||
*/
|
||||
/**
|
||||
* enum check_states - handles syncing / repairing a stripe
|
||||
* @check_state_idle - check operations are quiesced
|
||||
* @check_state_run - check operation is running
|
||||
* @check_state_check_result - set outside lock when check result is valid
|
||||
* @check_state_compute_run - check failed and we are repairing
|
||||
* @check_state_compute_result - set outside lock when compute result is valid
|
||||
*/
|
||||
enum check_states {
|
||||
check_state_idle = 0,
|
||||
check_state_run, /* parity check */
|
||||
check_state_check_result,
|
||||
check_state_compute_run, /* parity repair */
|
||||
check_state_compute_result,
|
||||
};
|
||||
|
||||
/**
|
||||
* enum reconstruct_states - handles writing or expanding a stripe
|
||||
*/
|
||||
enum reconstruct_states {
|
||||
reconstruct_state_idle = 0,
|
||||
reconstruct_state_prexor_drain_run, /* prexor-write */
|
||||
reconstruct_state_drain_run, /* write */
|
||||
reconstruct_state_run, /* expand */
|
||||
reconstruct_state_prexor_drain_result,
|
||||
reconstruct_state_drain_result,
|
||||
reconstruct_state_result,
|
||||
};
|
||||
|
||||
struct stripe_head {
|
||||
struct hlist_node hash;
|
||||
struct list_head lru; /* inactive_list or handle_list */
|
||||
|
@ -169,19 +206,13 @@ struct stripe_head {
|
|||
spinlock_t lock;
|
||||
int bm_seq; /* sequence number for bitmap flushes */
|
||||
int disks; /* disks in stripe */
|
||||
enum check_states check_state;
|
||||
enum reconstruct_states reconstruct_state;
|
||||
/* stripe_operations
|
||||
* @pending - pending ops flags (set for request->issue->complete)
|
||||
* @ack - submitted ops flags (set for issue->complete)
|
||||
* @complete - completed ops flags (set for complete)
|
||||
* @target - STRIPE_OP_COMPUTE_BLK target
|
||||
* @count - raid5_runs_ops is set to run when this is non-zero
|
||||
*/
|
||||
struct stripe_operations {
|
||||
unsigned long pending;
|
||||
unsigned long ack;
|
||||
unsigned long complete;
|
||||
int target;
|
||||
int count;
|
||||
u32 zero_sum_result;
|
||||
} ops;
|
||||
struct r5dev {
|
||||
|
@ -202,6 +233,7 @@ struct stripe_head_state {
|
|||
int locked, uptodate, to_read, to_write, failed, written;
|
||||
int to_fill, compute, req_compute, non_overwrite;
|
||||
int failed_num;
|
||||
unsigned long ops_request;
|
||||
};
|
||||
|
||||
/* r6_state - extra state data only relevant to r6 */
|
||||
|
@ -228,9 +260,7 @@ struct r6_state {
|
|||
#define R5_Wantfill 12 /* dev->toread contains a bio that needs
|
||||
* filling
|
||||
*/
|
||||
#define R5_Wantprexor 13 /* distinguish blocks ready for rmw from
|
||||
* other "towrites"
|
||||
*/
|
||||
#define R5_Wantdrain 13 /* dev->towrite needs to be drained */
|
||||
/*
|
||||
* Write method
|
||||
*/
|
||||
|
@ -254,8 +284,10 @@ struct r6_state {
|
|||
#define STRIPE_EXPAND_READY 11
|
||||
#define STRIPE_IO_STARTED 12 /* do not count towards 'bypass_count' */
|
||||
#define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */
|
||||
#define STRIPE_BIOFILL_RUN 14
|
||||
#define STRIPE_COMPUTE_RUN 15
|
||||
/*
|
||||
* Operations flags (in issue order)
|
||||
* Operation request flags
|
||||
*/
|
||||
#define STRIPE_OP_BIOFILL 0
|
||||
#define STRIPE_OP_COMPUTE_BLK 1
|
||||
|
@ -263,14 +295,6 @@ struct r6_state {
|
|||
#define STRIPE_OP_BIODRAIN 3
|
||||
#define STRIPE_OP_POSTXOR 4
|
||||
#define STRIPE_OP_CHECK 5
|
||||
#define STRIPE_OP_IO 6
|
||||
|
||||
/* modifiers to the base operations
|
||||
* STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back
|
||||
* STRIPE_OP_MOD_DMA_CHECK - parity is not corrupted by the check
|
||||
*/
|
||||
#define STRIPE_OP_MOD_REPAIR_PD 7
|
||||
#define STRIPE_OP_MOD_DMA_CHECK 8
|
||||
|
||||
/*
|
||||
* Plugging:
|
||||
|
|
Loading…
Reference in a new issue