dm raid: add raid4/5/6 journal write-back support via journal_mode option
Commit 63c32ed4af
("dm raid: add raid4/5/6 journaling support") added
journal support to close the raid4/5/6 "write hole" -- in terms of
writethrough caching.
Introduce a "journal_mode" feature and use the new
r5c_journal_mode_set() API to add support for switching the journal
device's cache mode between write-through (the current default) and
write-back.
NOTE: If the journal device is not layered on resilient storage and it
fails, write-through mode will cause the "write hole" to reoccur. But
if the journal fails while in write-back mode it will cause data loss
for any dirty cache entries unless resilient storage is used for the
journal.
Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
This commit is contained in:
parent
4464e36e06
commit
6e53636fe8
2 changed files with 101 additions and 14 deletions
|
@ -170,6 +170,13 @@ The target is named "raid" and it accepts the following parameters:
|
|||
Takeover/reshape is not possible with a raid4/5/6 journal device;
|
||||
it has to be deconfigured before requesting these.
|
||||
|
||||
[journal_mode <mode>]
|
||||
This option sets the caching mode on journaled raid4/5/6 raid sets
|
||||
(see 'journal_dev <dev>' above) to 'writethrough' or 'writeback'.
|
||||
If 'writeback' is selected the journal device has to be resilient
|
||||
and must not suffer from the 'write hole' problem itself (e.g. use
|
||||
raid1 or raid10) to avoid a single point of failure.
|
||||
|
||||
<#raid_devs>: The number of devices composing the array.
|
||||
Each device consists of two entries. The first is the device
|
||||
containing the metadata (if any); the second is the one containing the
|
||||
|
@ -254,7 +261,8 @@ recovery. Here is a fuller description of the individual fields:
|
|||
<data_offset> The current data offset to the start of the user data on
|
||||
each component device of a raid set (see the respective
|
||||
raid parameter to support out-of-place reshaping).
|
||||
<journal_char> 'A' - active raid4/5/6 journal device.
|
||||
<journal_char> 'A' - active write-through journal device.
|
||||
'a' - active write-back journal device.
|
||||
'D' - dead journal device.
|
||||
'-' - no journal device.
|
||||
|
||||
|
@ -334,3 +342,4 @@ Version History
|
|||
1.10.1 Fix data corruption on reshape request
|
||||
1.11.0 Fix table line argument order
|
||||
(wrong raid10_copies/raid10_format sequence)
|
||||
1.11.1 Add raid4/5/6 journal write-back support via journal_mode option
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright (C) 2010-2011 Neil Brown
|
||||
* Copyright (C) 2010-2016 Red Hat, Inc. All rights reserved.
|
||||
* Copyright (C) 2010-2017 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
@ -79,7 +79,10 @@ struct raid_dev {
|
|||
#define __CTR_FLAG_RAID10_USE_NEAR_SETS 14 /* 2 */ /* Only with raid10! */
|
||||
|
||||
/* New for v1.10.0 */
|
||||
#define __CTR_FLAG_JOURNAL_DEV 15 /* 2 */ /* Only with raid4/5/6! */
|
||||
#define __CTR_FLAG_JOURNAL_DEV 15 /* 2 */ /* Only with raid4/5/6 (journal device)! */
|
||||
|
||||
/* New for v1.11.1 */
|
||||
#define __CTR_FLAG_JOURNAL_MODE 16 /* 2 */ /* Only with raid4/5/6 (journal mode)! */
|
||||
|
||||
/*
|
||||
* Flags for rs->ctr_flags field.
|
||||
|
@ -100,6 +103,7 @@ struct raid_dev {
|
|||
#define CTR_FLAG_DATA_OFFSET (1 << __CTR_FLAG_DATA_OFFSET)
|
||||
#define CTR_FLAG_RAID10_USE_NEAR_SETS (1 << __CTR_FLAG_RAID10_USE_NEAR_SETS)
|
||||
#define CTR_FLAG_JOURNAL_DEV (1 << __CTR_FLAG_JOURNAL_DEV)
|
||||
#define CTR_FLAG_JOURNAL_MODE (1 << __CTR_FLAG_JOURNAL_MODE)
|
||||
|
||||
#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET)
|
||||
|
||||
|
@ -175,7 +179,8 @@ struct raid_dev {
|
|||
CTR_FLAG_REGION_SIZE | \
|
||||
CTR_FLAG_DELTA_DISKS | \
|
||||
CTR_FLAG_DATA_OFFSET | \
|
||||
CTR_FLAG_JOURNAL_DEV)
|
||||
CTR_FLAG_JOURNAL_DEV | \
|
||||
CTR_FLAG_JOURNAL_MODE)
|
||||
|
||||
#define RAID6_VALID_FLAGS (CTR_FLAG_SYNC | \
|
||||
CTR_FLAG_REBUILD | \
|
||||
|
@ -186,7 +191,8 @@ struct raid_dev {
|
|||
CTR_FLAG_REGION_SIZE | \
|
||||
CTR_FLAG_DELTA_DISKS | \
|
||||
CTR_FLAG_DATA_OFFSET | \
|
||||
CTR_FLAG_JOURNAL_DEV)
|
||||
CTR_FLAG_JOURNAL_DEV | \
|
||||
CTR_FLAG_JOURNAL_MODE)
|
||||
/* ...valid options definitions per raid level */
|
||||
|
||||
/*
|
||||
|
@ -239,6 +245,7 @@ struct raid_set {
|
|||
/* State kept per raid set for the optional raid4/5/6 journal device */
struct journal_dev {
|
||||
struct dm_dev *dev; /* device whose name is emitted after "journal_dev" on the table line */
|
||||
struct md_rdev rdev; /* rdev handed to __raid_dev_status() to derive the journal status character */
|
||||
int mode; /* R5C_JOURNAL_MODE_* value; set to write-through when "journal_dev" is parsed, overridden by "journal_mode" */
|
||||
} journal_dev;
|
||||
|
||||
struct raid_dev dev[0];
|
||||
|
@ -326,6 +333,7 @@ static struct arg_name_flag {
|
|||
{ CTR_FLAG_DELTA_DISKS, "delta_disks"},
|
||||
{ CTR_FLAG_RAID10_USE_NEAR_SETS, "raid10_use_near_sets"},
|
||||
{ CTR_FLAG_JOURNAL_DEV, "journal_dev" },
|
||||
{ CTR_FLAG_JOURNAL_MODE, "journal_mode" },
|
||||
};
|
||||
|
||||
/* Return argument name string for given @flag */
|
||||
|
@ -344,6 +352,39 @@ static const char *dm_raid_arg_name_by_flag(const uint32_t flag)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Define correlation of raid456 journal cache modes and dm-raid target line parameters.
 *
 * Each entry pairs one R5C_JOURNAL_MODE_* constant with the string accepted as the
 * argument of the "journal_mode" table line option.  Both lookup helpers below
 * (dm_raid_journal_mode_to_md() and md_journal_mode_to_dm_raid()) walk this table.
 */
|
||||
static struct {
|
||||
const int mode; /* R5C_JOURNAL_MODE_* constant */
|
||||
const char *param; /* matching "journal_mode" argument string */
|
||||
} _raid456_journal_mode[] = {
|
||||
{ R5C_JOURNAL_MODE_WRITE_THROUGH , "writethrough" },
|
||||
{ R5C_JOURNAL_MODE_WRITE_BACK , "writeback" }
|
||||
};
|
||||
|
||||
/*
 * Return MD raid4/5/6 journal mode for dm @journal_mode one
 *
 * @mode: "journal_mode" argument string from the table line; compared
 *        case-insensitively against the entries of _raid456_journal_mode[].
 *
 * Returns the .mode member of the matching table entry, or -EINVAL if
 * @mode matches none of them.
 */
|
||||
static int dm_raid_journal_mode_to_md(const char *mode)
|
||||
{
|
||||
int m = ARRAY_SIZE(_raid456_journal_mode);
|
||||
|
||||
/* Walk the table from its last entry down to index 0 */
while (m--)
|
||||
if (!strcasecmp(mode, _raid456_journal_mode[m].param))
|
||||
return _raid456_journal_mode[m].mode;
|
||||
|
||||
/* No table entry carries this parameter string */
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
 * Return dm-raid raid4/5/6 journal mode string for @mode
 *
 * @mode: journal mode value to look up in _raid456_journal_mode[].
 *
 * Returns the .param string of the matching table entry, or the
 * literal "unknown" if @mode matches none of them (never NULL).
 */
|
||||
static const char *md_journal_mode_to_dm_raid(const int mode)
|
||||
{
|
||||
int m = ARRAY_SIZE(_raid456_journal_mode);
|
||||
|
||||
/* Walk the table from its last entry down to index 0 */
while (m--)
|
||||
if (mode == _raid456_journal_mode[m].mode)
|
||||
return _raid456_journal_mode[m].param;
|
||||
|
||||
/* Fallback keeps callers that print the result safe from a NULL string */
return "unknown";
|
||||
}
|
||||
|
||||
/*
|
||||
* Bool helpers to test for various raid levels of a raid set.
|
||||
* Its level as reported by the superblock rather than
|
||||
|
@ -1183,7 +1224,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
|
|||
continue;
|
||||
}
|
||||
|
||||
/* "journal_dev dev" */
|
||||
/* "journal_dev <dev>" */
|
||||
if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV))) {
|
||||
int r;
|
||||
struct md_rdev *jdev;
|
||||
|
@ -1211,10 +1252,32 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
|
|||
rs->ti->error = "No space for raid4/5/6 journal";
|
||||
return -ENOSPC;
|
||||
}
|
||||
rs->journal_dev.mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
|
||||
set_bit(Journal, &jdev->flags);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* "journal_mode <mode>" ("journal_dev" mandatory!) */
|
||||
if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_MODE))) {
|
||||
int r;
|
||||
|
||||
if (!test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
|
||||
rs->ti->error = "raid4/5/6 'journal_mode' is invalid without 'journal_dev'";
|
||||
return -EINVAL;
|
||||
}
|
||||
if (test_and_set_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
|
||||
rs->ti->error = "Only one raid4/5/6 'journal_mode' argument allowed";
|
||||
return -EINVAL;
|
||||
}
|
||||
r = dm_raid_journal_mode_to_md(arg);
|
||||
if (r < 0) {
|
||||
rs->ti->error = "Invalid 'journal_mode' argument";
|
||||
return r;
|
||||
}
|
||||
rs->journal_dev.mode = r;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parameters with number values from here on.
|
||||
*/
|
||||
|
@ -3076,6 +3139,16 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
rs->callbacks.congested_fn = raid_is_congested;
|
||||
dm_table_add_target_callbacks(ti->table, &rs->callbacks);
|
||||
|
||||
/* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
|
||||
if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
|
||||
r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
|
||||
if (r) {
|
||||
ti->error = "Failed to set raid4/5/6 journal mode";
|
||||
mddev_unlock(&rs->md);
|
||||
goto bad_journal_mode_set;
|
||||
}
|
||||
}
|
||||
|
||||
mddev_suspend(&rs->md);
|
||||
|
||||
/* Try to adjust the raid4/5/6 stripe cache size to the stripe size */
|
||||
|
@ -3109,6 +3182,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
mddev_unlock(&rs->md);
|
||||
return 0;
|
||||
|
||||
bad_journal_mode_set:
|
||||
bad_stripe_cache:
|
||||
bad_check_reshape:
|
||||
md_stop(&rs->md);
|
||||
|
@ -3180,18 +3254,18 @@ static const char *decipher_sync_action(struct mddev *mddev)
|
|||
* Status characters:
|
||||
*
|
||||
* 'D' = Dead/Failed raid set component or raid4/5/6 journal device
|
||||
* 'a' = Alive but not in-sync
|
||||
* 'A' = Alive and in-sync raid set component or alive raid4/5/6 journal device
|
||||
* 'a' = Alive but not in-sync raid set component _or_ alive raid4/5/6 'write_back' journal device
|
||||
* 'A' = Alive and in-sync raid set component _or_ alive raid4/5/6 'write_through' journal device
|
||||
* '-' = Non-existing device (i.e. uspace passed '- -' into the ctr)
|
||||
*/
|
||||
static const char *__raid_dev_status(struct md_rdev *rdev, bool array_in_sync)
|
||||
static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev, bool array_in_sync)
|
||||
{
|
||||
if (!rdev->bdev)
|
||||
return "-";
|
||||
else if (test_bit(Faulty, &rdev->flags))
|
||||
return "D";
|
||||
else if (test_bit(Journal, &rdev->flags))
|
||||
return "A";
|
||||
return (rs->journal_dev.mode == R5C_JOURNAL_MODE_WRITE_THROUGH) ? "A" : "a";
|
||||
else if (!array_in_sync || !test_bit(In_sync, &rdev->flags))
|
||||
return "a";
|
||||
else
|
||||
|
@ -3315,7 +3389,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
|
||||
/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
|
||||
for (i = 0; i < rs->raid_disks; i++)
|
||||
DMEMIT(__raid_dev_status(&rs->dev[i].rdev, array_in_sync));
|
||||
DMEMIT(__raid_dev_status(rs, &rs->dev[i].rdev, array_in_sync));
|
||||
|
||||
/*
|
||||
* In-sync/Reshape ratio:
|
||||
|
@ -3366,7 +3440,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
* v1.10.0+:
|
||||
*/
|
||||
DMEMIT(" %s", test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ?
|
||||
__raid_dev_status(&rs->journal_dev.rdev, 0) : "-");
|
||||
__raid_dev_status(rs, &rs->journal_dev.rdev, 0) : "-");
|
||||
break;
|
||||
|
||||
case STATUSTYPE_TABLE:
|
||||
|
@ -3381,7 +3455,8 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
write_mostly_params +
|
||||
hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) +
|
||||
hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2 +
|
||||
(test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? 2 : 0);
|
||||
(test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? 2 : 0) +
|
||||
(test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags) ? 2 : 0);
|
||||
|
||||
/* Emit table line */
|
||||
/* This has to be in the documented order for userspace! */
|
||||
|
@ -3433,6 +3508,9 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags))
|
||||
DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV),
|
||||
__get_dev_name(rs->journal_dev.dev));
|
||||
if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags))
|
||||
DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_MODE),
|
||||
md_journal_mode_to_dm_raid(rs->journal_dev.mode));
|
||||
DMEMIT(" %d", rs->raid_disks);
|
||||
for (i = 0; i < rs->raid_disks; i++)
|
||||
DMEMIT(" %s %s", __get_dev_name(rs->dev[i].meta_dev),
|
||||
|
@ -3793,7 +3871,7 @@ static void raid_resume(struct dm_target *ti)
|
|||
|
||||
static struct target_type raid_target = {
|
||||
.name = "raid",
|
||||
.version = {1, 11, 0},
|
||||
.version = {1, 11, 1},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = raid_ctr,
|
||||
.dtr = raid_dtr,
|
||||
|
|
Loading…
Reference in a new issue