[dm-devel] [PATCH] DM RAID: Add support for MD RAID10 personality
Jonathan Brassow
jbrassow at redhat.com
Tue Jun 26 12:03:51 UTC 2012
dm raid: add md raid10 support
Support the MD RAID10 personality through dm-raid.c
Signed-off-by: Jonathan Brassow <jbrassow at redhat.com>
Index: linux-upstream/drivers/md/dm-raid.c
===================================================================
--- linux-upstream.orig/drivers/md/dm-raid.c
+++ linux-upstream/drivers/md/dm-raid.c
@@ -11,6 +11,7 @@
#include "md.h"
#include "raid1.h"
#include "raid5.h"
+#include "raid10.h"
#include "bitmap.h"
#include <linux/device-mapper.h>
@@ -52,7 +53,11 @@ struct raid_dev {
#define DMPF_MAX_RECOVERY_RATE 0x20
#define DMPF_MAX_WRITE_BEHIND 0x40
#define DMPF_STRIPE_CACHE 0x80
-#define DMPF_REGION_SIZE 0X100
+#define DMPF_REGION_SIZE 0x100
+#define DMPF_RAID10_NEAR_COPIES 0x200
+#define DMPF_RAID10_FAR_COPIES 0x400
+#define DMPF_RAID10_FAR_OFFSET 0x800
+
struct raid_set {
struct dm_target *ti;
@@ -66,6 +71,15 @@ struct raid_set {
struct raid_dev dev[0];
};
+/* md RAID10 layout word: near_copies in bits 0-7, far_copies in bits 8-15, */
+/* far_offset flag in bit 16 (the 17th bit). All macro arguments are         */
+/* parenthesized; RAID10_FO() yields 0/1 to match the <0/1> table argument.  */
+#define ALGORITHM_RAID10(near_copies, far_copies, far_offset) \
+ (((near_copies) & 0xFF) | (((far_copies) & 0xFF) << 8) | (!!(far_offset) << 16))
+#define RAID10_NC(layout) ((layout) & 0xFF)
+#define RAID10_FC(layout) (((layout) >> 8) & 0xFF)
+#define RAID10_FO(layout) (!!((layout) & 0x10000))
+
/* Supported raid types and properties. */
static struct raid_type {
const char *name; /* RAID algorithm. */
@@ -76,6 +90,8 @@ static struct raid_type {
const unsigned algorithm; /* RAID algorithm. */
} raid_types[] = {
{"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */},
+ {"raid10", "RAID10 (striped mirrors)", 0, 2, 10, -1 /* Varies */},
+ {"raid1e", "RAID1E (Enhanced RAID1)", 0, 2, 10, -1 /* Varies */},
{"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0},
{"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC},
{"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC},
@@ -339,10 +355,17 @@ static int validate_region_size(struct r
* [max_write_behind <sectors>] See '-write-behind=' (man mdadm)
* [stripe_cache <sectors>] Stripe cache size for higher RAIDs
* [region_size <sectors>] Defines granularity of bitmap
+ *
+ * RAID10-only options:
+ * [raid10_near_copies <# copies>] Near copies. (Default: 2)
+ * [raid10_far_copies <# copies>] Far copies. (Default: 1)
+ * [raid10_far_offset <0/1>] Offset is device size(0) or stripe(1).
*/
static int parse_raid_params(struct raid_set *rs, char **argv,
unsigned num_raid_params)
{
+ unsigned raid10_default = ALGORITHM_RAID10(2, 1, 0);
+ unsigned raid10_nc = 1, raid10_fc = 1, raid10_fo = 0;
unsigned i, rebuild_cnt = 0;
unsigned long value, region_size = 0;
sector_t sectors_per_dev = rs->ti->len;
@@ -435,6 +458,7 @@ static int parse_raid_params(struct raid
if (rebuild_cnt > rs->raid_type->parity_devs)
rs->ti->error = "Too many rebuild devices specified for given RAID type";
break;
+ case 10:
default:
DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name);
rs->ti->error = "Rebuild not supported for this RAID type";
@@ -492,7 +516,7 @@ static int parse_raid_params(struct raid
*/
value /= 2;
- if (rs->raid_type->level < 5) {
+ if (rs->raid_type->level != 5) {
rs->ti->error = "Inappropriate argument: stripe_cache";
return -EINVAL;
}
@@ -517,6 +541,33 @@ static int parse_raid_params(struct raid
} else if (!strcasecmp(key, "region_size")) {
rs->print_flags |= DMPF_REGION_SIZE;
region_size = value;
+ } else if (!strcasecmp(key, "raid10_near_copies") &&
+ (rs->raid_type->level == 10)) {
+ if ((value < 1) || (value > 0xFF)) {
+ rs->ti->error = "Bad value for 'raid10_near_copies'";
+ return -EINVAL;
+ }
+ rs->print_flags |= DMPF_RAID10_NEAR_COPIES;
+ raid10_nc = value;
+ raid10_default = 0;
+ } else if (!strcasecmp(key, "raid10_far_copies") &&
+ (rs->raid_type->level == 10)) {
+ if ((value < 1) || (value > 0xFF)) {
+ rs->ti->error = "Bad value for 'raid10_far_copies'";
+ return -EINVAL;
+ }
+ rs->print_flags |= DMPF_RAID10_FAR_COPIES;
+ raid10_fc = value;
+ raid10_default = 0;
+ } else if (!strcasecmp(key, "raid10_far_offset") &&
+ (rs->raid_type->level == 10)) {
+ if (value > 1) {
+ rs->ti->error = "Bad value for 'raid10_far_offset'";
+ return -EINVAL;
+ }
+ rs->print_flags |= DMPF_RAID10_FAR_OFFSET;
+ raid10_fo = value;
+ raid10_default = 0;
} else {
DMERR("Unable to parse RAID parameter: %s", key);
rs->ti->error = "Unable to parse RAID parameters";
@@ -532,9 +583,33 @@ static int parse_raid_params(struct raid
else
rs->ti->split_io = region_size;
- if ((rs->raid_type->level > 1) &&
- sector_div(sectors_per_dev, (rs->md.raid_disks - rs->raid_type->parity_devs))) {
+ if (rs->raid_type->level == 10) {
+ /* (Len * Stripes) / Mirrors */
+ sectors_per_dev *= rs->md.raid_disks;
+ if (sector_div(sectors_per_dev, (raid10_nc * raid10_fc))) {
+ rs->ti->error = "Target length not divisible by number of data devices";
+ return -EINVAL;
+ }
+ if ((raid10_nc * raid10_fc) > rs->md.raid_disks) {
+ rs->ti->error = "Not enough devices to satisfy specification";
+ return -EINVAL;
+ }
+ if (raid10_fo && (raid10_fc < 2)) {
+ DMWARN("RAID10 parameter 'far_offset' ignored");
+ raid10_fo = 0;
+ }
+
+ if (raid10_default)
+ rs->md.layout = raid10_default;
+ else
+ rs->md.layout = ALGORITHM_RAID10(raid10_nc,
+ raid10_fc, raid10_fo);
+ rs->md.new_layout = rs->md.layout;
+ } else if ((rs->raid_type->level > 1) &&
+ sector_div(sectors_per_dev,
+ (rs->md.raid_disks - rs->raid_type->parity_devs))) {
rs->ti->error = "Target length not divisible by number of data devices";
+
return -EINVAL;
}
rs->md.dev_sectors = sectors_per_dev;
@@ -560,6 +635,9 @@ static int raid_is_congested(struct dm_t
if (rs->raid_type->level == 1)
return md_raid1_congested(&rs->md, bits);
+ if (rs->raid_type->level == 10)
+ return md_raid10_congested(&rs->md, bits);
+
return md_raid5_congested(&rs->md, bits);
}
@@ -878,6 +956,9 @@ static int analyse_superblocks(struct dm
case 6:
redundancy = rs->raid_type->parity_devs;
break;
+ case 10:
+ redundancy = RAID10_NC(mddev->layout) * RAID10_FC(mddev->layout);
+ break;
default:
ti->error = "Unknown RAID type";
return -EINVAL;
@@ -1197,6 +1278,18 @@ static int raid_status(struct dm_target
DMEMIT(" region_size %lu",
rs->md.bitmap_info.chunksize >> 9);
+ if (rs->print_flags & DMPF_RAID10_NEAR_COPIES)
+ DMEMIT(" raid10_near_copies %u",
+ RAID10_NC(rs->md.layout));
+
+ if (rs->print_flags & DMPF_RAID10_FAR_COPIES)
+ DMEMIT(" raid10_far_copies %u",
+ RAID10_FC(rs->md.layout));
+
+ if (rs->print_flags & DMPF_RAID10_FAR_OFFSET)
+ DMEMIT(" raid10_far_offset %u",
+ RAID10_FO(rs->md.layout));
+
DMEMIT(" %d", rs->md.raid_disks);
for (i = 0; i < rs->md.raid_disks; i++) {
if (rs->dev[i].meta_dev)
@@ -1271,7 +1364,7 @@ static void raid_resume(struct dm_target
static struct target_type raid_target = {
.name = "raid",
- .version = {1, 2, 0},
+ .version = {1, 3, 0},
.module = THIS_MODULE,
.ctr = raid_ctr,
.dtr = raid_dtr,
@@ -1298,6 +1391,8 @@ module_init(dm_raid_init);
module_exit(dm_raid_exit);
MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target");
+MODULE_ALIAS("dm-raid1");
+MODULE_ALIAS("dm-raid10");
MODULE_ALIAS("dm-raid4");
MODULE_ALIAS("dm-raid5");
MODULE_ALIAS("dm-raid6");
Index: linux-upstream/Documentation/device-mapper/dm-raid.txt
===================================================================
--- linux-upstream.orig/Documentation/device-mapper/dm-raid.txt
+++ linux-upstream/Documentation/device-mapper/dm-raid.txt
@@ -27,6 +27,11 @@ The target is named "raid" and it accept
- rotating parity N (right-to-left) with data restart
raid6_nc RAID6 N continue
- rotating parity N (right-to-left) with data continuation
+ raid10/raid1e Various RAID10 inspired algorithms chosen by additional params
+ - RAID10: Striped Mirrors (aka 'Striping on top of mirrors')
+ - RAID1E: Integrated Adjacent Stripe Mirroring
+ - RAID1E: Integrated Offset Stripe Mirroring
+ - and other similar RAID10 variants
Reference: Chapter 4 of
http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
@@ -59,6 +64,80 @@ The target is named "raid" and it accept
logical size of the array. The bitmap records the device
synchronisation state for each region.
+ [raid10_near_copies <# copies>]
+ [raid10_far_copies <# copies>]
+ [raid10_far_offset <0/1>]
+ These three options are used to alter the default layout of
+ a RAID10/RAID1E configuration. The total number of copies is
+ given by the number of "near" (aka "adjacent") copies times
+ the number of "far" (aka "offset") copies. Near copies
+ are what most people think of with respect to mirroring.
+ If 'raid10_near_copies 2', 'raid10_far_copies 1' and
+ 'raid10_far_offset 0', then the layouts for 2, 3 and 4 devices
+ are:
+    2 drives    3 drives      4 drives
+    --------    ----------    --------------
+    A1  A1      A1  A1  A2    A1  A1  A2  A2
+    A2  A2      A2  A3  A3    A3  A3  A4  A4
+    A3  A3      A4  A4  A5    A5  A5  A6  A6
+    A4  A4      A5  A6  A6    A7  A7  A8  A8
+    ..  ..      ..  ..  ..    ..  ..  ..  ..
+ The 2-device layout is equivalent to 2-way RAID1. The 4-device
+ layout is what a traditional RAID10 would look like. The
+ 3-device layout is what might be called a 'RAID1E - Integrated
+ Adjacent Stripe Mirroring'.
+
+ The 'raid10_far_[copies|offset]' arguments work together to
+ determine where any "far"/"offset" copies will be placed.
+ If 'raid10_near_copies 1', 'raid10_far_copies 2' and
+ 'raid10_far_offset 0', then the layouts for 2, 3 and 4 devices
+ are:
+    2 drives    3 drives          4 drives
+    --------    --------------    --------------------
+    A1  A2      A1  A2  A3        A1   A2   A3   A4
+    A3  A4      A4  A5  A6        A5   A6   A7   A8
+    A5  A6      A7  A8  A9        A9   A10  A11  A12
+    ..  ..      ..  ..  ..        ..   ..   ..   ..
+    A2  A1      A3  A1  A2        A4   A1   A2   A3
+    A4  A3      A6  A4  A5        A8   A5   A6   A7
+    A6  A5      A9  A7  A8        A12  A9   A10  A11
+    ..  ..      ..  ..  ..        ..   ..   ..   ..
+
+ If 'raid10_near_copies 1', 'raid10_far_copies 2' and
+ 'raid10_far_offset 1', then the layouts for 2, 3 and 4 devices
+ are:
+    2 drives    3 drives        4 drives
+    --------    ------------    -----------------
+    A1  A2      A1  A2  A3      A1   A2   A3   A4
+    A2  A1      A3  A1  A2      A4   A1   A2   A3
+    A3  A4      A4  A5  A6      A5   A6   A7   A8
+    A4  A3      A6  A4  A5      A8   A5   A6   A7
+    A5  A6      A7  A8  A9      A9   A10  A11  A12
+    A6  A5      A9  A7  A8      A12  A9   A10  A11
+    ..  ..      ..  ..  ..      ..   ..   ..   ..
+ Here we see layouts closely akin to 'RAID1E - Integrated
+ Offset Stripe Mirroring'.
+
+ Near and far copies can both be specified giving more
+ complex arrangements. If 'raid10_near_copies 2',
+ 'raid10_far_copies 2' and 'raid10_far_offset 0', then the
+ layouts for 4 and 5 devices are:
+    4 drives          5 drives
+    --------          --------
+    A1  A1  A2  A2    A1   A1   A2  A2   A3
+    A3  A3  A4  A4    A3   A4   A4  A5   A5
+    A5  A5  A6  A6    A6   A6   A7  A7   A8
+    A7  A7  A8  A8    A8   A9   A9  A10  A10
+    ..  ..  ..  ..    ..   ..   ..  ..   ..
+    A2  A2  A1  A1    A2   A3   A1  A1   A2
+    A4  A4  A3  A3    A5   A5   A3  A4   A4
+    A6  A6  A5  A5    A7   A8   A6  A6   A7
+    A8  A8  A7  A7    A10  A10  A8  A9   A9
+    ..  ..  ..  ..    ..   ..   ..  ..   ..
+ Thanks to Wikipedia's 'Non-standard RAID levels' article for the
+ layout figures:
+ http://en.wikipedia.org/wiki/Non-standard_RAID_levels
+
<#raid_devs>: The number of devices composing the array.
Each device consists of two entries. The first is the device
containing the metadata (if any); the second is the one containing the
More information about the dm-devel
mailing list