| NeilBrown | 9d09e66 | 2011-01-13 20:00:02 +0000 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (C) 2010-2011 Neil Brown | 
|  | 3 | * Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved. | 
|  | 4 | * | 
|  | 5 | * This file is released under the GPL. | 
|  | 6 | */ | 
|  | 7 |  | 
|  | 8 | #include <linux/slab.h> | 
|  | 9 |  | 
|  | 10 | #include "md.h" | 
|  | 11 | #include "raid5.h" | 
|  | 12 | #include "dm.h" | 
|  | 13 | #include "bitmap.h" | 
|  | 14 |  | 
/*
 * Message prefix for this target.  (Presumably picked up by the DM logging
 * macros such as DMERR(), which are defined outside this file.)
 */
#define DM_MSG_PREFIX "raid"

/*
 * Not every MD core provides MD_SYNC_STATE_FORCED.  When it is missing,
 * define it as 0 so that OR-ing it into a flags word changes nothing.
 */
#if !defined(MD_SYNC_STATE_FORCED)
#define MD_SYNC_STATE_FORCED 0
#endif
|  | 24 |  | 
|  | 25 | struct raid_dev { | 
|  | 26 | /* | 
|  | 27 | * Two DM devices, one to hold metadata and one to hold the | 
|  | 28 | * actual data/parity.  The reason for this is to not confuse | 
|  | 29 | * ti->len and give more flexibility in altering size and | 
|  | 30 | * characteristics. | 
|  | 31 | * | 
|  | 32 | * While it is possible for this device to be associated | 
|  | 33 | * with a different physical device than the data_dev, it | 
|  | 34 | * is intended for it to be the same. | 
|  | 35 | *    |--------- Physical Device ---------| | 
|  | 36 | *    |- meta_dev -|------ data_dev ------| | 
|  | 37 | */ | 
|  | 38 | struct dm_dev *meta_dev; | 
|  | 39 | struct dm_dev *data_dev; | 
|  | 40 | struct mdk_rdev_s rdev; | 
|  | 41 | }; | 
|  | 42 |  | 
/*
 * Bits of rs->print_flags.  One bit per optional constructor argument;
 * a set bit means the argument was supplied and must be printed again
 * when the table line is emitted.
 */
#define DMPF_DAEMON_SLEEP      (1 << 0)
#define DMPF_MAX_WRITE_BEHIND  (1 << 1)
#define DMPF_SYNC              (1 << 2)
#define DMPF_NOSYNC            (1 << 3)
#define DMPF_STRIPE_CACHE      (1 << 4)
#define DMPF_MIN_RECOVERY_RATE (1 << 5)
#define DMPF_MAX_RECOVERY_RATE (1 << 6)
|  | 53 |  | 
|  | 54 | struct raid_set { | 
|  | 55 | struct dm_target *ti; | 
|  | 56 |  | 
|  | 57 | uint64_t print_flags; | 
|  | 58 |  | 
|  | 59 | struct mddev_s md; | 
|  | 60 | struct raid_type *raid_type; | 
|  | 61 | struct dm_target_callbacks callbacks; | 
|  | 62 |  | 
|  | 63 | struct raid_dev dev[0]; | 
|  | 64 | }; | 
|  | 65 |  | 
|  | 66 | /* Supported raid types and properties. */ | 
|  | 67 | static struct raid_type { | 
|  | 68 | const char *name;		/* RAID algorithm. */ | 
|  | 69 | const char *descr;		/* Descriptor text for logging. */ | 
|  | 70 | const unsigned parity_devs;	/* # of parity devices. */ | 
|  | 71 | const unsigned minimal_devs;	/* minimal # of devices in set. */ | 
|  | 72 | const unsigned level;		/* RAID level. */ | 
|  | 73 | const unsigned algorithm;	/* RAID algorithm. */ | 
|  | 74 | } raid_types[] = { | 
|  | 75 | {"raid4",    "RAID4 (dedicated parity disk)",	1, 2, 5, ALGORITHM_PARITY_0}, | 
|  | 76 | {"raid5_la", "RAID5 (left asymmetric)",		1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, | 
|  | 77 | {"raid5_ra", "RAID5 (right asymmetric)",	1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, | 
|  | 78 | {"raid5_ls", "RAID5 (left symmetric)",		1, 2, 5, ALGORITHM_LEFT_SYMMETRIC}, | 
|  | 79 | {"raid5_rs", "RAID5 (right symmetric)",		1, 2, 5, ALGORITHM_RIGHT_SYMMETRIC}, | 
|  | 80 | {"raid6_zr", "RAID6 (zero restart)",		2, 4, 6, ALGORITHM_ROTATING_ZERO_RESTART}, | 
|  | 81 | {"raid6_nr", "RAID6 (N restart)",		2, 4, 6, ALGORITHM_ROTATING_N_RESTART}, | 
|  | 82 | {"raid6_nc", "RAID6 (N continue)",		2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} | 
|  | 83 | }; | 
|  | 84 |  | 
|  | 85 | static struct raid_type *get_raid_type(char *name) | 
|  | 86 | { | 
|  | 87 | int i; | 
|  | 88 |  | 
|  | 89 | for (i = 0; i < ARRAY_SIZE(raid_types); i++) | 
|  | 90 | if (!strcmp(raid_types[i].name, name)) | 
|  | 91 | return &raid_types[i]; | 
|  | 92 |  | 
|  | 93 | return NULL; | 
|  | 94 | } | 
|  | 95 |  | 
|  | 96 | static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *raid_type, unsigned raid_devs) | 
|  | 97 | { | 
|  | 98 | unsigned i; | 
|  | 99 | struct raid_set *rs; | 
|  | 100 | sector_t sectors_per_dev; | 
|  | 101 |  | 
|  | 102 | if (raid_devs <= raid_type->parity_devs) { | 
|  | 103 | ti->error = "Insufficient number of devices"; | 
|  | 104 | return ERR_PTR(-EINVAL); | 
|  | 105 | } | 
|  | 106 |  | 
|  | 107 | sectors_per_dev = ti->len; | 
|  | 108 | if (sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { | 
|  | 109 | ti->error = "Target length not divisible by number of data devices"; | 
|  | 110 | return ERR_PTR(-EINVAL); | 
|  | 111 | } | 
|  | 112 |  | 
|  | 113 | rs = kzalloc(sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), GFP_KERNEL); | 
|  | 114 | if (!rs) { | 
|  | 115 | ti->error = "Cannot allocate raid context"; | 
|  | 116 | return ERR_PTR(-ENOMEM); | 
|  | 117 | } | 
|  | 118 |  | 
|  | 119 | mddev_init(&rs->md); | 
|  | 120 |  | 
|  | 121 | rs->ti = ti; | 
|  | 122 | rs->raid_type = raid_type; | 
|  | 123 | rs->md.raid_disks = raid_devs; | 
|  | 124 | rs->md.level = raid_type->level; | 
|  | 125 | rs->md.new_level = rs->md.level; | 
|  | 126 | rs->md.dev_sectors = sectors_per_dev; | 
|  | 127 | rs->md.layout = raid_type->algorithm; | 
|  | 128 | rs->md.new_layout = rs->md.layout; | 
|  | 129 | rs->md.delta_disks = 0; | 
|  | 130 | rs->md.recovery_cp = 0; | 
|  | 131 |  | 
|  | 132 | for (i = 0; i < raid_devs; i++) | 
|  | 133 | md_rdev_init(&rs->dev[i].rdev); | 
|  | 134 |  | 
|  | 135 | /* | 
|  | 136 | * Remaining items to be initialized by further RAID params: | 
|  | 137 | *  rs->md.persistent | 
|  | 138 | *  rs->md.external | 
|  | 139 | *  rs->md.chunk_sectors | 
|  | 140 | *  rs->md.new_chunk_sectors | 
|  | 141 | */ | 
|  | 142 |  | 
|  | 143 | return rs; | 
|  | 144 | } | 
|  | 145 |  | 
|  | 146 | static void context_free(struct raid_set *rs) | 
|  | 147 | { | 
|  | 148 | int i; | 
|  | 149 |  | 
|  | 150 | for (i = 0; i < rs->md.raid_disks; i++) | 
|  | 151 | if (rs->dev[i].data_dev) | 
|  | 152 | dm_put_device(rs->ti, rs->dev[i].data_dev); | 
|  | 153 |  | 
|  | 154 | kfree(rs); | 
|  | 155 | } | 
|  | 156 |  | 
|  | 157 | /* | 
|  | 158 | * For every device we have two words | 
|  | 159 | *  <meta_dev>: meta device name or '-' if missing | 
|  | 160 | *  <data_dev>: data device name or '-' if missing | 
|  | 161 | * | 
|  | 162 | * This code parses those words. | 
|  | 163 | */ | 
|  | 164 | static int dev_parms(struct raid_set *rs, char **argv) | 
|  | 165 | { | 
|  | 166 | int i; | 
|  | 167 | int rebuild = 0; | 
|  | 168 | int metadata_available = 0; | 
|  | 169 | int ret = 0; | 
|  | 170 |  | 
|  | 171 | for (i = 0; i < rs->md.raid_disks; i++, argv += 2) { | 
|  | 172 | rs->dev[i].rdev.raid_disk = i; | 
|  | 173 |  | 
|  | 174 | rs->dev[i].meta_dev = NULL; | 
|  | 175 | rs->dev[i].data_dev = NULL; | 
|  | 176 |  | 
|  | 177 | /* | 
|  | 178 | * There are no offsets, since there is a separate device | 
|  | 179 | * for data and metadata. | 
|  | 180 | */ | 
|  | 181 | rs->dev[i].rdev.data_offset = 0; | 
|  | 182 | rs->dev[i].rdev.mddev = &rs->md; | 
|  | 183 |  | 
|  | 184 | if (strcmp(argv[0], "-")) { | 
|  | 185 | rs->ti->error = "Metadata devices not supported"; | 
|  | 186 | return -EINVAL; | 
|  | 187 | } | 
|  | 188 |  | 
|  | 189 | if (!strcmp(argv[1], "-")) { | 
|  | 190 | if (!test_bit(In_sync, &rs->dev[i].rdev.flags) && | 
|  | 191 | (!rs->dev[i].rdev.recovery_offset)) { | 
|  | 192 | rs->ti->error = "Drive designated for rebuild not specified"; | 
|  | 193 | return -EINVAL; | 
|  | 194 | } | 
|  | 195 |  | 
|  | 196 | continue; | 
|  | 197 | } | 
|  | 198 |  | 
|  | 199 | ret = dm_get_device(rs->ti, argv[1], | 
|  | 200 | dm_table_get_mode(rs->ti->table), | 
|  | 201 | &rs->dev[i].data_dev); | 
|  | 202 | if (ret) { | 
|  | 203 | rs->ti->error = "RAID device lookup failure"; | 
|  | 204 | return ret; | 
|  | 205 | } | 
|  | 206 |  | 
|  | 207 | rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev; | 
|  | 208 | list_add(&rs->dev[i].rdev.same_set, &rs->md.disks); | 
|  | 209 | if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) | 
|  | 210 | rebuild++; | 
|  | 211 | } | 
|  | 212 |  | 
|  | 213 | if (metadata_available) { | 
|  | 214 | rs->md.external = 0; | 
|  | 215 | rs->md.persistent = 1; | 
|  | 216 | rs->md.major_version = 2; | 
|  | 217 | } else if (rebuild && !rs->md.recovery_cp) { | 
|  | 218 | /* | 
|  | 219 | * Without metadata, we will not be able to tell if the array | 
|  | 220 | * is in-sync or not - we must assume it is not.  Therefore, | 
|  | 221 | * it is impossible to rebuild a drive. | 
|  | 222 | * | 
|  | 223 | * Even if there is metadata, the on-disk information may | 
|  | 224 | * indicate that the array is not in-sync and it will then | 
|  | 225 | * fail at that time. | 
|  | 226 | * | 
|  | 227 | * User could specify 'nosync' option if desperate. | 
|  | 228 | */ | 
|  | 229 | DMERR("Unable to rebuild drive while array is not in-sync"); | 
|  | 230 | rs->ti->error = "RAID device lookup failure"; | 
|  | 231 | return -EINVAL; | 
|  | 232 | } | 
|  | 233 |  | 
|  | 234 | return 0; | 
|  | 235 | } | 
|  | 236 |  | 
|  | 237 | /* | 
|  | 238 | * Possible arguments are... | 
|  | 239 | * RAID456: | 
|  | 240 | *	<chunk_size> [optional_args] | 
|  | 241 | * | 
|  | 242 | * Optional args: | 
|  | 243 | *    [[no]sync]			Force or prevent recovery of the entire array | 
|  | 244 | *    [rebuild <idx>]			Rebuild the drive indicated by the index | 
|  | 245 | *    [daemon_sleep <ms>]		Time between bitmap daemon work to clear bits | 
|  | 246 | *    [min_recovery_rate <kB/sec/disk>]	Throttle RAID initialization | 
|  | 247 | *    [max_recovery_rate <kB/sec/disk>]	Throttle RAID initialization | 
|  | 248 | *    [max_write_behind <sectors>]	See '-write-behind=' (man mdadm) | 
|  | 249 | *    [stripe_cache <sectors>]		Stripe cache size for higher RAIDs | 
|  | 250 | */ | 
|  | 251 | static int parse_raid_params(struct raid_set *rs, char **argv, | 
|  | 252 | unsigned num_raid_params) | 
|  | 253 | { | 
|  | 254 | unsigned i, rebuild_cnt = 0; | 
|  | 255 | unsigned long value; | 
|  | 256 | char *key; | 
|  | 257 |  | 
|  | 258 | /* | 
|  | 259 | * First, parse the in-order required arguments | 
|  | 260 | */ | 
|  | 261 | if ((strict_strtoul(argv[0], 10, &value) < 0) || | 
|  | 262 | !is_power_of_2(value) || (value < 8)) { | 
|  | 263 | rs->ti->error = "Bad chunk size"; | 
|  | 264 | return -EINVAL; | 
|  | 265 | } | 
|  | 266 |  | 
|  | 267 | rs->md.new_chunk_sectors = rs->md.chunk_sectors = value; | 
|  | 268 | argv++; | 
|  | 269 | num_raid_params--; | 
|  | 270 |  | 
|  | 271 | /* | 
|  | 272 | * Second, parse the unordered optional arguments | 
|  | 273 | */ | 
|  | 274 | for (i = 0; i < rs->md.raid_disks; i++) | 
|  | 275 | set_bit(In_sync, &rs->dev[i].rdev.flags); | 
|  | 276 |  | 
|  | 277 | for (i = 0; i < num_raid_params; i++) { | 
|  | 278 | if (!strcmp(argv[i], "nosync")) { | 
|  | 279 | rs->md.recovery_cp = MaxSector; | 
|  | 280 | rs->print_flags |= DMPF_NOSYNC; | 
|  | 281 | rs->md.flags |= MD_SYNC_STATE_FORCED; | 
|  | 282 | continue; | 
|  | 283 | } | 
|  | 284 | if (!strcmp(argv[i], "sync")) { | 
|  | 285 | rs->md.recovery_cp = 0; | 
|  | 286 | rs->print_flags |= DMPF_SYNC; | 
|  | 287 | rs->md.flags |= MD_SYNC_STATE_FORCED; | 
|  | 288 | continue; | 
|  | 289 | } | 
|  | 290 |  | 
|  | 291 | /* The rest of the optional arguments come in key/value pairs */ | 
|  | 292 | if ((i + 1) >= num_raid_params) { | 
|  | 293 | rs->ti->error = "Wrong number of raid parameters given"; | 
|  | 294 | return -EINVAL; | 
|  | 295 | } | 
|  | 296 |  | 
|  | 297 | key = argv[i++]; | 
|  | 298 | if (strict_strtoul(argv[i], 10, &value) < 0) { | 
|  | 299 | rs->ti->error = "Bad numerical argument given in raid params"; | 
|  | 300 | return -EINVAL; | 
|  | 301 | } | 
|  | 302 |  | 
|  | 303 | if (!strcmp(key, "rebuild")) { | 
|  | 304 | if (++rebuild_cnt > rs->raid_type->parity_devs) { | 
|  | 305 | rs->ti->error = "Too many rebuild drives given"; | 
|  | 306 | return -EINVAL; | 
|  | 307 | } | 
|  | 308 | if (value > rs->md.raid_disks) { | 
|  | 309 | rs->ti->error = "Invalid rebuild index given"; | 
|  | 310 | return -EINVAL; | 
|  | 311 | } | 
|  | 312 | clear_bit(In_sync, &rs->dev[value].rdev.flags); | 
|  | 313 | rs->dev[value].rdev.recovery_offset = 0; | 
|  | 314 | } else if (!strcmp(key, "max_write_behind")) { | 
|  | 315 | rs->print_flags |= DMPF_MAX_WRITE_BEHIND; | 
|  | 316 |  | 
|  | 317 | /* | 
|  | 318 | * In device-mapper, we specify things in sectors, but | 
|  | 319 | * MD records this value in kB | 
|  | 320 | */ | 
|  | 321 | value /= 2; | 
|  | 322 | if (value > COUNTER_MAX) { | 
|  | 323 | rs->ti->error = "Max write-behind limit out of range"; | 
|  | 324 | return -EINVAL; | 
|  | 325 | } | 
|  | 326 | rs->md.bitmap_info.max_write_behind = value; | 
|  | 327 | } else if (!strcmp(key, "daemon_sleep")) { | 
|  | 328 | rs->print_flags |= DMPF_DAEMON_SLEEP; | 
|  | 329 | if (!value || (value > MAX_SCHEDULE_TIMEOUT)) { | 
|  | 330 | rs->ti->error = "daemon sleep period out of range"; | 
|  | 331 | return -EINVAL; | 
|  | 332 | } | 
|  | 333 | rs->md.bitmap_info.daemon_sleep = value; | 
|  | 334 | } else if (!strcmp(key, "stripe_cache")) { | 
|  | 335 | rs->print_flags |= DMPF_STRIPE_CACHE; | 
|  | 336 |  | 
|  | 337 | /* | 
|  | 338 | * In device-mapper, we specify things in sectors, but | 
|  | 339 | * MD records this value in kB | 
|  | 340 | */ | 
|  | 341 | value /= 2; | 
|  | 342 |  | 
|  | 343 | if (rs->raid_type->level < 5) { | 
|  | 344 | rs->ti->error = "Inappropriate argument: stripe_cache"; | 
|  | 345 | return -EINVAL; | 
|  | 346 | } | 
|  | 347 | if (raid5_set_cache_size(&rs->md, (int)value)) { | 
|  | 348 | rs->ti->error = "Bad stripe_cache size"; | 
|  | 349 | return -EINVAL; | 
|  | 350 | } | 
|  | 351 | } else if (!strcmp(key, "min_recovery_rate")) { | 
|  | 352 | rs->print_flags |= DMPF_MIN_RECOVERY_RATE; | 
|  | 353 | if (value > INT_MAX) { | 
|  | 354 | rs->ti->error = "min_recovery_rate out of range"; | 
|  | 355 | return -EINVAL; | 
|  | 356 | } | 
|  | 357 | rs->md.sync_speed_min = (int)value; | 
|  | 358 | } else if (!strcmp(key, "max_recovery_rate")) { | 
|  | 359 | rs->print_flags |= DMPF_MAX_RECOVERY_RATE; | 
|  | 360 | if (value > INT_MAX) { | 
|  | 361 | rs->ti->error = "max_recovery_rate out of range"; | 
|  | 362 | return -EINVAL; | 
|  | 363 | } | 
|  | 364 | rs->md.sync_speed_max = (int)value; | 
|  | 365 | } else { | 
|  | 366 | DMERR("Unable to parse RAID parameter: %s", key); | 
|  | 367 | rs->ti->error = "Unable to parse RAID parameters"; | 
|  | 368 | return -EINVAL; | 
|  | 369 | } | 
|  | 370 | } | 
|  | 371 |  | 
|  | 372 | /* Assume there are no metadata devices until the drives are parsed */ | 
|  | 373 | rs->md.persistent = 0; | 
|  | 374 | rs->md.external = 1; | 
|  | 375 |  | 
|  | 376 | return 0; | 
|  | 377 | } | 
|  | 378 |  | 
|  | 379 | static void do_table_event(struct work_struct *ws) | 
|  | 380 | { | 
|  | 381 | struct raid_set *rs = container_of(ws, struct raid_set, md.event_work); | 
|  | 382 |  | 
|  | 383 | dm_table_event(rs->ti->table); | 
|  | 384 | } | 
|  | 385 |  | 
|  | 386 | static int raid_is_congested(struct dm_target_callbacks *cb, int bits) | 
|  | 387 | { | 
|  | 388 | struct raid_set *rs = container_of(cb, struct raid_set, callbacks); | 
|  | 389 |  | 
|  | 390 | return md_raid5_congested(&rs->md, bits); | 
|  | 391 | } | 
|  | 392 |  | 
| NeilBrown | 9d09e66 | 2011-01-13 20:00:02 +0000 | [diff] [blame] | 393 | /* | 
|  | 394 | * Construct a RAID4/5/6 mapping: | 
|  | 395 | * Args: | 
|  | 396 | *	<raid_type> <#raid_params> <raid_params>		\ | 
|  | 397 | *	<#raid_devs> { <meta_dev1> <dev1> .. <meta_devN> <devN> } | 
|  | 398 | * | 
|  | 399 | * ** metadata devices are not supported yet, use '-' instead ** | 
|  | 400 | * | 
|  | 401 | * <raid_params> varies by <raid_type>.  See 'parse_raid_params' for | 
|  | 402 | * details on possible <raid_params>. | 
|  | 403 | */ | 
|  | 404 | static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) | 
|  | 405 | { | 
|  | 406 | int ret; | 
|  | 407 | struct raid_type *rt; | 
|  | 408 | unsigned long num_raid_params, num_raid_devs; | 
|  | 409 | struct raid_set *rs = NULL; | 
|  | 410 |  | 
|  | 411 | /* Must have at least <raid_type> <#raid_params> */ | 
|  | 412 | if (argc < 2) { | 
|  | 413 | ti->error = "Too few arguments"; | 
|  | 414 | return -EINVAL; | 
|  | 415 | } | 
|  | 416 |  | 
|  | 417 | /* raid type */ | 
|  | 418 | rt = get_raid_type(argv[0]); | 
|  | 419 | if (!rt) { | 
|  | 420 | ti->error = "Unrecognised raid_type"; | 
|  | 421 | return -EINVAL; | 
|  | 422 | } | 
|  | 423 | argc--; | 
|  | 424 | argv++; | 
|  | 425 |  | 
|  | 426 | /* number of RAID parameters */ | 
|  | 427 | if (strict_strtoul(argv[0], 10, &num_raid_params) < 0) { | 
|  | 428 | ti->error = "Cannot understand number of RAID parameters"; | 
|  | 429 | return -EINVAL; | 
|  | 430 | } | 
|  | 431 | argc--; | 
|  | 432 | argv++; | 
|  | 433 |  | 
|  | 434 | /* Skip over RAID params for now and find out # of devices */ | 
|  | 435 | if (num_raid_params + 1 > argc) { | 
|  | 436 | ti->error = "Arguments do not agree with counts given"; | 
|  | 437 | return -EINVAL; | 
|  | 438 | } | 
|  | 439 |  | 
|  | 440 | if ((strict_strtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) || | 
|  | 441 | (num_raid_devs >= INT_MAX)) { | 
|  | 442 | ti->error = "Cannot understand number of raid devices"; | 
|  | 443 | return -EINVAL; | 
|  | 444 | } | 
|  | 445 |  | 
|  | 446 | rs = context_alloc(ti, rt, (unsigned)num_raid_devs); | 
|  | 447 | if (IS_ERR(rs)) | 
|  | 448 | return PTR_ERR(rs); | 
|  | 449 |  | 
|  | 450 | ret = parse_raid_params(rs, argv, (unsigned)num_raid_params); | 
|  | 451 | if (ret) | 
|  | 452 | goto bad; | 
|  | 453 |  | 
|  | 454 | ret = -EINVAL; | 
|  | 455 |  | 
|  | 456 | argc -= num_raid_params + 1; /* +1: we already have num_raid_devs */ | 
|  | 457 | argv += num_raid_params + 1; | 
|  | 458 |  | 
|  | 459 | if (argc != (num_raid_devs * 2)) { | 
|  | 460 | ti->error = "Supplied RAID devices does not match the count given"; | 
|  | 461 | goto bad; | 
|  | 462 | } | 
|  | 463 |  | 
|  | 464 | ret = dev_parms(rs, argv); | 
|  | 465 | if (ret) | 
|  | 466 | goto bad; | 
|  | 467 |  | 
|  | 468 | INIT_WORK(&rs->md.event_work, do_table_event); | 
|  | 469 | ti->split_io = rs->md.chunk_sectors; | 
|  | 470 | ti->private = rs; | 
|  | 471 |  | 
|  | 472 | mutex_lock(&rs->md.reconfig_mutex); | 
|  | 473 | ret = md_run(&rs->md); | 
|  | 474 | rs->md.in_sync = 0; /* Assume already marked dirty */ | 
|  | 475 | mutex_unlock(&rs->md.reconfig_mutex); | 
|  | 476 |  | 
|  | 477 | if (ret) { | 
|  | 478 | ti->error = "Fail to run raid array"; | 
|  | 479 | goto bad; | 
|  | 480 | } | 
|  | 481 |  | 
|  | 482 | rs->callbacks.congested_fn = raid_is_congested; | 
| NeilBrown | 9d09e66 | 2011-01-13 20:00:02 +0000 | [diff] [blame] | 483 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); | 
|  | 484 |  | 
|  | 485 | return 0; | 
|  | 486 |  | 
|  | 487 | bad: | 
|  | 488 | context_free(rs); | 
|  | 489 |  | 
|  | 490 | return ret; | 
|  | 491 | } | 
|  | 492 |  | 
|  | 493 | static void raid_dtr(struct dm_target *ti) | 
|  | 494 | { | 
|  | 495 | struct raid_set *rs = ti->private; | 
|  | 496 |  | 
|  | 497 | list_del_init(&rs->callbacks.list); | 
|  | 498 | md_stop(&rs->md); | 
|  | 499 | context_free(rs); | 
|  | 500 | } | 
|  | 501 |  | 
|  | 502 | static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) | 
|  | 503 | { | 
|  | 504 | struct raid_set *rs = ti->private; | 
|  | 505 | mddev_t *mddev = &rs->md; | 
|  | 506 |  | 
|  | 507 | mddev->pers->make_request(mddev, bio); | 
|  | 508 |  | 
|  | 509 | return DM_MAPIO_SUBMITTED; | 
|  | 510 | } | 
|  | 511 |  | 
|  | 512 | static int raid_status(struct dm_target *ti, status_type_t type, | 
|  | 513 | char *result, unsigned maxlen) | 
|  | 514 | { | 
|  | 515 | struct raid_set *rs = ti->private; | 
|  | 516 | unsigned raid_param_cnt = 1; /* at least 1 for chunksize */ | 
|  | 517 | unsigned sz = 0; | 
|  | 518 | int i; | 
|  | 519 | sector_t sync; | 
|  | 520 |  | 
|  | 521 | switch (type) { | 
|  | 522 | case STATUSTYPE_INFO: | 
|  | 523 | DMEMIT("%s %d ", rs->raid_type->name, rs->md.raid_disks); | 
|  | 524 |  | 
|  | 525 | for (i = 0; i < rs->md.raid_disks; i++) { | 
|  | 526 | if (test_bit(Faulty, &rs->dev[i].rdev.flags)) | 
|  | 527 | DMEMIT("D"); | 
|  | 528 | else if (test_bit(In_sync, &rs->dev[i].rdev.flags)) | 
|  | 529 | DMEMIT("A"); | 
|  | 530 | else | 
|  | 531 | DMEMIT("a"); | 
|  | 532 | } | 
|  | 533 |  | 
|  | 534 | if (test_bit(MD_RECOVERY_RUNNING, &rs->md.recovery)) | 
|  | 535 | sync = rs->md.curr_resync_completed; | 
|  | 536 | else | 
|  | 537 | sync = rs->md.recovery_cp; | 
|  | 538 |  | 
|  | 539 | if (sync > rs->md.resync_max_sectors) | 
|  | 540 | sync = rs->md.resync_max_sectors; | 
|  | 541 |  | 
|  | 542 | DMEMIT(" %llu/%llu", | 
|  | 543 | (unsigned long long) sync, | 
|  | 544 | (unsigned long long) rs->md.resync_max_sectors); | 
|  | 545 |  | 
|  | 546 | break; | 
|  | 547 | case STATUSTYPE_TABLE: | 
|  | 548 | /* The string you would use to construct this array */ | 
|  | 549 | for (i = 0; i < rs->md.raid_disks; i++) | 
|  | 550 | if (rs->dev[i].data_dev && | 
|  | 551 | !test_bit(In_sync, &rs->dev[i].rdev.flags)) | 
|  | 552 | raid_param_cnt++; /* for rebuilds */ | 
|  | 553 |  | 
|  | 554 | raid_param_cnt += (hweight64(rs->print_flags) * 2); | 
|  | 555 | if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)) | 
|  | 556 | raid_param_cnt--; | 
|  | 557 |  | 
|  | 558 | DMEMIT("%s %u %u", rs->raid_type->name, | 
|  | 559 | raid_param_cnt, rs->md.chunk_sectors); | 
|  | 560 |  | 
|  | 561 | if ((rs->print_flags & DMPF_SYNC) && | 
|  | 562 | (rs->md.recovery_cp == MaxSector)) | 
|  | 563 | DMEMIT(" sync"); | 
|  | 564 | if (rs->print_flags & DMPF_NOSYNC) | 
|  | 565 | DMEMIT(" nosync"); | 
|  | 566 |  | 
|  | 567 | for (i = 0; i < rs->md.raid_disks; i++) | 
|  | 568 | if (rs->dev[i].data_dev && | 
|  | 569 | !test_bit(In_sync, &rs->dev[i].rdev.flags)) | 
|  | 570 | DMEMIT(" rebuild %u", i); | 
|  | 571 |  | 
|  | 572 | if (rs->print_flags & DMPF_DAEMON_SLEEP) | 
|  | 573 | DMEMIT(" daemon_sleep %lu", | 
|  | 574 | rs->md.bitmap_info.daemon_sleep); | 
|  | 575 |  | 
|  | 576 | if (rs->print_flags & DMPF_MIN_RECOVERY_RATE) | 
|  | 577 | DMEMIT(" min_recovery_rate %d", rs->md.sync_speed_min); | 
|  | 578 |  | 
|  | 579 | if (rs->print_flags & DMPF_MAX_RECOVERY_RATE) | 
|  | 580 | DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max); | 
|  | 581 |  | 
|  | 582 | if (rs->print_flags & DMPF_MAX_WRITE_BEHIND) | 
|  | 583 | DMEMIT(" max_write_behind %lu", | 
|  | 584 | rs->md.bitmap_info.max_write_behind); | 
|  | 585 |  | 
|  | 586 | if (rs->print_flags & DMPF_STRIPE_CACHE) { | 
|  | 587 | raid5_conf_t *conf = rs->md.private; | 
|  | 588 |  | 
|  | 589 | /* convert from kiB to sectors */ | 
|  | 590 | DMEMIT(" stripe_cache %d", | 
|  | 591 | conf ? conf->max_nr_stripes * 2 : 0); | 
|  | 592 | } | 
|  | 593 |  | 
|  | 594 | DMEMIT(" %d", rs->md.raid_disks); | 
|  | 595 | for (i = 0; i < rs->md.raid_disks; i++) { | 
|  | 596 | DMEMIT(" -"); /* metadata device */ | 
|  | 597 |  | 
|  | 598 | if (rs->dev[i].data_dev) | 
|  | 599 | DMEMIT(" %s", rs->dev[i].data_dev->name); | 
|  | 600 | else | 
|  | 601 | DMEMIT(" -"); | 
|  | 602 | } | 
|  | 603 | } | 
|  | 604 |  | 
|  | 605 | return 0; | 
|  | 606 | } | 
|  | 607 |  | 
|  | 608 | static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) | 
|  | 609 | { | 
|  | 610 | struct raid_set *rs = ti->private; | 
|  | 611 | unsigned i; | 
|  | 612 | int ret = 0; | 
|  | 613 |  | 
|  | 614 | for (i = 0; !ret && i < rs->md.raid_disks; i++) | 
|  | 615 | if (rs->dev[i].data_dev) | 
|  | 616 | ret = fn(ti, | 
|  | 617 | rs->dev[i].data_dev, | 
|  | 618 | 0, /* No offset on data devs */ | 
|  | 619 | rs->md.dev_sectors, | 
|  | 620 | data); | 
|  | 621 |  | 
|  | 622 | return ret; | 
|  | 623 | } | 
|  | 624 |  | 
|  | 625 | static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) | 
|  | 626 | { | 
|  | 627 | struct raid_set *rs = ti->private; | 
|  | 628 | unsigned chunk_size = rs->md.chunk_sectors << 9; | 
|  | 629 | raid5_conf_t *conf = rs->md.private; | 
|  | 630 |  | 
|  | 631 | blk_limits_io_min(limits, chunk_size); | 
|  | 632 | blk_limits_io_opt(limits, chunk_size * (conf->raid_disks - conf->max_degraded)); | 
|  | 633 | } | 
|  | 634 |  | 
|  | 635 | static void raid_presuspend(struct dm_target *ti) | 
|  | 636 | { | 
|  | 637 | struct raid_set *rs = ti->private; | 
|  | 638 |  | 
|  | 639 | md_stop_writes(&rs->md); | 
|  | 640 | } | 
|  | 641 |  | 
|  | 642 | static void raid_postsuspend(struct dm_target *ti) | 
|  | 643 | { | 
|  | 644 | struct raid_set *rs = ti->private; | 
|  | 645 |  | 
|  | 646 | mddev_suspend(&rs->md); | 
|  | 647 | } | 
|  | 648 |  | 
|  | 649 | static void raid_resume(struct dm_target *ti) | 
|  | 650 | { | 
|  | 651 | struct raid_set *rs = ti->private; | 
|  | 652 |  | 
|  | 653 | mddev_resume(&rs->md); | 
|  | 654 | } | 
|  | 655 |  | 
|  | 656 | static struct target_type raid_target = { | 
|  | 657 | .name = "raid", | 
|  | 658 | .version = {1, 0, 0}, | 
|  | 659 | .module = THIS_MODULE, | 
|  | 660 | .ctr = raid_ctr, | 
|  | 661 | .dtr = raid_dtr, | 
|  | 662 | .map = raid_map, | 
|  | 663 | .status = raid_status, | 
|  | 664 | .iterate_devices = raid_iterate_devices, | 
|  | 665 | .io_hints = raid_io_hints, | 
|  | 666 | .presuspend = raid_presuspend, | 
|  | 667 | .postsuspend = raid_postsuspend, | 
|  | 668 | .resume = raid_resume, | 
|  | 669 | }; | 
|  | 670 |  | 
|  | 671 | static int __init dm_raid_init(void) | 
|  | 672 | { | 
|  | 673 | return dm_register_target(&raid_target); | 
|  | 674 | } | 
|  | 675 |  | 
|  | 676 | static void __exit dm_raid_exit(void) | 
|  | 677 | { | 
|  | 678 | dm_unregister_target(&raid_target); | 
|  | 679 | } | 
|  | 680 |  | 
|  | 681 | module_init(dm_raid_init); | 
|  | 682 | module_exit(dm_raid_exit); | 
|  | 683 |  | 
|  | 684 | MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target"); | 
|  | 685 | MODULE_ALIAS("dm-raid4"); | 
|  | 686 | MODULE_ALIAS("dm-raid5"); | 
|  | 687 | MODULE_ALIAS("dm-raid6"); | 
|  | 688 | MODULE_AUTHOR("Neil Brown <dm-devel@redhat.com>"); | 
|  | 689 | MODULE_LICENSE("GPL"); |