1 /* $OpenBSD: rf_disks.c,v 1.12 2007/06/05 00:38:22 deraadt Exp $ */
2 /* $NetBSD: rf_disks.c,v 1.31 2000/06/02 01:17:14 oster Exp $ */
3
4 /*
5 * Copyright (c) 1999 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Greg Oster
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39 /*
40 * Copyright (c) 1995 Carnegie-Mellon University.
41 * All rights reserved.
42 *
43 * Author: Mark Holland
44 *
45 * Permission to use, copy, modify and distribute this software and
46 * its documentation is hereby granted, provided that both the copyright
47 * notice and this permission notice appear in all copies of the
48 * software, derivative works or modified versions, and any portions
49 * thereof, and that both notices appear in supporting documentation.
50 *
51 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
52 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
53 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
54 *
55 * Carnegie Mellon requests users of this software to return to
56 *
57 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
58 * School of Computer Science
59 * Carnegie Mellon University
60 * Pittsburgh PA 15213-3890
61 *
62 * any improvements or extensions that they make and grant Carnegie the
63 * rights to redistribute these changes.
64 */
65
66 /***************************************************************
67 * rf_disks.c -- Code to perform operations on the actual disks.
68 ***************************************************************/
69
70 #include "rf_types.h"
71 #include "rf_raid.h"
72 #include "rf_alloclist.h"
73 #include "rf_utils.h"
74 #include "rf_configure.h"
75 #include "rf_general.h"
76 #include "rf_options.h"
77 #include "rf_kintf.h"
78
79 #if defined(__NetBSD__)
80 #include "rf_netbsd.h"
81 #elif defined(__OpenBSD__)
82 #include "rf_openbsd.h"
83 #endif
84
85 #include <sys/types.h>
86 #include <sys/param.h>
87 #include <sys/systm.h>
88 #include <sys/proc.h>
89 #include <sys/ioctl.h>
90 #include <sys/fcntl.h>
91 #ifdef __NETBSD__
92 #include <sys/vnode.h>
93 #endif /* __NETBSD__ */
94
95 int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
96 void rf_print_label_status(RF_Raid_t *, int, int, char *,
97 RF_ComponentLabel_t *);
98 int rf_check_label_vitals(RF_Raid_t *, int, int, char *,
99 RF_ComponentLabel_t *, int, int);
100
/*
 * Debug printf helpers: the message is printed only when rf_diskDebug is
 * non-zero.  Each expansion is wrapped in do { } while (0) so that it is
 * exactly one statement; a bare "if (...) printf(...)" would silently
 * capture an "else" that follows the macro invocation.
 */
#define	DPRINTF6(a,b,c,d,e,f)						\
	do {								\
		if (rf_diskDebug)					\
			printf(a,b,c,d,e,f);				\
	} while (0)
#define	DPRINTF7(a,b,c,d,e,f,g)						\
	do {								\
		if (rf_diskDebug)					\
			printf(a,b,c,d,e,f,g);				\
	} while (0)
103
104 /****************************************************************************
105 *
106 * Initialize the disks comprising the array.
107 *
108 * We want the spare disks to have regular row,col numbers so that we can
109 * easily substitue a spare for a failed disk. But, the driver code assumes
110 * throughout that the array contains numRow by numCol _non-spare_ disks, so
111 * it's not clear how to fit in the spares. This is an unfortunate holdover
112 * from raidSim. The quick and dirty fix is to make row zero bigger than the
113 * rest, and put all the spares in it. This probably needs to get changed
114 * eventually.
115 *
116 ****************************************************************************/
117 int
118 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
119 RF_Config_t *cfgPtr)
120 {
121 RF_RaidDisk_t **disks;
122 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
123 RF_RowCol_t r, c;
124 int bs, ret;
125 unsigned i, count, foundone = 0, numFailuresThisRow;
126 int force;
127
128 force = cfgPtr->force;
129
130 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
131 if (ret)
132 goto fail;
133
134 disks = raidPtr->Disks;
135
136 for (r = 0; r < raidPtr->numRow; r++) {
137 numFailuresThisRow = 0;
138 for (c = 0; c < raidPtr->numCol; c++) {
139 ret = rf_ConfigureDisk(raidPtr,
140 &cfgPtr->devnames[r][c][0], &disks[r][c], r, c);
141
142 if (ret)
143 goto fail;
144
145 if (disks[r][c].status == rf_ds_optimal) {
146 raidread_component_label(
147 raidPtr->raid_cinfo[r][c].ci_dev,
148 raidPtr->raid_cinfo[r][c].ci_vp,
149 &raidPtr->raid_cinfo[r][c].ci_label);
150 }
151
152 if (disks[r][c].status != rf_ds_optimal) {
153 numFailuresThisRow++;
154 } else {
155 if (disks[r][c].numBlocks < min_numblks)
156 min_numblks = disks[r][c].numBlocks;
157 DPRINTF7("Disk at row %d col %d: dev %s"
158 " numBlocks %ld blockSize %d (%ld MB)\n",
159 r, c, disks[r][c].devname,
160 (long int) disks[r][c].numBlocks,
161 disks[r][c].blockSize,
162 (long int) disks[r][c].numBlocks *
163 disks[r][c].blockSize / 1024 / 1024);
164 }
165 }
166 /* XXX Fix for n-fault tolerant. */
167 /*
168 * XXX This should probably check to see how many failures
169 * we can handle for this configuration !
170 */
171 if (numFailuresThisRow > 0)
172 raidPtr->status[r] = rf_rs_degraded;
173 }
174 /*
175 * All disks must be the same size & have the same block size, bs must
176 * be a power of 2.
177 */
178 bs = 0;
179 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
180 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
181 if (disks[r][c].status == rf_ds_optimal) {
182 bs = disks[r][c].blockSize;
183 foundone = 1;
184 }
185 }
186 }
187 if (!foundone) {
188 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in"
189 " the array.\n");
190 ret = EINVAL;
191 goto fail;
192 }
193 for (count = 0, i = 1; i; i <<= 1)
194 if (bs & i)
195 count++;
196 if (count != 1) {
197 RF_ERRORMSG1("Error: block size on disks (%d) must be a"
198 " power of 2.\n", bs);
199 ret = EINVAL;
200 goto fail;
201 }
202
203 if (rf_CheckLabels(raidPtr, cfgPtr)) {
204 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
205 if (force != 0) {
206 printf("raid%d: Fatal errors being ignored.\n",
207 raidPtr->raidid);
208 } else {
209 ret = EINVAL;
210 goto fail;
211 }
212 }
213
214 for (r = 0; r < raidPtr->numRow; r++) {
215 for (c = 0; c < raidPtr->numCol; c++) {
216 if (disks[r][c].status == rf_ds_optimal) {
217 if (disks[r][c].blockSize != bs) {
218 RF_ERRORMSG2("Error: block size of"
219 " disk at r %d c %d different from"
220 " disk at r 0 c 0.\n", r, c);
221 ret = EINVAL;
222 goto fail;
223 }
224 if (disks[r][c].numBlocks != min_numblks) {
225 RF_ERRORMSG3("WARNING: truncating disk"
226 " at r %d c %d to %d blocks.\n",
227 r, c, (int) min_numblks);
228 disks[r][c].numBlocks = min_numblks;
229 }
230 }
231 }
232 }
233
234 raidPtr->sectorsPerDisk = min_numblks;
235 raidPtr->logBytesPerSector = ffs(bs) - 1;
236 raidPtr->bytesPerSector = bs;
237 raidPtr->sectorMask = bs - 1;
238 return (0);
239
240 fail:
241 rf_UnconfigureVnodes(raidPtr);
242
243 return (ret);
244 }
245
246
247 /****************************************************************************
248 * Set up the data structures describing the spare disks in the array.
249 * Recall from the above comment that the spare disk descriptors are stored
250 * in row zero, which is specially expanded to hold them.
251 ****************************************************************************/
252 int
253 rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
254 RF_Config_t * cfgPtr)
255 {
256 int i, ret;
257 unsigned int bs;
258 RF_RaidDisk_t *disks;
259 int num_spares_done;
260
261 num_spares_done = 0;
262
263 /*
264 * The space for the spares should have already been allocated by
265 * ConfigureDisks().
266 */
267
268 disks = &raidPtr->Disks[0][raidPtr->numCol];
269 for (i = 0; i < raidPtr->numSpare; i++) {
270 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
271 &disks[i], 0, raidPtr->numCol + i);
272 if (ret)
273 goto fail;
274 if (disks[i].status != rf_ds_optimal) {
275 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
276 &cfgPtr->spare_names[i][0]);
277 } else {
278 /* Change status to spare. */
279 disks[i].status = rf_ds_spare;
280 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld"
281 " blockSize %d (%ld MB).\n", i, disks[i].devname,
282 (long int) disks[i].numBlocks, disks[i].blockSize,
283 (long int) disks[i].numBlocks *
284 disks[i].blockSize / 1024 / 1024);
285 }
286 num_spares_done++;
287 }
288
289 /* Check sizes and block sizes on spare disks. */
290 bs = 1 << raidPtr->logBytesPerSector;
291 for (i = 0; i < raidPtr->numSpare; i++) {
292 if (disks[i].blockSize != bs) {
293 RF_ERRORMSG3("Block size of %d on spare disk %s is"
294 " not the same as on other disks (%d).\n",
295 disks[i].blockSize, disks[i].devname, bs);
296 ret = EINVAL;
297 goto fail;
298 }
299 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
300 RF_ERRORMSG3("Spare disk %s (%llu blocks) is too small"
301 " to serve as a spare (need %llu blocks).\n",
302 disks[i].devname, disks[i].numBlocks,
303 raidPtr->sectorsPerDisk);
304 ret = EINVAL;
305 goto fail;
306 } else
307 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
308 RF_ERRORMSG2("Warning: truncating spare disk"
309 " %s to %llu blocks.\n", disks[i].devname,
310 raidPtr->sectorsPerDisk);
311
312 disks[i].numBlocks = raidPtr->sectorsPerDisk;
313 }
314 }
315
316 return (0);
317
318 fail:
319
320 /*
321 * Release the hold on the main components. We've failed to allocate
322 * a spare, and since we're failing, we need to free things...
323 *
324 * XXX Failing to allocate a spare is *not* that big of a deal...
325 * We *can* survive without it, if need be, esp. if we get hot
326 * adding working.
327 * If we don't fail out here, then we need a way to remove this spare...
328 * That should be easier to do here than if we are "live"...
329 */
330
331 rf_UnconfigureVnodes(raidPtr);
332
333 return (ret);
334 }
335
336 int
337 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
338 {
339 RF_RaidDisk_t **disks;
340 int ret;
341 int r;
342
343 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
344 (RF_RaidDisk_t **), raidPtr->cleanupList);
345 if (disks == NULL) {
346 ret = ENOMEM;
347 goto fail;
348 }
349 raidPtr->Disks = disks;
350 /* Get space for the device-specific stuff... */
351 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
352 sizeof(struct raidcinfo *), (struct raidcinfo **),
353 raidPtr->cleanupList);
354 if (raidPtr->raid_cinfo == NULL) {
355 ret = ENOMEM;
356 goto fail;
357 }
358
359 for (r = 0; r < raidPtr->numRow; r++) {
360 /*
361 * We allocate RF_MAXSPARE on the first row so that we
362 * have room to do hot-swapping of spares.
363 */
364 RF_CallocAndAdd(disks[r], raidPtr->numCol +
365 ((r == 0) ? RF_MAXSPARE : 0), sizeof(RF_RaidDisk_t),
366 (RF_RaidDisk_t *), raidPtr->cleanupList);
367 if (disks[r] == NULL) {
368 ret = ENOMEM;
369 goto fail;
370 }
371 /* Get more space for device specific stuff... */
372 RF_CallocAndAdd(raidPtr->raid_cinfo[r], raidPtr->numCol +
373 ((r == 0) ? raidPtr->numSpare : 0),
374 sizeof(struct raidcinfo), (struct raidcinfo *),
375 raidPtr->cleanupList);
376 if (raidPtr->raid_cinfo[r] == NULL) {
377 ret = ENOMEM;
378 goto fail;
379 }
380 }
381 return(0);
382 fail:
383 rf_UnconfigureVnodes(raidPtr);
384
385 return(ret);
386 }
387
388
389 /* Configure a single disk during auto-configuration at boot. */
390 int
391 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
392 RF_AutoConfig_t *auto_config)
393 {
394 RF_RaidDisk_t **disks;
395 RF_RaidDisk_t *diskPtr;
396 RF_RowCol_t r, c;
397 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
398 int bs, ret;
399 int numFailuresThisRow;
400 int force;
401 RF_AutoConfig_t *ac;
402 int parity_good;
403 int mod_counter;
404 int mod_counter_found;
405
406 #if DEBUG
407 printf("Starting autoconfiguration of RAID set...\n");
408 #endif /* DEBUG */
409 force = cfgPtr->force;
410
411 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
412 if (ret)
413 goto fail;
414
415 disks = raidPtr->Disks;
416
417 /* Assume the parity will be fine... */
418 parity_good = RF_RAID_CLEAN;
419
420 /* Check for mod_counters that are too low. */
421 mod_counter_found = 0;
422 ac = auto_config;
423 while(ac!=NULL) {
424 if (mod_counter_found == 0) {
425 mod_counter = ac->clabel->mod_counter;
426 mod_counter_found = 1;
427 } else {
428 if (ac->clabel->mod_counter > mod_counter) {
429 mod_counter = ac->clabel->mod_counter;
430 }
431 }
432 ac->flag = 0; /* Clear the general purpose flag. */
433 ac = ac->next;
434 }
435
436 for (r = 0; r < raidPtr->numRow; r++) {
437 numFailuresThisRow = 0;
438 for (c = 0; c < raidPtr->numCol; c++) {
439 diskPtr = &disks[r][c];
440
441 /* Find this row/col in the autoconfig. */
442 #if DEBUG
443 printf("Looking for %d,%d in autoconfig.\n", r, c);
444 #endif /* DEBUG */
445 ac = auto_config;
446 while(ac!=NULL) {
447 if (ac->clabel == NULL) {
448 /* Big-time bad news. */
449 goto fail;
450 }
451 if ((ac->clabel->row == r) &&
452 (ac->clabel->column == c) &&
453 (ac->clabel->mod_counter == mod_counter)) {
454 /* It's this one... */
455 /*
456 * Flag it as 'used', so we don't
457 * free it later.
458 */
459 ac->flag = 1;
460 #if DEBUG
461 printf("Found: %s at %d,%d.\n",
462 ac->devname, r, c);
463 #endif /* DEBUG */
464
465 break;
466 }
467 ac = ac->next;
468 }
469
470 if (ac == NULL) {
471 /*
472 * We didn't find an exact match with a
473 * correct mod_counter above... Can we
474 * find one with an incorrect mod_counter
475 * to use instead ? (This one, if we find
476 * it, will be marked as failed once the
477 * set configures)
478 */
479
480 ac = auto_config;
481 while(ac!=NULL) {
482 if (ac->clabel == NULL) {
483 /* Big-time bad news. */
484 goto fail;
485 }
486 if ((ac->clabel->row == r) &&
487 (ac->clabel->column == c)) {
488 /*
489 * It's this one...
490 * Flag it as 'used', so we
491 * don't free it later.
492 */
493 ac->flag = 1;
494 #if DEBUG
495 printf("Found(low mod_counter)"
496 ": %s at %d,%d.\n",
497 ac->devname, r, c);
498 #endif /* DEBUG */
499
500 break;
501 }
502 ac = ac->next;
503 }
504 }
505
506
507
508 if (ac!=NULL) {
509 /* Found it. Configure it... */
510 diskPtr->blockSize = ac->clabel->blockSize;
511 diskPtr->numBlocks = ac->clabel->numBlocks;
512 /*
513 * Note: rf_protectedSectors is already
514 * factored into numBlocks here.
515 */
516 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
517 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
518
519 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
520 ac->clabel, sizeof(*ac->clabel));
521 snprintf(diskPtr->devname,
522 sizeof diskPtr->devname, "/dev/%s",
523 ac->devname);
524
525 /*
526 * Note the fact that this component was
527 * autoconfigured. You'll need this info
528 * later. Trust me :)
529 */
530 diskPtr->auto_configured = 1;
531 diskPtr->dev = ac->dev;
532
533 /*
534 * We allow the user to specify that
535 * only a fraction of the disks should
536 * be used. This is just for debug: it
537 * speeds up the parity scan.
538 */
539
540 diskPtr->numBlocks = diskPtr->numBlocks *
541 rf_sizePercentage / 100;
542
543 /*
544 * XXX These will get set multiple times,
545 * but since we're autoconfiguring, they'd
546 * better be always the same each time !
547 * If not, this is the least of your worries.
548 */
549
550 bs = diskPtr->blockSize;
551 min_numblks = diskPtr->numBlocks;
552
553 /*
554 * This gets done multiple times, but that's
555 * fine -- the serial number will be the same
556 * for all components, guaranteed.
557 */
558 raidPtr->serial_number =
559 ac->clabel->serial_number;
560 /*
561 * Check the last time the label
562 * was modified.
563 */
564 if (ac->clabel->mod_counter != mod_counter) {
565 /*
566 * Even though we've filled in all
567 * of the above, we don't trust
568 * this component since it's
569 * modification counter is not
570 * in sync with the rest, and we really
571 * consider it to be failed.
572 */
573 disks[r][c].status = rf_ds_failed;
574 numFailuresThisRow++;
575 } else {
576 if (ac->clabel->clean != RF_RAID_CLEAN)
577 {
578 parity_good = RF_RAID_DIRTY;
579 }
580 }
581 } else {
582 /*
583 * Didn't find it at all !!!
584 * Component must really be dead.
585 */
586 disks[r][c].status = rf_ds_failed;
587 snprintf(disks[r][c].devname,
588 sizeof disks[r][c].devname, "component%d",
589 r * raidPtr->numCol + c);
590 numFailuresThisRow++;
591 }
592 }
593 /* XXX Fix for n-fault tolerant. */
594 /*
595 * XXX This should probably check to see how many failures
596 * we can handle for this configuration !
597 */
598 if (numFailuresThisRow > 0)
599 raidPtr->status[r] = rf_rs_degraded;
600 }
601
602 /* Close the device for the ones that didn't get used. */
603
604 ac = auto_config;
605 while(ac != NULL) {
606 if (ac->flag == 0) {
607 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
608 vput(ac->vp);
609 ac->vp = NULL;
610 #if DEBUG
611 printf("Released %s from auto-config set.\n",
612 ac->devname);
613 #endif /* DEBUG */
614 }
615 ac = ac->next;
616 }
617
618 raidPtr->mod_counter = mod_counter;
619
620 /* Note the state of the parity, if any. */
621 raidPtr->parity_good = parity_good;
622 raidPtr->sectorsPerDisk = min_numblks;
623 raidPtr->logBytesPerSector = ffs(bs) - 1;
624 raidPtr->bytesPerSector = bs;
625 raidPtr->sectorMask = bs - 1;
626 return (0);
627
628 fail:
629
630 rf_UnconfigureVnodes(raidPtr);
631
632 return (ret);
633
634 }
635
636 /* Configure a single disk in the array. */
637 int
638 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *buf, RF_RaidDisk_t *diskPtr,
639 RF_RowCol_t row, RF_RowCol_t col)
640 {
641 char *p;
642 int retcode;
643
644 struct partinfo dpart;
645 struct vnode *vp;
646 struct vattr va;
647 struct proc *proc;
648 int error;
649
650 retcode = 0;
651 p = rf_find_non_white(buf);
652 if (*buf != '\0' && p[strlen(p) - 1] == '\n') {
653 /* Strip off the newline. */
654 p[strlen(p) - 1] = '\0';
655 }
656 (void) strlcpy(diskPtr->devname, p, sizeof diskPtr->devname);
657
658 proc = raidPtr->engine_thread;
659
660 /* Let's start by claiming the component is fine and well... */
661 diskPtr->status = rf_ds_optimal;
662
663 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
664 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
665
666 error = raidlookup(diskPtr->devname, curproc, &vp);
667 if (error) {
668 printf("raidlookup on device: %s failed !\n", diskPtr->devname);
669 if (error == ENXIO) {
670 /* The component isn't there... Must be dead :-( */
671 diskPtr->status = rf_ds_failed;
672 } else {
673 return (error);
674 }
675 }
676 if (diskPtr->status == rf_ds_optimal) {
677
678 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
679 return (error);
680 }
681 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, FREAD,
682 proc->p_ucred, proc);
683 if (error) {
684 return (error);
685 }
686 diskPtr->blockSize = dpart.disklab->d_secsize;
687
688 diskPtr->numBlocks = DL_GETPSIZE(dpart.part) - rf_protectedSectors;
689 diskPtr->partitionSize = DL_GETPSIZE(dpart.part);
690
691 raidPtr->raid_cinfo[row][col].ci_vp = vp;
692 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
693
694 /* This component was not automatically configured. */
695 diskPtr->auto_configured = 0;
696 diskPtr->dev = va.va_rdev;
697
698 /*
699 * We allow the user to specify that only a fraction of the
700 * disks should be used. This is just for debug: it speeds up
701 * the parity scan.
702 */
703 diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage
704 / 100;
705 }
706 return (0);
707 }
708
709 void
710 rf_print_label_status(RF_Raid_t *raidPtr, int row, int column, char *dev_name,
711 RF_ComponentLabel_t *ci_label)
712 {
713
714 printf("raid%d: Component %s being configured at row: %d col: %d\n",
715 raidPtr->raidid, dev_name, row, column);
716 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
717 ci_label->row, ci_label->column, ci_label->num_rows,
718 ci_label->num_columns);
719 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
720 ci_label->version, ci_label->serial_number, ci_label->mod_counter);
721 printf(" Clean: %s Status: %d\n",
722 ci_label->clean ? "Yes" : "No", ci_label->status);
723 }
724
725 int
726 rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column, char *dev_name,
727 RF_ComponentLabel_t *ci_label, int serial_number, int mod_counter)
728 {
729 int fatal_error = 0;
730
731 if (serial_number != ci_label->serial_number) {
732 printf("%s has a different serial number: %d %d.\n",
733 dev_name, serial_number, ci_label->serial_number);
734 fatal_error = 1;
735 }
736 if (mod_counter != ci_label->mod_counter) {
737 printf("%s has a different modfication count: %d %d.\n",
738 dev_name, mod_counter, ci_label->mod_counter);
739 }
740
741 if (row != ci_label->row) {
742 printf("Row out of alignment for: %s.\n", dev_name);
743 fatal_error = 1;
744 }
745 if (column != ci_label->column) {
746 printf("Column out of alignment for: %s.\n", dev_name);
747 fatal_error = 1;
748 }
749 if (raidPtr->numRow != ci_label->num_rows) {
750 printf("Number of rows do not match for: %s.\n", dev_name);
751 fatal_error = 1;
752 }
753 if (raidPtr->numCol != ci_label->num_columns) {
754 printf("Number of columns do not match for: %s.\n", dev_name);
755 fatal_error = 1;
756 }
757 if (ci_label->clean == 0) {
758 /* It's not clean, but that's not fatal. */
759 printf("%s is not clean !\n", dev_name);
760 }
761 return(fatal_error);
762 }
763
764
765 /*
766 *
767 * rf_CheckLabels() - Check all the component labels for consistency.
768 * Return an error if there is anything major amiss.
769 *
770 */
771
772 int
773 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
774 {
775 int r, c;
776 char *dev_name;
777 RF_ComponentLabel_t *ci_label;
778 int serial_number = 0;
779 int mod_number = 0;
780 int fatal_error = 0;
781 int mod_values[4];
782 int mod_count[4];
783 int ser_values[4];
784 int ser_count[4];
785 int num_ser;
786 int num_mod;
787 int i;
788 int found;
789 int hosed_row;
790 int hosed_column;
791 int too_fatal;
792 int parity_good;
793 int force;
794
795 hosed_row = -1;
796 hosed_column = -1;
797 too_fatal = 0;
798 force = cfgPtr->force;
799
800 /*
801 * We're going to try to be a little intelligent here. If one
802 * component's label is bogus, and we can identify that it's the
803 * *only* one that's gone, we'll mark it as "failed" and allow
804 * the configuration to proceed. This will be the *only* case
805 * that we'll proceed if there would be (otherwise) fatal errors.
806 *
807 * Basically we simply keep a count of how many components had
808 * what serial number. If all but one agree, we simply mark
809 * the disagreeing component as being failed, and allow
810 * things to come up "normally".
811 *
812 * We do this first for serial numbers, and then for "mod_counter".
813 *
814 */
815
816 num_ser = 0;
817 num_mod = 0;
818 for (r = 0; r < raidPtr->numRow && !fatal_error; r++) {
819 for (c = 0; c < raidPtr->numCol; c++) {
820 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
821 found = 0;
822 for(i = 0; i < num_ser; i++) {
823 if (ser_values[i] == ci_label->serial_number) {
824 ser_count[i]++;
825 found = 1;
826 break;
827 }
828 }
829 if (!found) {
830 ser_values[num_ser] = ci_label->serial_number;
831 ser_count[num_ser] = 1;
832 num_ser++;
833 if (num_ser > 2) {
834 fatal_error = 1;
835 break;
836 }
837 }
838 found = 0;
839 for(i = 0; i < num_mod; i++) {
840 if (mod_values[i] == ci_label->mod_counter) {
841 mod_count[i]++;
842 found = 1;
843 break;
844 }
845 }
846 if (!found) {
847 mod_values[num_mod] = ci_label->mod_counter;
848 mod_count[num_mod] = 1;
849 num_mod++;
850 if (num_mod > 2) {
851 fatal_error = 1;
852 break;
853 }
854 }
855 }
856 }
857 #if DEBUG
858 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
859 for(i = 0; i < num_ser; i++) {
860 printf("%d %d\n", ser_values[i], ser_count[i]);
861 }
862 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
863 for(i = 0; i < num_mod; i++) {
864 printf("%d %d\n", mod_values[i], mod_count[i]);
865 }
866 #endif /* DEBUG */
867 serial_number = ser_values[0];
868 if (num_ser == 2) {
869 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
870 /* Locate the maverick component. */
871 if (ser_count[1] > ser_count[0]) {
872 serial_number = ser_values[1];
873 }
874 for (r = 0; r < raidPtr->numRow; r++) {
875 for (c = 0; c < raidPtr->numCol; c++) {
876 ci_label =
877 &raidPtr->raid_cinfo[r][c].ci_label;
878 if (serial_number !=
879 ci_label->serial_number) {
880 hosed_row = r;
881 hosed_column = c;
882 break;
883 }
884 }
885 }
886 printf("Hosed component: %s.\n",
887 &cfgPtr->devnames[hosed_row][hosed_column][0]);
888 if (!force) {
889 /*
890 * We'll fail this component, as if there are
891 * other major errors, we aren't forcing things
892 * and we'll abort the config anyways.
893 */
894 raidPtr->Disks[hosed_row][hosed_column].status
895 = rf_ds_failed;
896 raidPtr->numFailures++;
897 raidPtr->status[hosed_row] = rf_rs_degraded;
898 }
899 } else {
900 too_fatal = 1;
901 }
902 if (cfgPtr->parityConfig == '0') {
903 /*
904 * We've identified two different serial numbers.
905 * RAID 0 can't cope with that, so we'll punt.
906 */
907 too_fatal = 1;
908 }
909
910 }
911
912 /*
913 * Record the serial number for later. If we bail later, setting
914 * this doesn't matter, otherwise we've got the best guess at the
915 * correct serial number.
916 */
917 raidPtr->serial_number = serial_number;
918
919 mod_number = mod_values[0];
920 if (num_mod == 2) {
921 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
922 /* Locate the maverick component. */
923 if (mod_count[1] > mod_count[0]) {
924 mod_number = mod_values[1];
925 } else if (mod_count[1] < mod_count[0]) {
926 mod_number = mod_values[0];
927 } else {
928 /*
929 * Counts of different modification values
930 * are the same. Assume greater value is
931 * the correct one, all other things
932 * considered.
933 */
934 if (mod_values[0] > mod_values[1]) {
935 mod_number = mod_values[0];
936 } else {
937 mod_number = mod_values[1];
938 }
939
940 }
941 for (r = 0; r < raidPtr->numRow && !too_fatal; r++) {
942 for (c = 0; c < raidPtr->numCol; c++) {
943 ci_label =
944 &raidPtr->raid_cinfo[r][c].ci_label;
945 if (mod_number !=
946 ci_label->mod_counter) {
947 if ((hosed_row == r) &&
948 (hosed_column == c)) {
949 /*
950 * Same one. Can
951 * deal with it.
952 */
953 } else {
954 hosed_row = r;
955 hosed_column = c;
956 if (num_ser != 1) {
957 too_fatal = 1;
958 break;
959 }
960 }
961 }
962 }
963 }
964 printf("Hosed component: %s.\n",
965 &cfgPtr->devnames[hosed_row][hosed_column][0]);
966 if (!force) {
967 /*
968 * We'll fail this component, as if there are
969 * other major errors, we aren't forcing things
970 * and we'll abort the config anyways.
971 */
972 if (raidPtr
973 ->Disks[hosed_row][hosed_column].status !=
974 rf_ds_failed) {
975 raidPtr->Disks[hosed_row]
976 [hosed_column].status =
977 rf_ds_failed;
978 raidPtr->numFailures++;
979 raidPtr->status[hosed_row] =
980 rf_rs_degraded;
981 }
982 }
983 } else {
984 too_fatal = 1;
985 }
986 if (cfgPtr->parityConfig == '0') {
987 /*
988 * We've identified two different mod counters.
989 * RAID 0 can't cope with that, so we'll punt.
990 */
991 too_fatal = 1;
992 }
993 }
994
995 raidPtr->mod_counter = mod_number;
996
997 if (too_fatal) {
998 /*
999 * We've had both a serial number mismatch, and a mod_counter
1000 * mismatch -- and they involved two different components !!!
1001 * Bail -- make things fail so that the user must force
1002 * the issue...
1003 */
1004 hosed_row = -1;
1005 hosed_column = -1;
1006 }
1007
1008 if (num_ser > 2) {
1009 printf("raid%d: Too many different serial numbers !\n",
1010 raidPtr->raidid);
1011 }
1012
1013 if (num_mod > 2) {
1014 printf("raid%d: Too many different mod counters !\n",
1015 raidPtr->raidid);
1016 }
1017
1018 /*
1019 * We start by assuming the parity will be good, and flee from
1020 * that notion at the slightest sign of trouble.
1021 */
1022
1023 parity_good = RF_RAID_CLEAN;
1024 for (r = 0; r < raidPtr->numRow; r++) {
1025 for (c = 0; c < raidPtr->numCol; c++) {
1026 dev_name = &cfgPtr->devnames[r][c][0];
1027 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
1028
1029 if ((r == hosed_row) && (c == hosed_column)) {
1030 printf("raid%d: Ignoring %s.\n",
1031 raidPtr->raidid, dev_name);
1032 } else {
1033 rf_print_label_status(raidPtr, r, c, dev_name,
1034 ci_label);
1035 if (rf_check_label_vitals(raidPtr, r, c,
1036 dev_name, ci_label, serial_number,
1037 mod_number)) {
1038 fatal_error = 1;
1039 }
1040 if (ci_label->clean != RF_RAID_CLEAN) {
1041 parity_good = RF_RAID_DIRTY;
1042 }
1043 }
1044 }
1045 }
1046 if (fatal_error) {
1047 parity_good = RF_RAID_DIRTY;
1048 }
1049
1050 /* We note the state of the parity. */
1051 raidPtr->parity_good = parity_good;
1052
1053 return(fatal_error);
1054 }
1055
1056 int
1057 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1058 {
1059 RF_RaidDisk_t *disks;
1060 RF_DiskQueue_t *spareQueues;
1061 int ret;
1062 unsigned int bs;
1063 int spare_number;
1064
1065 #if 0
1066 printf("Just in rf_add_hot_spare: %d.\n", raidPtr->numSpare);
1067 printf("Num col: %d.\n", raidPtr->numCol);
1068 #endif
1069 if (raidPtr->numSpare >= RF_MAXSPARE) {
1070 RF_ERRORMSG1("Too many spares: %d.\n", raidPtr->numSpare);
1071 return(EINVAL);
1072 }
1073
1074 RF_LOCK_MUTEX(raidPtr->mutex);
1075
1076 /* The beginning of the spares... */
1077 disks = &raidPtr->Disks[0][raidPtr->numCol];
1078
1079 spare_number = raidPtr->numSpare;
1080
1081 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1082 &disks[spare_number], 0, raidPtr->numCol + spare_number);
1083
1084 if (ret)
1085 goto fail;
1086 if (disks[spare_number].status != rf_ds_optimal) {
1087 RF_ERRORMSG1("Warning: spare disk %s failed TUR.\n",
1088 sparePtr->component_name);
1089 ret = EINVAL;
1090 goto fail;
1091 } else {
1092 disks[spare_number].status = rf_ds_spare;
1093 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d"
1094 " (%ld MB).\n", spare_number, disks[spare_number].devname,
1095 (long int) disks[spare_number].numBlocks,
1096 disks[spare_number].blockSize,
1097 (long int) disks[spare_number].numBlocks *
1098 disks[spare_number].blockSize / 1024 / 1024);
1099 }
1100
1101
1102 /* Check sizes and block sizes on the spare disk. */
1103 bs = 1 << raidPtr->logBytesPerSector;
1104 if (disks[spare_number].blockSize != bs) {
1105 RF_ERRORMSG3("Block size of %d on spare disk %s is not"
1106 " the same as on other disks (%d).\n",
1107 disks[spare_number].blockSize,
1108 disks[spare_number].devname, bs);
1109 ret = EINVAL;
1110 goto fail;
1111 }
1112 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1113 RF_ERRORMSG3("Spare disk %s (%llu blocks) is too small to serve"
1114 " as a spare (need %llu blocks).\n",
1115 disks[spare_number].devname, disks[spare_number].numBlocks,
1116 raidPtr->sectorsPerDisk);
1117 ret = EINVAL;
1118 goto fail;
1119 } else {
1120 if (disks[spare_number].numBlocks >
1121 raidPtr->sectorsPerDisk) {
1122 RF_ERRORMSG2("Warning: truncating spare disk %s to %llu"
1123 " blocks.\n", disks[spare_number].devname,
1124 raidPtr->sectorsPerDisk);
1125
1126 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1127 }
1128 }
1129
1130 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1131 ret = rf_ConfigureDiskQueue(raidPtr, &spareQueues[spare_number],
1132 0, raidPtr->numCol + spare_number, raidPtr->qType,
1133 raidPtr->sectorsPerDisk, raidPtr->Disks[0][raidPtr->numCol +
1134 spare_number].dev, raidPtr->maxOutstanding,
1135 &raidPtr->shutdownList, raidPtr->cleanupList);
1136
1137
1138 raidPtr->numSpare++;
1139 RF_UNLOCK_MUTEX(raidPtr->mutex);
1140 return (0);
1141
1142 fail:
1143 RF_UNLOCK_MUTEX(raidPtr->mutex);
1144 return(ret);
1145 }
1146
1147 int
1148 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1149 {
1150 int spare_number;
1151
1152 if (raidPtr->numSpare == 0) {
1153 printf("No spares to remove !\n");
1154 return(EINVAL);
1155 }
1156
1157 spare_number = sparePtr->column;
1158
1159 return(EINVAL); /* XXX Not implemented yet. */
1160 #if 0
1161 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1162 return(EINVAL);
1163 }
1164
1165 /* Verify that this spare isn't in use... */
1166
1167 /* It's gone... */
1168
1169 raidPtr->numSpare--;
1170
1171 return (0);
1172 #endif
1173 }
1174
1175 int
1176 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1177 {
1178 RF_RaidDisk_t *disks;
1179
1180 if ((component->row < 0) ||
1181 (component->row >= raidPtr->numRow) ||
1182 (component->column < 0) ||
1183 (component->column >= raidPtr->numCol)) {
1184 return(EINVAL);
1185 }
1186
1187 disks = &raidPtr->Disks[component->row][component->column];
1188
1189 /* 1. This component must be marked as 'failed'. */
1190
1191 return(EINVAL); /* Not implemented yet. */
1192 }
1193
1194 int
1195 rf_incorporate_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1196 {
1197
1198 /*
1199 * Issues here include how to 'move' this in if there is IO
1200 * taking place (e.g. component queues and such).
1201 */
1202
1203 return(EINVAL); /* Not implemented yet. */
1204 }