This source file includes the following definitions.
- rf_CreateRaidFiveDegradedReadDAG
- rf_CreateRaidOneDegradedReadDAG
- rf_CreateDegradedReadDAG
- rf_CreateRaidCDegradedReadDAG
- rf_DD_GenerateFailedAccessASMs
- rf_DoubleDegRead
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37 #include "rf_types.h"
38 #include "rf_raid.h"
39 #include "rf_dag.h"
40 #include "rf_dagutils.h"
41 #include "rf_dagfuncs.h"
42 #include "rf_debugMem.h"
43 #include "rf_memchunk.h"
44 #include "rf_general.h"
45 #include "rf_dagdegrd.h"
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76 void
77 rf_CreateRaidFiveDegradedReadDAG(
78 RF_Raid_t *raidPtr,
79 RF_AccessStripeMap_t *asmap,
80 RF_DagHeader_t *dag_h,
81 void *bp,
82 RF_RaidAccessFlags_t flags,
83 RF_AllocListElem_t *allocList)
84 {
85 rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
86 &rf_xorRecoveryFuncs);
87 }
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113 void
114 rf_CreateRaidOneDegradedReadDAG(
115 RF_Raid_t *raidPtr,
116 RF_AccessStripeMap_t *asmap,
117 RF_DagHeader_t *dag_h,
118 void *bp,
119 RF_RaidAccessFlags_t flags,
120 RF_AllocListElem_t *allocList)
121 {
122 RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode;
123 RF_StripeNum_t parityStripeID;
124 RF_ReconUnitNum_t which_ru;
125 RF_PhysDiskAddr_t *pda;
126 int useMirror, i;
127
128 useMirror = 0;
129 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
130 asmap->raidAddress, &which_ru);
131 if (rf_dagDebug) {
132 printf("[Creating RAID level 1 degraded read DAG]\n");
133 }
134 dag_h->creator = "RaidOneDegradedReadDAG";
135
136 if (asmap->numDataFailed == 0)
137 useMirror = RF_FALSE;
138 else
139 useMirror = RF_TRUE;
140
141
142 RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *),
143 allocList);
144 i = 0;
145 rdNode = &nodes[i];
146 i++;
147 blockNode = &nodes[i];
148 i++;
149 commitNode = &nodes[i];
150 i++;
151 termNode = &nodes[i];
152 i++;
153
154
155
156
157
158
159 dag_h->numCommitNodes = 1;
160 dag_h->numCommits = 0;
161 dag_h->numSuccedents = 1;
162
163
164 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
165 rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
166 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
167 rf_NullNodeUndoFunc, NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
168 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
169 rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
170
171 pda = asmap->physInfo;
172 RF_ASSERT(pda != NULL);
173
174 RF_ASSERT(asmap->parityInfo->next == NULL);
175
176
177 if (!useMirror) {
178
179 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
180 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
181 dag_h, "Rpd", allocList);
182 rdNode->params[0].p = pda;
183 rdNode->params[1].p = pda->bufPtr;
184 rdNode->params[2].v = parityStripeID;
185 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
186 0, 0, which_ru);
187 } else {
188
189 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
190 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
191 dag_h, "Rsd", allocList);
192 rdNode->params[0].p = asmap->parityInfo;
193 rdNode->params[1].p = pda->bufPtr;
194 rdNode->params[2].v = parityStripeID;
195 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
196 0, 0, which_ru);
197 }
198
199
200 RF_ASSERT(dag_h->numSuccedents == 1);
201 RF_ASSERT(blockNode->numAntecedents == 0);
202 dag_h->succedents[0] = blockNode;
203
204
205 RF_ASSERT(blockNode->numSuccedents == 1);
206 RF_ASSERT(rdNode->numAntecedents == 1);
207 blockNode->succedents[0] = rdNode;
208 rdNode->antecedents[0] = blockNode;
209 rdNode->antType[0] = rf_control;
210
211
212 RF_ASSERT(rdNode->numSuccedents == 1);
213 RF_ASSERT(commitNode->numAntecedents == 1);
214 rdNode->succedents[0] = commitNode;
215 commitNode->antecedents[0] = rdNode;
216 commitNode->antType[0] = rf_control;
217
218
219 RF_ASSERT(commitNode->numSuccedents == 1);
220 RF_ASSERT(termNode->numAntecedents == 1);
221 RF_ASSERT(termNode->numSuccedents == 0);
222 commitNode->succedents[0] = termNode;
223 termNode->antecedents[0] = commitNode;
224 termNode->antType[0] = rf_control;
225 }
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257 void
258 rf_CreateDegradedReadDAG(
259 RF_Raid_t *raidPtr,
260 RF_AccessStripeMap_t *asmap,
261 RF_DagHeader_t *dag_h,
262 void *bp,
263 RF_RaidAccessFlags_t flags,
264 RF_AllocListElem_t *allocList,
265 RF_RedFuncs_t *recFunc)
266 {
267 RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *xorNode, *blockNode;
268 RF_DagNode_t *commitNode, *rpNode, *termNode;
269 int nNodes, nRrdNodes, nRudNodes, nXorBufs, i;
270 int j, paramNum;
271 RF_SectorCount_t sectorsPerSU;
272 RF_ReconUnitNum_t which_ru;
273 char *overlappingPDAs;
274 RF_AccessStripeMapHeader_t *new_asm_h[2];
275 RF_PhysDiskAddr_t *pda, *parityPDA;
276 RF_StripeNum_t parityStripeID;
277 RF_PhysDiskAddr_t *failedPDA;
278 RF_RaidLayout_t *layoutPtr;
279 char *rpBuf;
280
281 layoutPtr = &(raidPtr->Layout);
282
283
284
285
286 failedPDA = asmap->failedPDAs[0];
287 parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
288 asmap->raidAddress, &which_ru);
289 sectorsPerSU = layoutPtr->sectorsPerStripeUnit;
290
291 if (rf_dagDebug) {
292 printf("[Creating degraded read DAG]\n");
293 }
294 RF_ASSERT(asmap->numDataFailed == 1);
295 dag_h->creator = "DegradedReadDAG";
296
297
298
299
300
301
302
303 RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed,
304 sizeof(char), (char *));
305 rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h,
306 new_asm_h, &nXorBufs, &rpBuf, overlappingPDAs, allocList);
307
308
309
310
311
312
313 nRudNodes = asmap->numStripeUnitsAccessed - 1;
314 nRrdNodes = ((new_asm_h[0]) ?
315 new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) +
316 ((new_asm_h[1]) ?
317 new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0);
318 nNodes = 5 + nRudNodes + nRrdNodes;
319
320
321
322 RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *),
323 allocList);
324 i = 0;
325 blockNode = &nodes[i];
326 i++;
327 commitNode = &nodes[i];
328 i++;
329 xorNode = &nodes[i];
330 i++;
331 rpNode = &nodes[i];
332 i++;
333 termNode = &nodes[i];
334 i++;
335 rudNodes = &nodes[i];
336 i += nRudNodes;
337 rrdNodes = &nodes[i];
338 i += nRrdNodes;
339 RF_ASSERT(i == nNodes);
340
341
342 dag_h->numCommitNodes = 1;
343 dag_h->numCommits = 0;
344
345
346
347
348 dag_h->numSuccedents = 1;
349
350 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
351 rf_NullNodeUndoFunc, NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0,
352 dag_h, "Nil", allocList);
353 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
354 rf_NullNodeUndoFunc, NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
355 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
356 rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
357 rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple,
358 rf_NullNodeUndoFunc, NULL, 1, nRudNodes + nRrdNodes + 1,
359 2 * nXorBufs + 2, 1, dag_h, recFunc->SimpleName, allocList);
360
361
362 for (pda = asmap->physInfo, i = 0; i < nRudNodes;
363 i++, pda = pda->next) {
364 if (pda == failedPDA) {
365 i--;
366 continue;
367 }
368 rf_InitNode(&rudNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc,
369 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
370 dag_h, "Rud", allocList);
371 RF_ASSERT(pda);
372 rudNodes[i].params[0].p = pda;
373 rudNodes[i].params[1].p = pda->bufPtr;
374 rudNodes[i].params[2].v = parityStripeID;
375 rudNodes[i].params[3].v =
376 RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
377 }
378
379
380 i = 0;
381 if (new_asm_h[0]) {
382 for (pda = new_asm_h[0]->stripeMap->physInfo;
383 i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
384 i++, pda = pda->next) {
385 rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE,
386 rf_DiskReadFunc, rf_DiskReadUndoFunc,
387 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
388 "Rrd", allocList);
389 RF_ASSERT(pda);
390 rrdNodes[i].params[0].p = pda;
391 rrdNodes[i].params[1].p = pda->bufPtr;
392 rrdNodes[i].params[2].v = parityStripeID;
393 rrdNodes[i].params[3].v =
394 RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0,
395 which_ru);
396 }
397 }
398 if (new_asm_h[1]) {
399 for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo;
400 j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
401 j++, pda = pda->next) {
402 rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE,
403 rf_DiskReadFunc, rf_DiskReadUndoFunc,
404 rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
405 "Rrd", allocList);
406 RF_ASSERT(pda);
407 rrdNodes[i + j].params[0].p = pda;
408 rrdNodes[i + j].params[1].p = pda->bufPtr;
409 rrdNodes[i + j].params[2].v = parityStripeID;
410 rrdNodes[i + j].params[3].v =
411 RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0,
412 which_ru);
413 }
414 }
415
416 RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t),
417 (RF_PhysDiskAddr_t *), allocList);
418 parityPDA->row = asmap->parityInfo->row;
419 parityPDA->col = asmap->parityInfo->col;
420 parityPDA->startSector = ((asmap->parityInfo->startSector /
421 sectorsPerSU) * sectorsPerSU) +
422 (failedPDA->startSector % sectorsPerSU);
423 parityPDA->numSector = failedPDA->numSector;
424
425
426 rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
427 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
428 "Rp ", allocList);
429 rpNode->params[0].p = parityPDA;
430 rpNode->params[1].p = rpBuf;
431 rpNode->params[2].v = parityStripeID;
432 rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0,
433 which_ru);
434
435
436
437
438
439 paramNum = 0;
440 for (i = 0; i < nRrdNodes; i++) {
441
442 xorNode->params[paramNum++] = rrdNodes[i].params[0];
443 xorNode->params[paramNum++] = rrdNodes[i].params[1];
444 }
445 for (i = 0; i < nRudNodes; i++) {
446
447
448 if (overlappingPDAs[i]) {
449 RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t),
450 (RF_PhysDiskAddr_t *), allocList);
451 bcopy((char *) rudNodes[i].params[0].p, (char *) pda,
452 sizeof(RF_PhysDiskAddr_t));
453 rf_RangeRestrictPDA(raidPtr, failedPDA, pda,
454 RF_RESTRICT_DOBUFFER, 0);
455 xorNode->params[paramNum++].p = pda;
456 xorNode->params[paramNum++].p = pda->bufPtr;
457 }
458 }
459 RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char));
460
461
462 xorNode->params[paramNum++].p = parityPDA;
463 xorNode->params[paramNum++].p = rpBuf;
464
465
466
467
468
469 xorNode->params[paramNum++].p = failedPDA;
470 xorNode->params[paramNum++].p = raidPtr;
471 RF_ASSERT(paramNum == 2 * nXorBufs + 2);
472
473
474
475
476
477
478 xorNode->results[0] = failedPDA->bufPtr;
479 RF_BZERO(bp, failedPDA->bufPtr, rf_RaidAddressToByte(raidPtr,
480 failedPDA->numSector));
481
482
483
484 RF_ASSERT(dag_h->numSuccedents == 1);
485 RF_ASSERT(blockNode->numAntecedents == 0);
486 dag_h->succedents[0] = blockNode;
487
488
489 RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes));
490 RF_ASSERT(rpNode->numAntecedents == 1);
491 blockNode->succedents[0] = rpNode;
492 rpNode->antecedents[0] = blockNode;
493 rpNode->antType[0] = rf_control;
494 for (i = 0; i < nRrdNodes; i++) {
495 RF_ASSERT(rrdNodes[i].numSuccedents == 1);
496 blockNode->succedents[1 + i] = &rrdNodes[i];
497 rrdNodes[i].antecedents[0] = blockNode;
498 rrdNodes[i].antType[0] = rf_control;
499 }
500 for (i = 0; i < nRudNodes; i++) {
501 RF_ASSERT(rudNodes[i].numSuccedents == 1);
502 blockNode->succedents[1 + nRrdNodes + i] = &rudNodes[i];
503 rudNodes[i].antecedents[0] = blockNode;
504 rudNodes[i].antType[0] = rf_control;
505 }
506
507
508 RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes));
509 RF_ASSERT(rpNode->numSuccedents == 1);
510 rpNode->succedents[0] = xorNode;
511 xorNode->antecedents[0] = rpNode;
512 xorNode->antType[0] = rf_trueData;
513 for (i = 0; i < nRrdNodes; i++) {
514 RF_ASSERT(rrdNodes[i].numSuccedents == 1);
515 rrdNodes[i].succedents[0] = xorNode;
516 xorNode->antecedents[1 + i] = &rrdNodes[i];
517 xorNode->antType[1 + i] = rf_trueData;
518 }
519 for (i = 0; i < nRudNodes; i++) {
520 RF_ASSERT(rudNodes[i].numSuccedents == 1);
521 rudNodes[i].succedents[0] = xorNode;
522 xorNode->antecedents[1 + nRrdNodes + i] = &rudNodes[i];
523 xorNode->antType[1 + nRrdNodes + i] = rf_trueData;
524 }
525
526
527 RF_ASSERT(xorNode->numSuccedents == 1);
528 RF_ASSERT(commitNode->numAntecedents == 1);
529 xorNode->succedents[0] = commitNode;
530 commitNode->antecedents[0] = xorNode;
531 commitNode->antType[0] = rf_control;
532
533
534 RF_ASSERT(commitNode->numSuccedents == 1);
535 RF_ASSERT(termNode->numAntecedents == 1);
536 RF_ASSERT(termNode->numSuccedents == 0);
537 commitNode->succedents[0] = termNode;
538 termNode->antType[0] = rf_control;
539 termNode->antecedents[0] = commitNode;
540 }
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559 void
560 rf_CreateRaidCDegradedReadDAG(
561 RF_Raid_t *raidPtr,
562 RF_AccessStripeMap_t *asmap,
563 RF_DagHeader_t *dag_h,
564 void *bp,
565 RF_RaidAccessFlags_t flags,
566 RF_AllocListElem_t *allocList
567 )
568 {
569 RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode;
570 RF_StripeNum_t parityStripeID;
571 int useMirror, i, shiftable;
572 RF_ReconUnitNum_t which_ru;
573 RF_PhysDiskAddr_t *pda;
574
575 if ((asmap->numDataFailed + asmap->numParityFailed) == 0) {
576 shiftable = RF_TRUE;
577 } else {
578 shiftable = RF_FALSE;
579 }
580 useMirror = 0;
581 parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
582 asmap->raidAddress, &which_ru);
583
584 if (rf_dagDebug) {
585 printf("[Creating RAID C degraded read DAG]\n");
586 }
587 dag_h->creator = "RaidCDegradedReadDAG";
588
589 if (asmap->numDataFailed == 0)
590 useMirror = RF_FALSE;
591 else
592 useMirror = RF_TRUE;
593
594
595 RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *),
596 allocList);
597 i = 0;
598 rdNode = &nodes[i];
599 i++;
600 blockNode = &nodes[i];
601 i++;
602 commitNode = &nodes[i];
603 i++;
604 termNode = &nodes[i];
605 i++;
606
607
608
609
610
611
612 dag_h->numCommitNodes = 1;
613 dag_h->numCommits = 0;
614 dag_h->numSuccedents = 1;
615
616
617 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
618 rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
619 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
620 rf_NullNodeUndoFunc, NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
621 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
622 rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
623
624 pda = asmap->physInfo;
625 RF_ASSERT(pda != NULL);
626
627 RF_ASSERT(asmap->parityInfo->next == NULL);
628
629
630 if (!useMirror) {
631 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
632 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
633 dag_h, "Rpd", allocList);
634 if (shiftable && rf_compute_workload_shift(raidPtr, pda)) {
635
636 rdNode->params[0].p = asmap->parityInfo;
637 rdNode->params[1].p = pda->bufPtr;
638 rdNode->params[2].v = parityStripeID;
639 rdNode->params[3].v = RF_CREATE_PARAM3(
640 RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
641 } else {
642
643 rdNode->params[0].p = pda;
644 rdNode->params[1].p = pda->bufPtr;
645 rdNode->params[2].v = parityStripeID;
646 rdNode->params[3].v = RF_CREATE_PARAM3(
647 RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
648 }
649 } else {
650
651 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
652 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
653 dag_h, "Rsd", allocList);
654 rdNode->params[0].p = asmap->parityInfo;
655 rdNode->params[1].p = pda->bufPtr;
656 rdNode->params[2].v = parityStripeID;
657 rdNode->params[3].v =
658 RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
659 }
660
661
662 RF_ASSERT(dag_h->numSuccedents == 1);
663 RF_ASSERT(blockNode->numAntecedents == 0);
664 dag_h->succedents[0] = blockNode;
665
666
667 RF_ASSERT(blockNode->numSuccedents == 1);
668 RF_ASSERT(rdNode->numAntecedents == 1);
669 blockNode->succedents[0] = rdNode;
670 rdNode->antecedents[0] = blockNode;
671 rdNode->antType[0] = rf_control;
672
673
674 RF_ASSERT(rdNode->numSuccedents == 1);
675 RF_ASSERT(commitNode->numAntecedents == 1);
676 rdNode->succedents[0] = commitNode;
677 commitNode->antecedents[0] = rdNode;
678 commitNode->antType[0] = rf_control;
679
680
681 RF_ASSERT(commitNode->numSuccedents == 1);
682 RF_ASSERT(termNode->numAntecedents == 1);
683 RF_ASSERT(termNode->numSuccedents == 0);
684 commitNode->succedents[0] = termNode;
685 termNode->antecedents[0] = commitNode;
686 termNode->antType[0] = rf_control;
687 }
688
689
690
691
692 void
693 rf_DD_GenerateFailedAccessASMs(
694 RF_Raid_t *raidPtr,
695 RF_AccessStripeMap_t *asmap,
696 RF_PhysDiskAddr_t **pdap,
697 int *nNodep,
698 RF_PhysDiskAddr_t **pqpdap,
699 int *nPQNodep,
700 RF_AllocListElem_t *allocList
701 )
702 {
703 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
704 int PDAPerDisk, i;
705 RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
706 int numDataCol = layoutPtr->numDataCol;
707 int state;
708 RF_SectorNum_t suoff, suend;
709 unsigned firstDataCol, napdas, count;
710 RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0;
711 RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0];
712 RF_PhysDiskAddr_t *ftwo = asmap->failedPDAs[1];
713 RF_PhysDiskAddr_t *pda_p;
714 RF_PhysDiskAddr_t *phys_p;
715 RF_RaidAddr_t sosAddr;
716
717
718
719
720
721
722
723 fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector);
724 fone_end = fone_start + fone->numSector;
725
726 #define CONS_PDA(if,start,num) do { \
727 pda_p->row = asmap->if->row; \
728 pda_p->col = asmap->if->col; \
729 pda_p->startSector = ((asmap->if->startSector / secPerSU) * \
730 secPerSU) + start; \
731 pda_p->numSector = num; \
732 pda_p->next = NULL; \
733 RF_MallocAndAdd(pda_p->bufPtr, \
734 rf_RaidAddressToByte(raidPtr,num),(char *), allocList); \
735 } while (0)
736
737 if (asmap->numDataFailed == 1) {
738 PDAPerDisk = 1;
739 state = 1;
740 RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t),
741 (RF_PhysDiskAddr_t *), allocList);
742 pda_p = *pqpdap;
743
744 CONS_PDA(parityInfo, fone_start, fone->numSector);
745 pda_p->type = RF_PDA_TYPE_PARITY;
746 pda_p++;
747
748 CONS_PDA(qInfo, fone_start, fone->numSector);
749 pda_p->type = RF_PDA_TYPE_Q;
750 } else {
751 ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector);
752 ftwo_end = ftwo_start + ftwo->numSector;
753 if (fone->numSector + ftwo->numSector > secPerSU) {
754 PDAPerDisk = 1;
755 state = 2;
756 RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t),
757 (RF_PhysDiskAddr_t *), allocList);
758 pda_p = *pqpdap;
759 CONS_PDA(parityInfo, 0, secPerSU);
760 pda_p->type = RF_PDA_TYPE_PARITY;
761 pda_p++;
762 CONS_PDA(qInfo, 0, secPerSU);
763 pda_p->type = RF_PDA_TYPE_Q;
764 } else {
765 PDAPerDisk = 2;
766 state = 3;
767
768 RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t),
769 (RF_PhysDiskAddr_t *), allocList);
770 pda_p = *pqpdap;
771 CONS_PDA(parityInfo, fone_start, fone->numSector);
772 pda_p->type = RF_PDA_TYPE_PARITY;
773 pda_p++;
774 CONS_PDA(qInfo, fone_start, fone->numSector);
775 pda_p->type = RF_PDA_TYPE_Q;
776 pda_p++;
777 CONS_PDA(parityInfo, ftwo_start, ftwo->numSector);
778 pda_p->type = RF_PDA_TYPE_PARITY;
779 pda_p++;
780 CONS_PDA(qInfo, ftwo_start, ftwo->numSector);
781 pda_p->type = RF_PDA_TYPE_Q;
782 }
783 }
784
785 napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed -
786 (ftwo == NULL ? 1 : 0));
787 *nPQNodep = PDAPerDisk;
788
789
790
791
792
793
794 count = 0;
795 for (pda_p = asmap->physInfo; pda_p; pda_p = pda_p->next) {
796 if ((pda_p == fone) || (pda_p == ftwo))
797 continue;
798 suoff = rf_StripeUnitOffset(layoutPtr, pda_p->startSector);
799 suend = suoff + pda_p->numSector;
800 switch (state) {
801 case 1:
802
803
804
805
806 if ((suoff > fone_start) || (suend < fone_end))
807 count++;
808 break;
809 case 2:
810 if (suoff)
811 count++;
812 if (suend < numDataCol)
813 count++;
814 break;
815 case 3:
816 if ((suoff > fone_start) || (suend < fone_end))
817 count++;
818 if ((suoff > ftwo_start) || (suend < ftwo_end))
819 count++;
820 break;
821 default:
822 RF_PANIC();
823 }
824 }
825
826 napdas += count;
827 *nNodep = napdas;
828 if (napdas == 0)
829 return;
830
831
832
833 RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t),
834 (RF_PhysDiskAddr_t *), allocList);
835 *pdap = pda_p;
836
837
838 for (i = 0; i < (napdas - 1); i++)
839 pda_p[i].next = pda_p + (i + 1);
840
841
842 firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
843 asmap->physInfo->raidAddress) % numDataCol;
844 sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
845 asmap->raidAddress);
846 for (i = 0; i < firstDataCol; i++) {
847 if ((pda_p - (*pdap)) == napdas)
848 continue;
849 pda_p->type = RF_PDA_TYPE_DATA;
850 pda_p->raidAddress = sosAddr + (i * secPerSU);
851 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress,
852 &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
853
854 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status))
855 continue;
856 switch (state) {
857 case 1:
858 pda_p->numSector = fone->numSector;
859 pda_p->raidAddress += fone_start;
860 pda_p->startSector += fone_start;
861 RF_MallocAndAdd(pda_p->bufPtr,
862 rf_RaidAddressToByte(raidPtr, pda_p->numSector),
863 (char *), allocList);
864 break;
865 case 2:
866 pda_p->numSector = secPerSU;
867 RF_MallocAndAdd(pda_p->bufPtr,
868 rf_RaidAddressToByte(raidPtr, secPerSU),
869 (char *), allocList);
870 break;
871 case 3:
872 pda_p->numSector = fone->numSector;
873 pda_p->raidAddress += fone_start;
874 pda_p->startSector += fone_start;
875 RF_MallocAndAdd(pda_p->bufPtr,
876 rf_RaidAddressToByte(raidPtr, pda_p->numSector),
877 (char *), allocList);
878 pda_p++;
879 pda_p->type = RF_PDA_TYPE_DATA;
880 pda_p->raidAddress = sosAddr + (i * secPerSU);
881 (raidPtr->Layout.map->MapSector) (raidPtr,
882 pda_p->raidAddress, &(pda_p->row), &(pda_p->col),
883 &(pda_p->startSector), 0);
884 pda_p->numSector = ftwo->numSector;
885 pda_p->raidAddress += ftwo_start;
886 pda_p->startSector += ftwo_start;
887 RF_MallocAndAdd(pda_p->bufPtr,
888 rf_RaidAddressToByte(raidPtr, pda_p->numSector),
889 (char *), allocList);
890 break;
891 default:
892 RF_PANIC();
893 }
894 pda_p++;
895 }
896
897
898 for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) {
899 if ((phys_p == asmap->failedPDAs[0]) ||
900 (phys_p == asmap->failedPDAs[1]))
901 continue;
902 suoff = rf_StripeUnitOffset(layoutPtr, phys_p->startSector);
903 suend = suoff + phys_p->numSector;
904 switch (state) {
905 case 1:
906 if (suoff > fone_start) {
907 RF_ASSERT(suend >= fone_end);
908
909
910
911
912 pda_p->numSector = suoff - fone_start;
913 pda_p->raidAddress = sosAddr + (i * secPerSU)
914 + fone_start;
915 (raidPtr->Layout.map->MapSector) (raidPtr,
916 pda_p->raidAddress, &(pda_p->row),
917 &(pda_p->col), &(pda_p->startSector), 0);
918 RF_MallocAndAdd(pda_p->bufPtr,
919 rf_RaidAddressToByte(raidPtr,
920 pda_p->numSector), (char *), allocList);
921 pda_p++;
922 }
923 if (suend < fone_end) {
924 RF_ASSERT(suoff <= fone_start);
925
926
927
928
929 pda_p->numSector = fone_end - suend;
930 pda_p->raidAddress = sosAddr + (i * secPerSU)
931 + suend;
932 (raidPtr->Layout.map->MapSector) (raidPtr,
933 pda_p->raidAddress, &(pda_p->row),
934 &(pda_p->col), &(pda_p->startSector), 0);
935 RF_MallocAndAdd(pda_p->bufPtr,
936 rf_RaidAddressToByte(raidPtr,
937 pda_p->numSector), (char *), allocList);
938 pda_p++;
939 }
940 break;
941 case 2:
942 RF_ASSERT((suoff == 0) || (suend == secPerSU));
943 if (suend < secPerSU) {
944
945 pda_p->numSector = secPerSU - suend;
946 pda_p->raidAddress = sosAddr + (i * secPerSU)
947 + suend;
948 (raidPtr->Layout.map->MapSector) (raidPtr,
949 pda_p->raidAddress, &(pda_p->row),
950 &(pda_p->col), &(pda_p->startSector), 0);
951 RF_MallocAndAdd(pda_p->bufPtr,
952 rf_RaidAddressToByte(raidPtr,
953 pda_p->numSector), (char *), allocList);
954 pda_p++;
955 } else
956 if (suoff > 0) {
957
958 pda_p->numSector = suoff;
959 pda_p->raidAddress = sosAddr +
960 (i * secPerSU);
961 (raidPtr->Layout.map->MapSector)
962 (raidPtr, pda_p->raidAddress,
963 &(pda_p->row), &(pda_p->col),
964 &(pda_p->startSector), 0);
965 RF_MallocAndAdd(pda_p->bufPtr,
966 rf_RaidAddressToByte(raidPtr,
967 pda_p->numSector), (char *),
968 allocList);
969 pda_p++;
970 }
971 break;
972 case 3:
973 if ((suoff > fone_start) || (suend < fone_end)) {
974 if (suoff > fone_start) {
975 RF_ASSERT(suend >= fone_end);
976
977
978
979
980
981 pda_p->numSector = suoff - fone_start;
982 pda_p->raidAddress = sosAddr +
983 (i * secPerSU) + fone_start;
984 (raidPtr->Layout.map->MapSector)
985 (raidPtr, pda_p->raidAddress,
986 &(pda_p->row), &(pda_p->col),
987 &(pda_p->startSector), 0);
988 RF_MallocAndAdd(pda_p->bufPtr,
989 rf_RaidAddressToByte(raidPtr,
990 pda_p->numSector), (char *),
991 allocList);
992 pda_p++;
993 }
994 if (suend < fone_end) {
995 RF_ASSERT(suoff <= fone_start);
996
997
998
999
1000 pda_p->numSector = fone_end - suend;
1001 pda_p->raidAddress = sosAddr +
1002 (i * secPerSU) +
1003 suend;
1004 (raidPtr->Layout.map->MapSector)
1005 (raidPtr, pda_p->raidAddress,
1006 &(pda_p->row), &(pda_p->col),
1007 &(pda_p->startSector), 0);
1008 RF_MallocAndAdd(pda_p->bufPtr,
1009 rf_RaidAddressToByte(raidPtr,
1010 pda_p->numSector), (char *),
1011 allocList);
1012 pda_p++;
1013 }
1014 }
1015 if ((suoff > ftwo_start) || (suend < ftwo_end)) {
1016 if (suoff > ftwo_start) {
1017 RF_ASSERT(suend >= ftwo_end);
1018
1019
1020
1021
1022
1023 pda_p->numSector = suoff - ftwo_start;
1024 pda_p->raidAddress = sosAddr +
1025 (i * secPerSU) + ftwo_start;
1026 (raidPtr->Layout.map->MapSector)
1027 (raidPtr, pda_p->raidAddress,
1028 &(pda_p->row), &(pda_p->col),
1029 &(pda_p->startSector), 0);
1030 RF_MallocAndAdd(pda_p->bufPtr,
1031 rf_RaidAddressToByte(raidPtr,
1032 pda_p->numSector), (char *),
1033 allocList);
1034 pda_p++;
1035 }
1036 if (suend < ftwo_end) {
1037 RF_ASSERT(suoff <= ftwo_start);
1038
1039
1040
1041
1042 pda_p->numSector = ftwo_end - suend;
1043 pda_p->raidAddress = sosAddr +
1044 (i * secPerSU) +
1045 suend;
1046 (raidPtr->Layout.map->MapSector)
1047 (raidPtr, pda_p->raidAddress,
1048 &(pda_p->row), &(pda_p->col),
1049 &(pda_p->startSector), 0);
1050 RF_MallocAndAdd(pda_p->bufPtr,
1051 rf_RaidAddressToByte(raidPtr,
1052 pda_p->numSector), (char *),
1053 allocList);
1054 pda_p++;
1055 }
1056 }
1057 break;
1058 default:
1059 RF_PANIC();
1060 }
1061 }
1062
1063
1064 for (; i < numDataCol; i++) {
1065 if ((pda_p - (*pdap)) == napdas)
1066 continue;
1067 pda_p->type = RF_PDA_TYPE_DATA;
1068 pda_p->raidAddress = sosAddr + (i * secPerSU);
1069 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress,
1070 &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
1071
1072 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status))
1073 continue;
1074 switch (state) {
1075 case 1:
1076 pda_p->numSector = fone->numSector;
1077 pda_p->raidAddress += fone_start;
1078 pda_p->startSector += fone_start;
1079 RF_MallocAndAdd(pda_p->bufPtr,
1080 rf_RaidAddressToByte(raidPtr, pda_p->numSector),
1081 (char *), allocList);
1082 break;
1083 case 2:
1084 pda_p->numSector = secPerSU;
1085 RF_MallocAndAdd(pda_p->bufPtr,
1086 rf_RaidAddressToByte(raidPtr, secPerSU),
1087 (char *), allocList);
1088 break;
1089 case 3:
1090 pda_p->numSector = fone->numSector;
1091 pda_p->raidAddress += fone_start;
1092 pda_p->startSector += fone_start;
1093 RF_MallocAndAdd(pda_p->bufPtr,
1094 rf_RaidAddressToByte(raidPtr, pda_p->numSector),
1095 (char *), allocList);
1096 pda_p++;
1097 pda_p->type = RF_PDA_TYPE_DATA;
1098 pda_p->raidAddress = sosAddr + (i * secPerSU);
1099 (raidPtr->Layout.map->MapSector) (raidPtr,
1100 pda_p->raidAddress, &(pda_p->row), &(pda_p->col),
1101 &(pda_p->startSector), 0);
1102 pda_p->numSector = ftwo->numSector;
1103 pda_p->raidAddress += ftwo_start;
1104 pda_p->startSector += ftwo_start;
1105 RF_MallocAndAdd(pda_p->bufPtr,
1106 rf_RaidAddressToByte(raidPtr, pda_p->numSector),
1107 (char *), allocList);
1108 break;
1109 default:
1110 RF_PANIC();
1111 }
1112 pda_p++;
1113 }
1114
1115 RF_ASSERT(pda_p - *pdap == napdas);
1116 return;
1117 }
1118
1119 #define INIT_DISK_NODE(node,name) do { \
1120 rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, \
1121 rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, \
1122 dag_h, name, allocList); \
1123 (node)->succedents[0] = unblockNode; \
1124 (node)->succedents[1] = recoveryNode; \
1125 (node)->antecedents[0] = blockNode; \
1126 (node)->antType[0] = rf_control; \
1127 } while (0)
1128
1129 #define DISK_NODE_PARAMS(_node_,_p_) do { \
1130 (_node_).params[0].p = _p_ ; \
1131 (_node_).params[1].p = (_p_)->bufPtr; \
1132 (_node_).params[2].v = parityStripeID; \
1133 (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, \
1134 0, 0, which_ru); \
1135 } while (0)
1136
1137 void
1138 rf_DoubleDegRead(
1139 RF_Raid_t *raidPtr,
1140 RF_AccessStripeMap_t *asmap,
1141 RF_DagHeader_t *dag_h,
1142 void *bp,
1143 RF_RaidAccessFlags_t flags,
1144 RF_AllocListElem_t *allocList,
1145 char *redundantReadNodeName,
1146 char *recoveryNodeName,
1147 int (*recovFunc) (RF_DagNode_t *)
1148 )
1149 {
1150 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
1151 RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode,
1152 *unblockNode, *rpNodes, *rqNodes, *termNode;
1153 RF_PhysDiskAddr_t *pda, *pqPDAs;
1154 RF_PhysDiskAddr_t *npdas;
1155 int nNodes, nRrdNodes, nRudNodes, i;
1156 RF_ReconUnitNum_t which_ru;
1157 int nReadNodes, nPQNodes;
1158 RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0];
1159 RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1];
1160 RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(
1161 layoutPtr, asmap->raidAddress, &which_ru);
1162
1163 if (rf_dagDebug)
1164 printf("[Creating Double Degraded Read DAG]\n");
1165 rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes,
1166 &pqPDAs, &nPQNodes, allocList);
1167
1168 nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed);
1169 nReadNodes = nRrdNodes + nRudNodes + 2 * nPQNodes;
1170 nNodes = 4 + nReadNodes;
1171
1172 RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *),
1173 allocList);
1174 i = 0;
1175 blockNode = &nodes[i];
1176 i += 1;
1177 unblockNode = &nodes[i];
1178 i += 1;
1179 recoveryNode = &nodes[i];
1180 i += 1;
1181 termNode = &nodes[i];
1182 i += 1;
1183 rudNodes = &nodes[i];
1184 i += nRudNodes;
1185 rrdNodes = &nodes[i];
1186 i += nRrdNodes;
1187 rpNodes = &nodes[i];
1188 i += nPQNodes;
1189 rqNodes = &nodes[i];
1190 i += nPQNodes;
1191 RF_ASSERT(i == nNodes);
1192
1193 dag_h->numSuccedents = 1;
1194 dag_h->succedents[0] = blockNode;
1195 dag_h->creator = "DoubleDegRead";
1196 dag_h->numCommits = 0;
1197 dag_h->numCommitNodes = 1;
1198
1199 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
1200 rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList);
1201 termNode->antecedents[0] = unblockNode;
1202 termNode->antType[0] = rf_control;
1203 termNode->antecedents[1] = recoveryNode;
1204 termNode->antType[1] = rf_control;
1205
1206
1207
1208
1209
1210
1211
1212 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
1213 rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h,
1214 "Nil", allocList);
1215 rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
1216 rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h,
1217 "Nil", allocList);
1218
1219 for (i = 0; i < nReadNodes; i++) {
1220 blockNode->succedents[i] = rudNodes + i;
1221 unblockNode->antecedents[i] = rudNodes + i;
1222 unblockNode->antType[i] = rf_control;
1223 }
1224 unblockNode->succedents[0] = termNode;
1225
1226
1227
1228
1229
1230
1231 rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc,
1232 rf_NullNodeUndoFunc, NULL,
1233 1,
1234 nReadNodes,
1235 nReadNodes + 2,
1236 asmap->numDataFailed,
1237 dag_h, recoveryNodeName, allocList);
1238
1239 recoveryNode->succedents[0] = termNode;
1240 for (i = 0; i < nReadNodes; i++) {
1241 recoveryNode->antecedents[i] = rudNodes + i;
1242 recoveryNode->antType[i] = rf_trueData;
1243 }
1244
1245
1246
1247
1248
1249 pda = asmap->physInfo;
1250 for (i = 0; i < nRudNodes; pda = pda->next) {
1251 if ((pda == failedPDA) || (pda == failedPDAtwo))
1252 continue;
1253 INIT_DISK_NODE(rudNodes + i, "Rud");
1254 RF_ASSERT(pda);
1255 DISK_NODE_PARAMS(rudNodes[i], pda);
1256 i++;
1257 }
1258
1259 pda = npdas;
1260 for (i = 0; i < nRrdNodes; i++, pda = pda->next) {
1261 INIT_DISK_NODE(rrdNodes + i, "Rrd");
1262 RF_ASSERT(pda);
1263 DISK_NODE_PARAMS(rrdNodes[i], pda);
1264 }
1265
1266
1267 pda = pqPDAs;
1268 INIT_DISK_NODE(rpNodes, "Rp");
1269 RF_ASSERT(pda);
1270 DISK_NODE_PARAMS(rpNodes[0], pda);
1271 pda++;
1272 INIT_DISK_NODE(rqNodes, redundantReadNodeName);
1273 RF_ASSERT(pda);
1274 DISK_NODE_PARAMS(rqNodes[0], pda);
1275 if (nPQNodes == 2) {
1276 pda++;
1277 INIT_DISK_NODE(rpNodes + 1, "Rp");
1278 RF_ASSERT(pda);
1279 DISK_NODE_PARAMS(rpNodes[1], pda);
1280 pda++;
1281 INIT_DISK_NODE(rqNodes + 1, redundantReadNodeName);
1282 RF_ASSERT(pda);
1283 DISK_NODE_PARAMS(rqNodes[1], pda);
1284 }
1285
1286 for (i = 0; i < nReadNodes; i++)
1287 recoveryNode->params[i] = rudNodes[i].params[0];
1288 recoveryNode->params[i++].p = (void *) raidPtr;
1289 recoveryNode->params[i++].p = (void *) asmap;
1290 recoveryNode->results[0] = failedPDA;
1291 if (asmap->numDataFailed == 2)
1292 recoveryNode->results[1] = failedPDAtwo;
1293
1294
1295 }