This source file includes following definitions.
- rf_InitNode
- rf_FreeDAG
- rf_MakePropListEntry
- rf_ShutdownDAGs
- rf_ConfigureDAGs
- rf_AllocDAGHeader
- rf_FreeDAGHeader
- rf_AllocBuffer
- rf_NodeStatusString
- rf_PrintNodeInfoString
- rf_RecurPrintDAG
- rf_PrintDAG
- rf_AssignNodeNums
- rf_RecurAssignNodeNums
- rf_ResetDAGHeaderPointers
- rf_RecurResetDAGHeaderPointers
- rf_PrintDAGList
- rf_ValidateBranch
- rf_ValidateBranchVisitedBits
- rf_ValidateVisitedBits
- rf_ValidateDAG
- rf_redirect_asm
- rf_MapUnaccessedPortionOfStripe
- rf_PDAOverlap
- rf_GenerateFailedAccessASMs
- rf_RangeRestrictPDA
- rf_compute_workload_shift
- rf_SelectMirrorDiskIdle
- rf_SelectMirrorDiskPartition
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37 #include "rf_archs.h"
38 #include "rf_types.h"
39 #include "rf_threadstuff.h"
40 #include "rf_raid.h"
41 #include "rf_dag.h"
42 #include "rf_dagutils.h"
43 #include "rf_dagfuncs.h"
44 #include "rf_general.h"
45 #include "rf_freelist.h"
46 #include "rf_map.h"
47 #include "rf_shutdown.h"
48
49 #define SNUM_DIFF(_a_,_b_) (((_a_)>(_b_))?((_a_)-(_b_)):((_b_)-(_a_)))
50
51 RF_RedFuncs_t rf_xorFuncs = {
52 rf_RegularXorFunc, "Reg Xr", rf_SimpleXorFunc, "Simple Xr"
53 };
54
55 RF_RedFuncs_t rf_xorRecoveryFuncs = {
56 rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"
57 };
58
59 void rf_RecurPrintDAG(RF_DagNode_t *, int, int);
60 void rf_PrintDAG(RF_DagHeader_t *);
61 int rf_ValidateBranch(RF_DagNode_t *, int *, int *, RF_DagNode_t **, int);
62 void rf_ValidateBranchVisitedBits(RF_DagNode_t *, int, int);
63 void rf_ValidateVisitedBits(RF_DagHeader_t *);
64
65
66
67
68
69
70
71
72
73 void
74 rf_InitNode(
75 RF_DagNode_t *node,
76 RF_NodeStatus_t initstatus,
77 int commit,
78 int (*doFunc) (RF_DagNode_t *),
79 int (*undoFunc) (RF_DagNode_t *node),
80 int (*wakeFunc) (RF_DagNode_t *node, int),
81 int nSucc,
82 int nAnte,
83 int nParam,
84 int nResult,
85 RF_DagHeader_t *hdr,
86 char *name,
87 RF_AllocListElem_t *alist
88 )
89 {
90 void **ptrs;
91 int nptrs;
92
93 if (nAnte > RF_MAX_ANTECEDENTS)
94 RF_PANIC();
95 node->status = initstatus;
96 node->commitNode = commit;
97 node->doFunc = doFunc;
98 node->undoFunc = undoFunc;
99 node->wakeFunc = wakeFunc;
100 node->numParams = nParam;
101 node->numResults = nResult;
102 node->numAntecedents = nAnte;
103 node->numAntDone = 0;
104 node->next = NULL;
105 node->numSuccedents = nSucc;
106 node->name = name;
107 node->dagHdr = hdr;
108 node->visited = 0;
109
110
111 nptrs = nSucc + nAnte + nResult + nSucc;
112
113 if (nptrs <= RF_DAG_PTRCACHESIZE) {
114
115
116
117
118
119
120
121
122
123
124
125 ptrs = (void **) node->dag_ptrs;
126 } else {
127 RF_CallocAndAdd(ptrs, nptrs, sizeof(void *), (void **), alist);
128 }
129 node->succedents = (nSucc) ? (RF_DagNode_t **) ptrs : NULL;
130 node->antecedents = (nAnte) ? (RF_DagNode_t **) (ptrs + nSucc) : NULL;
131 node->results = (nResult) ? (void **) (ptrs + nSucc + nAnte) : NULL;
132 node->propList = (nSucc) ? (RF_PropHeader_t **)
133 (ptrs + nSucc + nAnte + nResult) : NULL;
134
135 if (nParam) {
136 if (nParam <= RF_DAG_PARAMCACHESIZE) {
137 node->params = (RF_DagParam_t *) node->dag_params;
138 } else {
139 RF_CallocAndAdd(node->params, nParam,
140 sizeof(RF_DagParam_t), (RF_DagParam_t *), alist);
141 }
142 } else {
143 node->params = NULL;
144 }
145 }
146
147
148
149
150
151
152
153
154
155 void
156 rf_FreeDAG(RF_DagHeader_t *dag_h)
157 {
158 RF_AccessStripeMapHeader_t *asmap, *t_asmap;
159 RF_DagHeader_t *nextDag;
160 int i;
161
162 while (dag_h) {
163 nextDag = dag_h->next;
164 for (i = 0; dag_h->memChunk[i] && i < RF_MAXCHUNKS; i++) {
165
166 rf_ReleaseMemChunk(dag_h->memChunk[i]);
167 dag_h->memChunk[i] = NULL;
168 }
169
170 RF_ASSERT(i == dag_h->chunkIndex);
171 if (dag_h->xtraChunkCnt > 0) {
172
173 for (i = 0; dag_h->xtraMemChunk[i] &&
174 i < dag_h->xtraChunkIndex; i++) {
175 rf_ReleaseMemChunk(dag_h->xtraMemChunk[i]);
176 dag_h->xtraMemChunk[i] = NULL;
177 }
178 RF_ASSERT(i == dag_h->xtraChunkIndex);
179
180 RF_Free(dag_h->xtraMemChunk, dag_h->xtraChunkCnt *
181 sizeof(RF_ChunkDesc_t *));
182 }
183 rf_FreeAllocList(dag_h->allocList);
184 for (asmap = dag_h->asmList; asmap;) {
185 t_asmap = asmap;
186 asmap = asmap->next;
187 rf_FreeAccessStripeMap(t_asmap);
188 }
189 rf_FreeDAGHeader(dag_h);
190 dag_h = nextDag;
191 }
192 }
193
194 RF_PropHeader_t *
195 rf_MakePropListEntry(RF_DagHeader_t *dag_h, int resultNum, int paramNum,
196 RF_PropHeader_t *next, RF_AllocListElem_t *allocList)
197 {
198 RF_PropHeader_t *p;
199
200 RF_CallocAndAdd(p, 1, sizeof(RF_PropHeader_t), (RF_PropHeader_t *),
201 allocList);
202 p->resultNum = resultNum;
203 p->paramNum = paramNum;
204 p->next = next;
205 return (p);
206 }
207
208 static RF_FreeList_t *rf_dagh_freelist;
209
210 #define RF_MAX_FREE_DAGH 128
211 #define RF_DAGH_INC 16
212 #define RF_DAGH_INITIAL 32
213
214 void rf_ShutdownDAGs(void *);
215 void
216 rf_ShutdownDAGs(void *ignored)
217 {
218 RF_FREELIST_DESTROY(rf_dagh_freelist, next, (RF_DagHeader_t *));
219 }
220
221 int
222 rf_ConfigureDAGs(RF_ShutdownList_t **listp)
223 {
224 int rc;
225
226 RF_FREELIST_CREATE(rf_dagh_freelist, RF_MAX_FREE_DAGH, RF_DAGH_INC,
227 sizeof(RF_DagHeader_t));
228 if (rf_dagh_freelist == NULL)
229 return (ENOMEM);
230 rc = rf_ShutdownCreate(listp, rf_ShutdownDAGs, NULL);
231 if (rc) {
232 RF_ERRORMSG3("Unable to add to shutdown list file %s line"
233 " %d rc=%d\n", __FILE__, __LINE__, rc);
234 rf_ShutdownDAGs(NULL);
235 return (rc);
236 }
237 RF_FREELIST_PRIME(rf_dagh_freelist, RF_DAGH_INITIAL, next,
238 (RF_DagHeader_t *));
239 return (0);
240 }
241
242 RF_DagHeader_t *
243 rf_AllocDAGHeader(void)
244 {
245 RF_DagHeader_t *dh;
246
247 RF_FREELIST_GET(rf_dagh_freelist, dh, next, (RF_DagHeader_t *));
248 if (dh) {
249 bzero((char *) dh, sizeof(RF_DagHeader_t));
250 }
251 return (dh);
252 }
253
254 void
255 rf_FreeDAGHeader(RF_DagHeader_t *dh)
256 {
257 RF_FREELIST_FREE(rf_dagh_freelist, dh, next);
258 }
259
260
261 void *
262 rf_AllocBuffer(RF_Raid_t *raidPtr, RF_DagHeader_t *dag_h,
263 RF_PhysDiskAddr_t *pda, RF_AllocListElem_t *allocList)
264 {
265 char *p;
266
267 RF_MallocAndAdd(p, pda->numSector << raidPtr->logBytesPerSector,
268 (char *), allocList);
269 return ((void *) p);
270 }
271
272
273
274
275
276
277
278
279 char *
280 rf_NodeStatusString(RF_DagNode_t *node)
281 {
282 switch (node->status) {
283 case rf_wait:
284 return ("wait");
285 case rf_fired:
286 return ("fired");
287 case rf_good:
288 return ("good");
289 case rf_bad:
290 return ("bad");
291 default:
292 return ("?");
293 }
294 }
295
296 void
297 rf_PrintNodeInfoString(RF_DagNode_t *node)
298 {
299 RF_PhysDiskAddr_t *pda;
300 int (*df) (RF_DagNode_t *) = node->doFunc;
301 int i, lk, unlk;
302 void *bufPtr;
303
304 if ((df == rf_DiskReadFunc) || (df == rf_DiskWriteFunc) ||
305 (df == rf_DiskReadMirrorIdleFunc) ||
306 (df == rf_DiskReadMirrorPartitionFunc)) {
307 pda = (RF_PhysDiskAddr_t *) node->params[0].p;
308 bufPtr = (void *) node->params[1].p;
309 lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
310 unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
311 RF_ASSERT(!(lk && unlk));
312 printf("r %d c %d offs %ld nsect %d buf 0x%lx %s\n", pda->row,
313 pda->col, (long) pda->startSector, (int) pda->numSector,
314 (long) bufPtr, (lk) ? "LOCK" : ((unlk) ? "UNLK" : " "));
315 return;
316 }
317 if (df == rf_DiskUnlockFunc) {
318 pda = (RF_PhysDiskAddr_t *) node->params[0].p;
319 lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
320 unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
321 RF_ASSERT(!(lk && unlk));
322 printf("r %d c %d %s\n", pda->row, pda->col,
323 (lk) ? "LOCK" : ((unlk) ? "UNLK" : "nop"));
324 return;
325 }
326 if ((df == rf_SimpleXorFunc) || (df == rf_RegularXorFunc)
327 || (df == rf_RecoveryXorFunc)) {
328 printf("result buf 0x%lx\n", (long) node->results[0]);
329 for (i = 0; i < node->numParams - 1; i += 2) {
330 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
331 bufPtr = (RF_PhysDiskAddr_t *) node->params[i + 1].p;
332 printf(" buf 0x%lx r%d c%d offs %ld nsect %d\n",
333 (long) bufPtr, pda->row, pda->col,
334 (long) pda->startSector, (int) pda->numSector);
335 }
336 return;
337 }
338 #if RF_INCLUDE_PARITYLOGGING > 0
339 if (df == rf_ParityLogOverwriteFunc || df == rf_ParityLogUpdateFunc) {
340 for (i = 0; i < node->numParams - 1; i += 2) {
341 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
342 bufPtr = (RF_PhysDiskAddr_t *) node->params[i + 1].p;
343 printf(" r%d c%d offs %ld nsect %d buf 0x%lx\n",
344 pda->row, pda->col, (long) pda->startSector,
345 (int) pda->numSector, (long) bufPtr);
346 }
347 return;
348 }
349 #endif
350
351 if ((df == rf_TerminateFunc) || (df == rf_NullNodeFunc)) {
352 printf("\n");
353 return;
354 }
355 printf("?\n");
356 }
357
358 void
359 rf_RecurPrintDAG(RF_DagNode_t *node, int depth, int unvisited)
360 {
361 char *anttype;
362 int i;
363
364 node->visited = (unvisited) ? 0 : 1;
365 printf("(%d) %d C%d %s: %s,s%d %d/%d,a%d/%d,p%d,r%d S{", depth,
366 node->nodeNum, node->commitNode, node->name,
367 rf_NodeStatusString(node), node->numSuccedents,
368 node->numSuccFired, node->numSuccDone,
369 node->numAntecedents, node->numAntDone,
370 node->numParams, node->numResults);
371 for (i = 0; i < node->numSuccedents; i++) {
372 printf("%d%s", node->succedents[i]->nodeNum,
373 ((i == node->numSuccedents - 1) ? "\0" : " "));
374 }
375 printf("} A{");
376 for (i = 0; i < node->numAntecedents; i++) {
377 switch (node->antType[i]) {
378 case rf_trueData:
379 anttype = "T";
380 break;
381 case rf_antiData:
382 anttype = "A";
383 break;
384 case rf_outputData:
385 anttype = "O";
386 break;
387 case rf_control:
388 anttype = "C";
389 break;
390 default:
391 anttype = "?";
392 break;
393 }
394 printf("%d(%s)%s", node->antecedents[i]->nodeNum, anttype,
395 (i == node->numAntecedents - 1) ? "\0" : " ");
396 }
397 printf("}; ");
398 rf_PrintNodeInfoString(node);
399 for (i = 0; i < node->numSuccedents; i++) {
400 if (node->succedents[i]->visited == unvisited)
401 rf_RecurPrintDAG(node->succedents[i], depth + 1,
402 unvisited);
403 }
404 }
405
406 void
407 rf_PrintDAG(RF_DagHeader_t *dag_h)
408 {
409 int unvisited, i;
410 char *status;
411
412
413 switch (dag_h->status) {
414 case rf_enable:
415 status = "enable";
416 break;
417 case rf_rollForward:
418 status = "rollForward";
419 break;
420 case rf_rollBackward:
421 status = "rollBackward";
422 break;
423 default:
424 status = "illegal !";
425 break;
426 }
427
428 unvisited = dag_h->succedents[0]->visited;
429
430 printf("DAG type: %s\n", dag_h->creator);
431 printf("format is (depth) num commit type: status,nSucc nSuccFired/n"
432 "SuccDone,nAnte/nAnteDone,nParam,nResult S{x} A{x(type)}; info\n");
433 printf("(0) %d Hdr: %s, s%d, (commit %d/%d) S{", dag_h->nodeNum,
434 status, dag_h->numSuccedents, dag_h->numCommitNodes,
435 dag_h->numCommits);
436 for (i = 0; i < dag_h->numSuccedents; i++) {
437 printf("%d%s", dag_h->succedents[i]->nodeNum,
438 ((i == dag_h->numSuccedents - 1) ? "\0" : " "));
439 }
440 printf("};\n");
441 for (i = 0; i < dag_h->numSuccedents; i++) {
442 if (dag_h->succedents[i]->visited == unvisited)
443 rf_RecurPrintDAG(dag_h->succedents[i], 1, unvisited);
444 }
445 }
446
447
448 int
449 rf_AssignNodeNums(RF_DagHeader_t *dag_h)
450 {
451 int unvisited, i, nnum;
452 RF_DagNode_t *node;
453
454 nnum = 0;
455 unvisited = dag_h->succedents[0]->visited;
456
457 dag_h->nodeNum = nnum++;
458 for (i = 0; i < dag_h->numSuccedents; i++) {
459 node = dag_h->succedents[i];
460 if (node->visited == unvisited) {
461 nnum = rf_RecurAssignNodeNums(dag_h->succedents[i],
462 nnum, unvisited);
463 }
464 }
465 return (nnum);
466 }
467
468 int
469 rf_RecurAssignNodeNums(RF_DagNode_t *node, int num, int unvisited)
470 {
471 int i;
472
473 node->visited = (unvisited) ? 0 : 1;
474
475 node->nodeNum = num++;
476 for (i = 0; i < node->numSuccedents; i++) {
477 if (node->succedents[i]->visited == unvisited) {
478 num = rf_RecurAssignNodeNums(node->succedents[i],
479 num, unvisited);
480 }
481 }
482 return (num);
483 }
484
485
486 void
487 rf_ResetDAGHeaderPointers(RF_DagHeader_t *dag_h, RF_DagHeader_t *newptr)
488 {
489 int i;
490
491 for (i = 0; i < dag_h->numSuccedents; i++)
492 if (dag_h->succedents[i]->dagHdr != newptr)
493 rf_RecurResetDAGHeaderPointers(dag_h->succedents[i],
494 newptr);
495 }
496
497 void
498 rf_RecurResetDAGHeaderPointers(RF_DagNode_t *node, RF_DagHeader_t *newptr)
499 {
500 int i;
501
502 node->dagHdr = newptr;
503 for (i = 0; i < node->numSuccedents; i++)
504 if (node->succedents[i]->dagHdr != newptr)
505 rf_RecurResetDAGHeaderPointers(node->succedents[i],
506 newptr);
507 }
508
509 void
510 rf_PrintDAGList(RF_DagHeader_t *dag_h)
511 {
512 int i = 0;
513
514 for (; dag_h; dag_h = dag_h->next) {
515 rf_AssignNodeNums(dag_h);
516 printf("\n\nDAG %d IN LIST:\n", i++);
517 rf_PrintDAG(dag_h);
518 }
519 }
520
521 int
522 rf_ValidateBranch(RF_DagNode_t *node, int *scount, int *acount,
523 RF_DagNode_t **nodes, int unvisited)
524 {
525 int i, retcode = 0;
526
527
528 node->visited = (unvisited) ? 0 : 1;
529 nodes[node->nodeNum] = node;
530
531 if (node->next != NULL) {
532 printf("INVALID DAG: next pointer in node is not NULL.\n");
533 retcode = 1;
534 }
535 if (node->status != rf_wait) {
536 printf("INVALID DAG: Node status is not wait.\n");
537 retcode = 1;
538 }
539 if (node->numAntDone != 0) {
540 printf("INVALID DAG: numAntDone is not zero.\n");
541 retcode = 1;
542 }
543 if (node->doFunc == rf_TerminateFunc) {
544 if (node->numSuccedents != 0) {
545 printf("INVALID DAG: Terminator node has"
546 " succedents.\n");
547 retcode = 1;
548 }
549 } else {
550 if (node->numSuccedents == 0) {
551 printf("INVALID DAG: Non-terminator node has no"
552 " succedents\n");
553 retcode = 1;
554 }
555 }
556 for (i = 0; i < node->numSuccedents; i++) {
557 if (!node->succedents[i]) {
558 printf("INVALID DAG: succedent %d of node %s"
559 " is NULL.\n", i, node->name);
560 retcode = 1;
561 }
562 scount[node->succedents[i]->nodeNum]++;
563 }
564 for (i = 0; i < node->numAntecedents; i++) {
565 if (!node->antecedents[i]) {
566 printf("INVALID DAG: antecedent %d of node %s is"
567 " NULL.\n", i, node->name);
568 retcode = 1;
569 }
570 acount[node->antecedents[i]->nodeNum]++;
571 }
572 for (i = 0; i < node->numSuccedents; i++) {
573 if (node->succedents[i]->visited == unvisited) {
574 if (rf_ValidateBranch(node->succedents[i], scount,
575 acount, nodes, unvisited)) {
576 retcode = 1;
577 }
578 }
579 }
580 return (retcode);
581 }
582
583 void
584 rf_ValidateBranchVisitedBits(RF_DagNode_t *node, int unvisited, int rl)
585 {
586 int i;
587
588 RF_ASSERT(node->visited == unvisited);
589 for (i = 0; i < node->numSuccedents; i++) {
590 if (node->succedents[i] == NULL) {
591 printf("node=%lx node->succedents[%d] is NULL.\n",
592 (long) node, i);
593 RF_ASSERT(0);
594 }
595 rf_ValidateBranchVisitedBits(node->succedents[i],
596 unvisited, rl + 1);
597 }
598 }
599
600
601
602
603
604 void
605 rf_ValidateVisitedBits(RF_DagHeader_t *dag)
606 {
607 int i, unvisited;
608
609 unvisited = dag->succedents[0]->visited;
610
611 for (i = 0; i < dag->numSuccedents; i++) {
612 if (dag->succedents[i] == NULL) {
613 printf("dag=%lx dag->succedents[%d] is NULL.\n",
614 (long) dag, i);
615 RF_ASSERT(0);
616 }
617 rf_ValidateBranchVisitedBits(dag->succedents[i], unvisited, 0);
618 }
619 }
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638 int
639 rf_ValidateDAG(RF_DagHeader_t *dag_h)
640 {
641 int i, nodecount;
642 int *scount, *acount;
643 RF_DagNode_t **nodes;
644 int retcode = 0;
645 int unvisited;
646 int commitNodeCount = 0;
647
648 if (rf_validateVisitedDebug)
649 rf_ValidateVisitedBits(dag_h);
650
651 if (dag_h->numNodesCompleted != 0) {
652 printf("INVALID DAG: num nodes completed is %d, should be 0.\n",
653 dag_h->numNodesCompleted);
654 retcode = 1;
655 goto validate_dag_bad;
656 }
657 if (dag_h->status != rf_enable) {
658 printf("INVALID DAG: not enabled.\n");
659 retcode = 1;
660 goto validate_dag_bad;
661 }
662 if (dag_h->numCommits != 0) {
663 printf("INVALID DAG: numCommits != 0 (%d)\n",
664 dag_h->numCommits);
665 retcode = 1;
666 goto validate_dag_bad;
667 }
668 if (dag_h->numSuccedents != 1) {
669
670 printf("INVALID DAG: numSuccedents != 1 (%d).\n",
671 dag_h->numSuccedents);
672 retcode = 1;
673 goto validate_dag_bad;
674 }
675 nodecount = rf_AssignNodeNums(dag_h);
676
677 unvisited = dag_h->succedents[0]->visited;
678
679 RF_Calloc(scount, nodecount, sizeof(int), (int *));
680 RF_Calloc(acount, nodecount, sizeof(int), (int *));
681 RF_Calloc(nodes, nodecount, sizeof(RF_DagNode_t *), (RF_DagNode_t **));
682 for (i = 0; i < dag_h->numSuccedents; i++) {
683 if ((dag_h->succedents[i]->visited == unvisited)
684 && rf_ValidateBranch(dag_h->succedents[i], scount,
685 acount, nodes, unvisited)) {
686 retcode = 1;
687 }
688 }
689
690 for (i = 1; i < nodecount; i++) {
691 if (nodes[i]->commitNode)
692 commitNodeCount++;
693 if (nodes[i]->doFunc == NULL) {
694 printf("INVALID DAG: node %s has an undefined"
695 " doFunc.\n", nodes[i]->name);
696 retcode = 1;
697 goto validate_dag_out;
698 }
699 if (nodes[i]->undoFunc == NULL) {
700 printf("INVALID DAG: node %s has an undefined"
701 " doFunc.\n", nodes[i]->name);
702 retcode = 1;
703 goto validate_dag_out;
704 }
705 if (nodes[i]->numAntecedents != scount[nodes[i]->nodeNum]) {
706 printf("INVALID DAG: node %s has %d antecedents but"
707 " appears as a succedent %d times.\n",
708 nodes[i]->name, nodes[i]->numAntecedents,
709 scount[nodes[i]->nodeNum]);
710 retcode = 1;
711 goto validate_dag_out;
712 }
713 if (nodes[i]->numSuccedents != acount[nodes[i]->nodeNum]) {
714 printf("INVALID DAG: node %s has %d succedents but"
715 " appears as an antecedent %d times.\n",
716 nodes[i]->name, nodes[i]->numSuccedents,
717 acount[nodes[i]->nodeNum]);
718 retcode = 1;
719 goto validate_dag_out;
720 }
721 }
722
723 if (dag_h->numCommitNodes != commitNodeCount) {
724 printf("INVALID DAG: incorrect commit node count. "
725 "hdr->numCommitNodes (%d) found (%d) commit nodes"
726 " in graph.\n",
727 dag_h->numCommitNodes, commitNodeCount);
728 retcode = 1;
729 goto validate_dag_out;
730 }
731
732 validate_dag_out:
733 RF_Free(scount, nodecount * sizeof(int));
734 RF_Free(acount, nodecount * sizeof(int));
735 RF_Free(nodes, nodecount * sizeof(RF_DagNode_t *));
736 if (retcode)
737 rf_PrintDAGList(dag_h);
738
739 if (rf_validateVisitedDebug)
740 rf_ValidateVisitedBits(dag_h);
741
742 return (retcode);
743
744 validate_dag_bad:
745 rf_PrintDAGList(dag_h);
746 return (retcode);
747 }
748
749
750
751
752
753
754
755
756 void
757 rf_redirect_asm(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
758 {
759 int ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) ? 1 : 0;
760 int row = asmap->physInfo->row;
761 int fcol = raidPtr->reconControl[row]->fcol;
762 int srow = raidPtr->reconControl[row]->spareRow;
763 int scol = raidPtr->reconControl[row]->spareCol;
764 RF_PhysDiskAddr_t *pda;
765
766 RF_ASSERT(raidPtr->status[row] == rf_rs_reconstructing);
767 for (pda = asmap->physInfo; pda; pda = pda->next) {
768 if (pda->col == fcol) {
769 if (rf_dagDebug) {
770 if (!rf_CheckRUReconstructed(
771 raidPtr->reconControl[row]->reconMap,
772 pda->startSector)) {
773 RF_PANIC();
774 }
775 }
776
777 if (ds) {
778 raidPtr->Layout.map->MapSector(raidPtr,
779 pda->raidAddress, &pda->row, &pda->col,
780 &pda->startSector, RF_REMAP);
781 } else {
782 pda->row = srow;
783 pda->col = scol;
784 }
785 }
786 }
787 for (pda = asmap->parityInfo; pda; pda = pda->next) {
788 if (pda->col == fcol) {
789 if (rf_dagDebug) {
790 if (!rf_CheckRUReconstructed(
791 raidPtr->reconControl[row]->reconMap,
792 pda->startSector)) {
793 RF_PANIC();
794 }
795 }
796 }
797 if (ds) {
798 (raidPtr->Layout.map->MapParity) (raidPtr,
799 pda->raidAddress, &pda->row, &pda->col,
800 &pda->startSector, RF_REMAP);
801 } else {
802 pda->row = srow;
803 pda->col = scol;
804 }
805 }
806 }
807
808
809
810
811
812
813
814
815
816
817
818 void
819 rf_MapUnaccessedPortionOfStripe(
820 RF_Raid_t *raidPtr,
821 RF_RaidLayout_t *layoutPtr,
822 RF_AccessStripeMap_t *asmap,
823 RF_DagHeader_t *dag_h,
824
825 RF_AccessStripeMapHeader_t **new_asm_h,
826
827
828 int *nRodNodes,
829
830
831 char **sosBuffer,
832
833 char **eosBuffer,
834 RF_AllocListElem_t *allocList
835 )
836 {
837 RF_RaidAddr_t sosRaidAddress, eosRaidAddress;
838 RF_SectorNum_t sosNumSector, eosNumSector;
839
840 RF_ASSERT(asmap->numStripeUnitsAccessed > (layoutPtr->numDataCol / 2));
841
842
843
844
845 new_asm_h[0] = new_asm_h[1] = NULL;
846 *nRodNodes = 0;
847 if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->raidAddress)) {
848 sosRaidAddress = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
849 asmap->raidAddress);
850 sosNumSector = asmap->raidAddress - sosRaidAddress;
851 RF_MallocAndAdd(*sosBuffer, rf_RaidAddressToByte(raidPtr,
852 sosNumSector), (char *), allocList);
853 new_asm_h[0] = rf_MapAccess(raidPtr, sosRaidAddress,
854 sosNumSector, *sosBuffer, RF_DONT_REMAP);
855 new_asm_h[0]->next = dag_h->asmList;
856 dag_h->asmList = new_asm_h[0];
857 *nRodNodes += new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
858
859 RF_ASSERT(new_asm_h[0]->stripeMap->next == NULL);
860
861 if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE)
862 rf_redirect_asm(raidPtr, new_asm_h[0]->stripeMap);
863 }
864
865
866
867
868 if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->endRaidAddress)) {
869 eosRaidAddress = asmap->endRaidAddress;
870 eosNumSector = rf_RaidAddressOfNextStripeBoundary(layoutPtr,
871 eosRaidAddress) - eosRaidAddress;
872 RF_MallocAndAdd(*eosBuffer, rf_RaidAddressToByte(raidPtr,
873 eosNumSector), (char *), allocList);
874 new_asm_h[1] = rf_MapAccess(raidPtr, eosRaidAddress,
875 eosNumSector, *eosBuffer, RF_DONT_REMAP);
876 new_asm_h[1]->next = dag_h->asmList;
877 dag_h->asmList = new_asm_h[1];
878 *nRodNodes += new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
879
880 RF_ASSERT(new_asm_h[1]->stripeMap->next == NULL);
881
882 if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE)
883 rf_redirect_asm(raidPtr, new_asm_h[1]->stripeMap);
884 }
885 }
886
887
888
889 int
890 rf_PDAOverlap(RF_RaidLayout_t *layoutPtr, RF_PhysDiskAddr_t *src,
891 RF_PhysDiskAddr_t *dest)
892 {
893 RF_SectorNum_t soffs =
894 rf_StripeUnitOffset(layoutPtr, src->startSector);
895 RF_SectorNum_t doffs =
896 rf_StripeUnitOffset(layoutPtr, dest->startSector);
897
898 RF_SectorNum_t send =
899 rf_StripeUnitOffset(layoutPtr, src->startSector +
900 src->numSector - 1);
901 RF_SectorNum_t dend =
902 rf_StripeUnitOffset(layoutPtr, dest->startSector +
903 dest->numSector - 1);
904
905 return ((RF_MAX(soffs, doffs) <= RF_MIN(send, dend)) ? 1 : 0);
906 }
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938 void
939 rf_GenerateFailedAccessASMs(
940 RF_Raid_t *raidPtr,
941 RF_AccessStripeMap_t *asmap,
942 RF_PhysDiskAddr_t *failedPDA,
943 RF_DagHeader_t *dag_h,
944 RF_AccessStripeMapHeader_t **new_asm_h,
945 int *nXorBufs,
946 char **rpBufPtr,
947 char *overlappingPDAs,
948 RF_AllocListElem_t *allocList
949 )
950 {
951 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
952
953
954 RF_RaidAddr_t sosAddr, sosEndAddr, eosStartAddr, eosAddr;
955
956 RF_SectorCount_t numSect[2], numParitySect;
957 RF_PhysDiskAddr_t *pda;
958 char *rdBuf, *bufP;
959 int foundit, i;
960
961 bufP = NULL;
962 foundit = 0;
963
964
965
966
967
968
969
970
971 sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
972 asmap->raidAddress);
973 sosEndAddr = RF_MIN(asmap->raidAddress,
974 rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
975 failedPDA->raidAddress));
976 eosStartAddr = RF_MAX(asmap->endRaidAddress,
977 rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr,
978 failedPDA->raidAddress));
979 eosAddr = rf_RaidAddressOfNextStripeBoundary(layoutPtr,
980 asmap->raidAddress);
981
982
983
984
985
986
987 new_asm_h[0] = (sosAddr != sosEndAddr) ?
988 rf_MapAccess(raidPtr, sosAddr, sosEndAddr - sosAddr, NULL,
989 RF_DONT_REMAP) : NULL;
990 new_asm_h[1] = (eosStartAddr != eosAddr) ?
991 rf_MapAccess(raidPtr, eosStartAddr, eosAddr - eosStartAddr, NULL,
992 RF_DONT_REMAP) : NULL;
993
994
995
996
997
998
999
1000 numSect[0] = numSect[1] = 0;
1001 if (new_asm_h[0]) {
1002 new_asm_h[0]->next = dag_h->asmList;
1003 dag_h->asmList = new_asm_h[0];
1004 for (pda = new_asm_h[0]->stripeMap->physInfo; pda;
1005 pda = pda->next) {
1006 rf_RangeRestrictPDA(raidPtr, failedPDA, pda,
1007 RF_RESTRICT_NOBUFFER, 0);
1008 numSect[0] += pda->numSector;
1009 }
1010 }
1011 if (new_asm_h[1]) {
1012 new_asm_h[1]->next = dag_h->asmList;
1013 dag_h->asmList = new_asm_h[1];
1014 for (pda = new_asm_h[1]->stripeMap->physInfo;
1015 pda; pda = pda->next) {
1016 rf_RangeRestrictPDA(raidPtr, failedPDA, pda,
1017 RF_RESTRICT_NOBUFFER, 0);
1018 numSect[1] += pda->numSector;
1019 }
1020 }
1021 numParitySect = failedPDA->numSector;
1022
1023
1024
1025
1026
1027
1028 if (numSect[0] + numSect[1] + ((rpBufPtr) ? numParitySect : 0)) {
1029
1030 RF_MallocAndAdd(rdBuf, rf_RaidAddressToByte(raidPtr,
1031 numSect[0] + numSect[1] + numParitySect), (char *),
1032 allocList);
1033 bufP = rdBuf;
1034 if (rf_degDagDebug)
1035 printf("Newly allocated buffer (%d bytes) is 0x%lx\n",
1036 (int) rf_RaidAddressToByte(raidPtr,
1037 numSect[0] + numSect[1] + numParitySect),
1038 (unsigned long) bufP);
1039 }
1040
1041
1042
1043
1044
1045 (*nXorBufs) = 1;
1046
1047 if (new_asm_h[0]) {
1048 for (pda = new_asm_h[0]->stripeMap->physInfo; pda;
1049 pda = pda->next) {
1050 pda->bufPtr = bufP;
1051 bufP += rf_RaidAddressToByte(raidPtr, pda->numSector);
1052 }
1053 *nXorBufs += new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
1054 }
1055 if (new_asm_h[1]) {
1056 for (pda = new_asm_h[1]->stripeMap->physInfo; pda;
1057 pda = pda->next) {
1058 pda->bufPtr = bufP;
1059 bufP += rf_RaidAddressToByte(raidPtr, pda->numSector);
1060 }
1061 (*nXorBufs) += new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
1062 }
1063 if (rpBufPtr)
1064
1065 *rpBufPtr = bufP;
1066
1067
1068
1069
1070
1071
1072
1073
1074 for (foundit = i = 0, pda = asmap->physInfo;
1075 pda; i++, pda = pda->next) {
1076 if (pda == failedPDA) {
1077 i--;
1078 foundit = 1;
1079 continue;
1080 }
1081 if (rf_PDAOverlap(layoutPtr, pda, failedPDA)) {
1082 overlappingPDAs[i] = 1;
1083 (*nXorBufs)++;
1084 }
1085 }
1086 if (!foundit) {
1087 RF_ERRORMSG("GenerateFailedAccessASMs: did not find failedPDA"
1088 " in asm list.\n");
1089 RF_ASSERT(0);
1090 }
1091 if (rf_degDagDebug) {
1092 if (new_asm_h[0]) {
1093 printf("First asm:\n");
1094 rf_PrintFullAccessStripeMap(new_asm_h[0], 1);
1095 }
1096 if (new_asm_h[1]) {
1097 printf("Second asm:\n");
1098 rf_PrintFullAccessStripeMap(new_asm_h[1], 1);
1099 }
1100 }
1101 }
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127 void
1128 rf_RangeRestrictPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *src,
1129 RF_PhysDiskAddr_t *dest, int dobuffer, int doraidaddr)
1130 {
1131 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
1132 RF_SectorNum_t soffs =
1133 rf_StripeUnitOffset(layoutPtr, src->startSector);
1134 RF_SectorNum_t doffs =
1135 rf_StripeUnitOffset(layoutPtr, dest->startSector);
1136 RF_SectorNum_t send =
1137 rf_StripeUnitOffset(layoutPtr, src->startSector +
1138 src->numSector - 1);
1139 RF_SectorNum_t dend =
1140 rf_StripeUnitOffset(layoutPtr, dest->startSector +
1141 dest->numSector - 1);
1142 RF_SectorNum_t subAddr =
1143 rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
1144 dest->startSector);
1145
1146 dest->startSector = subAddr + RF_MAX(soffs, doffs);
1147 dest->numSector = subAddr + RF_MIN(send, dend) + 1 - dest->startSector;
1148
1149 if (dobuffer)
1150 dest->bufPtr += (soffs > doffs) ?
1151 rf_RaidAddressToByte(raidPtr, soffs - doffs) : 0;
1152 if (doraidaddr) {
1153 dest->raidAddress =
1154 rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
1155 dest->raidAddress) +
1156 rf_StripeUnitOffset(layoutPtr, dest->startSector);
1157 }
1158 }
1159
1160
1161
1162
1163
1164
1165
1166 #define NLOWPRIMES 8
1167 static int lowprimes[NLOWPRIMES] = {2, 3, 5, 7, 11, 13, 17, 19};
1168
1169
1170
1171
1172
1173
1174
1175 int
1176 rf_compute_workload_shift(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda)
1177 {
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189 RF_RowCol_t d, f, sd, row, n;
1190 int k, v, ret, i;
1191
1192 row = pda->row;
1193 n = raidPtr->numCol;
1194
1195
1196 d = pda->col;
1197
1198
1199 for (f = 0; ((!RF_DEAD_DISK(raidPtr->Disks[row][f].status)) &&
1200 (f < n)); f++);
1201
1202 RF_ASSERT(f < n);
1203 RF_ASSERT(f != d);
1204
1205 sd = (f > d) ? (n + d - f) : (d - f);
1206 RF_ASSERT(sd < n);
1207
1208
1209
1210
1211
1212
1213 v = (n - 1 - sd);
1214 k = (n - 1);
1215
1216 #if 1
1217
1218
1219
1220
1221
1222
1223
1224 for (i = 0; i < NLOWPRIMES; i++) {
1225 if (lowprimes[i] > v)
1226 break;
1227 while (((v % lowprimes[i]) == 0) && ((k % lowprimes[i]) == 0)) {
1228 v /= lowprimes[i];
1229 k /= lowprimes[i];
1230 }
1231 }
1232 #endif
1233
1234 raidPtr->hist_diskreq[row][d]++;
1235 if (raidPtr->hist_diskreq[row][d] > v) {
1236 ret = 0;
1237 } else {
1238 ret = 1;
1239 }
1240
1241 #if 0
1242 printf("d=%d f=%d sd=%d v=%d k=%d ret=%d h=%d\n", d, f, sd, v, k, ret,
1243 raidPtr->hist_diskreq[row][d]);
1244 #endif
1245
1246 if (raidPtr->hist_diskreq[row][d] >= k) {
1247
1248 raidPtr->hist_diskreq[row][d] = 0;
1249 }
1250 return (ret);
1251 }
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262 void
1263 rf_SelectMirrorDiskIdle(RF_DagNode_t *node)
1264 {
1265 RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr;
1266 RF_RowCol_t rowData, colData, rowMirror, colMirror;
1267 int dataQueueLength, mirrorQueueLength, usemirror;
1268 RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *) node->params[0].p;
1269 RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *) node->params[4].p;
1270 RF_PhysDiskAddr_t *tmp_pda;
1271 RF_RaidDisk_t **disks = raidPtr->Disks;
1272 RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue;
1273
1274
1275 rowData = data_pda->row;
1276 colData = data_pda->col;
1277 rowMirror = mirror_pda->row;
1278 colMirror = mirror_pda->col;
1279 dataQueue = &(dqs[rowData][colData]);
1280 mirrorQueue = &(dqs[rowMirror][colMirror]);
1281
1282 #ifdef RF_LOCK_QUEUES_TO_READ_LEN
1283 RF_LOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle");
1284 #endif
1285 dataQueueLength = dataQueue->queueLength + dataQueue->numOutstanding;
1286 #ifdef RF_LOCK_QUEUES_TO_READ_LEN
1287 RF_UNLOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle");
1288 RF_LOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle");
1289 #endif
1290 mirrorQueueLength = mirrorQueue->queueLength +
1291 mirrorQueue->numOutstanding;
1292 #ifdef RF_LOCK_QUEUES_TO_READ_LEN
1293 RF_UNLOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle");
1294 #endif
1295
1296 usemirror = 0;
1297 if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) {
1298 usemirror = 0;
1299 } else
1300 if (RF_DEAD_DISK(disks[rowData][colData].status)) {
1301 usemirror = 1;
1302 } else
1303 if (raidPtr->parity_good == RF_RAID_DIRTY) {
1304
1305 usemirror = 0;
1306 } else
1307 if (dataQueueLength < mirrorQueueLength) {
1308 usemirror = 0;
1309 } else
1310 if (mirrorQueueLength < dataQueueLength) {
1311 usemirror = 1;
1312 } else {
1313
1314
1315 if (SNUM_DIFF(dataQueue
1316 ->last_deq_sector, data_pda
1317 ->startSector) <=
1318 SNUM_DIFF(mirrorQueue
1319 ->last_deq_sector, mirror_pda
1320 ->startSector)) {
1321 usemirror = 0;
1322 } else {
1323 usemirror = 1;
1324 }
1325 }
1326
1327 if (usemirror) {
1328
1329 tmp_pda = data_pda;
1330 node->params[0].p = mirror_pda;
1331 node->params[4].p = tmp_pda;
1332 } else {
1333
1334 }
1335
1336
1337 }
1338
1339
1340
1341
1342
1343 void
1344 rf_SelectMirrorDiskPartition(RF_DagNode_t *node)
1345 {
1346 RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr;
1347 RF_RowCol_t rowData, colData, rowMirror, colMirror;
1348 RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *) node->params[0].p;
1349 RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *) node->params[4].p;
1350 RF_PhysDiskAddr_t *tmp_pda;
1351 RF_RaidDisk_t **disks = raidPtr->Disks;
1352 RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue;
1353 int usemirror;
1354
1355
1356 rowData = data_pda->row;
1357 colData = data_pda->col;
1358 rowMirror = mirror_pda->row;
1359 colMirror = mirror_pda->col;
1360 dataQueue = &(dqs[rowData][colData]);
1361 mirrorQueue = &(dqs[rowMirror][colMirror]);
1362
1363 usemirror = 0;
1364 if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) {
1365 usemirror = 0;
1366 } else
1367 if (RF_DEAD_DISK(disks[rowData][colData].status)) {
1368 usemirror = 1;
1369 } else
1370 if (raidPtr->parity_good == RF_RAID_DIRTY) {
1371
1372 usemirror = 0;
1373 } else
1374 if (data_pda->startSector <
1375 (disks[rowData][colData].numBlocks / 2)) {
1376 usemirror = 0;
1377 } else {
1378 usemirror = 1;
1379 }
1380
1381 if (usemirror) {
1382
1383 tmp_pda = data_pda;
1384 node->params[0].p = mirror_pda;
1385 node->params[4].p = tmp_pda;
1386 } else {
1387
1388 }
1389 }