This source file includes the following definitions.
- rf_SignalReconDone
- rf_RegisterReconDoneProc
- rf_ShutdownReconstruction
- rf_ConfigureReconstruction
- rf_AllocRaidReconDesc
- rf_FreeReconDesc
- rf_ReconstructFailedDisk
- rf_ReconstructFailedDiskBasic
- rf_ReconstructInPlace
- rf_ContinueReconstructFailedDisk
- rf_ProcessReconEvent
- rf_IssueNextReadRequest
- rf_TryToRead
- rf_ComputePSDiskOffsets
- rf_IssueNextWriteRequest
- rf_ReconReadDoneProc
- rf_ReconWriteDoneProc
- rf_CheckForNewMinHeadSep
- rf_CheckHeadSeparation
- rf_CheckForcedOrBlockedReconstruction
- rf_ForceOrBlockRecon
- rf_ForceReconReadDoneProc
- rf_UnblockRecon
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37 #include "rf_types.h"
38 #include <sys/time.h>
39 #include <sys/buf.h>
40 #include <sys/errno.h>
41
42 #include <sys/types.h>
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/proc.h>
46 #include <sys/ioctl.h>
47 #include <sys/fcntl.h>
48 #if __NETBSD__
49 #include <sys/vnode.h>
50 #endif
51
52 #include "rf_raid.h"
53 #include "rf_reconutil.h"
54 #include "rf_revent.h"
55 #include "rf_reconbuffer.h"
56 #include "rf_acctrace.h"
57 #include "rf_etimer.h"
58 #include "rf_dag.h"
59 #include "rf_desc.h"
60 #include "rf_general.h"
61 #include "rf_freelist.h"
62 #include "rf_debugprint.h"
63 #include "rf_driver.h"
64 #include "rf_utils.h"
65 #include "rf_shutdown.h"
66
67 #include "rf_kintf.h"
68
69
70
71
72
73
/*
 * Reconstruction debug-print helpers.
 *
 * Each DprintfN(s, ...) forwards the format string `s' and N values to
 * rf_debug_printf() when rf_reconDebug is non-zero.  rf_debug_printf() is
 * always called with exactly eight value slots; unused slots are NULL.
 * Every value is converted to unsigned long and then to void *.
 *
 * All macro parameters are parenthesized so that a compound argument
 * (e.g. `x & 0xff' or `c ? a : b') is converted as a whole instead of having
 * the cast bind to its first operand only.
 */
#define	Dprintf(s)							\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf((s),					\
		    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);	\
} while (0)
#define	Dprintf1(s,a)							\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf((s),					\
		    (void *)((unsigned long)(a)),			\
		    NULL, NULL, NULL, NULL, NULL, NULL, NULL);		\
} while (0)
#define	Dprintf2(s,a,b)							\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf((s),					\
		    (void *)((unsigned long)(a)),			\
		    (void *)((unsigned long)(b)),			\
		    NULL, NULL, NULL, NULL, NULL, NULL);		\
} while (0)
#define	Dprintf3(s,a,b,c)						\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf((s),					\
		    (void *)((unsigned long)(a)),			\
		    (void *)((unsigned long)(b)),			\
		    (void *)((unsigned long)(c)),			\
		    NULL, NULL, NULL, NULL, NULL);			\
} while (0)
#define	Dprintf4(s,a,b,c,d)						\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf((s),					\
		    (void *)((unsigned long)(a)),			\
		    (void *)((unsigned long)(b)),			\
		    (void *)((unsigned long)(c)),			\
		    (void *)((unsigned long)(d)),			\
		    NULL, NULL, NULL, NULL);				\
} while (0)
#define	Dprintf5(s,a,b,c,d,e)						\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf((s),					\
		    (void *)((unsigned long)(a)),			\
		    (void *)((unsigned long)(b)),			\
		    (void *)((unsigned long)(c)),			\
		    (void *)((unsigned long)(d)),			\
		    (void *)((unsigned long)(e)),			\
		    NULL, NULL, NULL);					\
} while (0)
#define	Dprintf6(s,a,b,c,d,e,f)						\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf((s),					\
		    (void *)((unsigned long)(a)),			\
		    (void *)((unsigned long)(b)),			\
		    (void *)((unsigned long)(c)),			\
		    (void *)((unsigned long)(d)),			\
		    (void *)((unsigned long)(e)),			\
		    (void *)((unsigned long)(f)),			\
		    NULL, NULL);					\
} while (0)
#define	Dprintf7(s,a,b,c,d,e,f,g)					\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf((s),					\
		    (void *)((unsigned long)(a)),			\
		    (void *)((unsigned long)(b)),			\
		    (void *)((unsigned long)(c)),			\
		    (void *)((unsigned long)(d)),			\
		    (void *)((unsigned long)(e)),			\
		    (void *)((unsigned long)(f)),			\
		    (void *)((unsigned long)(g)),			\
		    NULL);						\
} while (0)
150
/*
 * Second family of debug-print helpers.  As written they expand to exactly
 * the same rf_debug_printf() call, gated on rf_reconDebug, as Dprintf1 and
 * Dprintf2.  Parameters are parenthesized so compound arguments are cast as
 * a whole.
 */
#define	DDprintf1(s,a)							\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf((s),					\
		    (void *)((unsigned long)(a)),			\
		    NULL, NULL, NULL, NULL, NULL, NULL, NULL);		\
} while (0)
#define	DDprintf2(s,a,b)						\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf((s),					\
		    (void *)((unsigned long)(a)),			\
		    (void *)((unsigned long)(b)),			\
		    NULL, NULL, NULL, NULL, NULL, NULL);		\
} while (0)
166
/*
 * Freelist of reconstruction descriptors (RF_RaidReconDesc_t).  It is created
 * in rf_ConfigureReconstruction() and destroyed in
 * rf_ShutdownReconstruction().  The two constants below are the second and
 * third arguments given to RF_FREELIST_CREATE for this list (presumably the
 * maximum number of cached free entries and the growth increment -- confirm
 * against rf_freelist.h).
 */
static RF_FreeList_t *rf_recond_freelist;
#define RF_MAX_FREE_RECOND 4
#define RF_RECOND_INC 1

/* Forward declarations for the routines defined in this file. */
RF_RaidReconDesc_t *rf_AllocRaidReconDesc(RF_Raid_t *,
	RF_RowCol_t, RF_RowCol_t, RF_RaidDisk_t *, int,
	RF_RowCol_t, RF_RowCol_t);
int rf_ProcessReconEvent(RF_Raid_t *, RF_RowCol_t, RF_ReconEvent_t *);
int rf_IssueNextReadRequest(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t);
int rf_TryToRead(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t);
int rf_ComputePSDiskOffsets(RF_Raid_t *, RF_StripeNum_t,
	RF_RowCol_t, RF_RowCol_t, RF_SectorNum_t *, RF_SectorNum_t *,
	RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *);
int rf_ReconReadDoneProc(void *, int);
int rf_ReconWriteDoneProc(void *, int);
void rf_CheckForNewMinHeadSep(RF_Raid_t *, RF_RowCol_t, RF_HeadSepLimit_t);
int rf_CheckHeadSeparation(RF_Raid_t *, RF_PerDiskReconCtrl_t *,
	RF_RowCol_t, RF_RowCol_t, RF_HeadSepLimit_t, RF_ReconUnitNum_t);
void rf_ForceReconReadDoneProc(void *, int);
void rf_ShutdownReconstruction(void *);





void rf_FreeReconDesc(RF_RaidReconDesc_t *);
int rf_IssueNextWriteRequest(RF_Raid_t *, RF_RowCol_t);
int rf_CheckForcedOrBlockedReconstruction(RF_Raid_t *,
	RF_ReconParityStripeStatus_t *, RF_PerDiskReconCtrl_t *,
	RF_RowCol_t, RF_RowCol_t, RF_StripeNum_t, RF_ReconUnitNum_t);
void rf_SignalReconDone(RF_Raid_t *);
198
/*
 * One registered "reconstruction done" callback.  Entries are allocated from
 * rf_rdp_freelist and pushed onto the singly linked list headed by
 * raidPtr->recon_done_procs (see rf_RegisterReconDoneProc()); each one is
 * invoked by rf_SignalReconDone().
 */
struct RF_ReconDoneProc_s {
	void (*proc) (RF_Raid_t *, void *);	/* callback; called as proc(raidPtr, arg) */
	void *arg;				/* opaque argument handed back to proc */
	RF_ReconDoneProc_t *next;		/* next entry in the list */
};

/*
 * Freelist backing RF_ReconDoneProc_t allocations; the constants are the
 * second and third arguments given to RF_FREELIST_CREATE for it.
 */
static RF_FreeList_t *rf_rdp_freelist;
#define RF_MAX_FREE_RDP 4
#define RF_RDP_INC 1
208
209 void
210 rf_SignalReconDone(RF_Raid_t *raidPtr)
211 {
212 RF_ReconDoneProc_t *p;
213
214 RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex);
215 for (p = raidPtr->recon_done_procs; p; p = p->next) {
216 p->proc(raidPtr, p->arg);
217 }
218 RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex);
219 }
220
221 int
222 rf_RegisterReconDoneProc(RF_Raid_t *raidPtr, void (*proc) (RF_Raid_t *, void *),
223 void *arg, RF_ReconDoneProc_t **handlep)
224 {
225 RF_ReconDoneProc_t *p;
226
227 RF_FREELIST_GET(rf_rdp_freelist, p, next, (RF_ReconDoneProc_t *));
228 if (p == NULL)
229 return (ENOMEM);
230 p->proc = proc;
231 p->arg = arg;
232 RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex);
233 p->next = raidPtr->recon_done_procs;
234 raidPtr->recon_done_procs = p;
235 RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex);
236 if (handlep)
237 *handlep = p;
238 return (0);
239 }
240
241
242
243
244
245
246
247
248
249
/*
 * Tear down the two freelists owned by this file: the reconstruction
 * descriptor list and the recon-done callback list.  It is registered as a
 * shutdown routine by rf_ConfigureReconstruction(), which also calls it
 * directly (with NULL) when that registration fails.  The argument is unused.
 */
void
rf_ShutdownReconstruction(void *ignored)
{
	RF_FREELIST_DESTROY(rf_recond_freelist, next, (RF_RaidReconDesc_t *));
	RF_FREELIST_DESTROY(rf_rdp_freelist, next, (RF_ReconDoneProc_t *));
}
256
257 int
258 rf_ConfigureReconstruction(RF_ShutdownList_t **listp)
259 {
260 int rc;
261
262 RF_FREELIST_CREATE(rf_recond_freelist, RF_MAX_FREE_RECOND,
263 RF_RECOND_INC, sizeof(RF_RaidReconDesc_t));
264 if (rf_recond_freelist == NULL)
265 return (ENOMEM);
266 RF_FREELIST_CREATE(rf_rdp_freelist, RF_MAX_FREE_RDP,
267 RF_RDP_INC, sizeof(RF_ReconDoneProc_t));
268 if (rf_rdp_freelist == NULL) {
269 RF_FREELIST_DESTROY(rf_recond_freelist, next,
270 (RF_RaidReconDesc_t *));
271 return (ENOMEM);
272 }
273 rc = rf_ShutdownCreate(listp, rf_ShutdownReconstruction, NULL);
274 if (rc) {
275 RF_ERRORMSG3("Unable to add to shutdown list file %s line %d"
276 " rc=%d.\n", __FILE__, __LINE__, rc);
277 rf_ShutdownReconstruction(NULL);
278 return (rc);
279 }
280 return (0);
281 }
282
283 RF_RaidReconDesc_t *
284 rf_AllocRaidReconDesc(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col,
285 RF_RaidDisk_t *spareDiskPtr, int numDisksDone, RF_RowCol_t srow,
286 RF_RowCol_t scol)
287 {
288
289 RF_RaidReconDesc_t *reconDesc;
290
291 RF_FREELIST_GET(rf_recond_freelist, reconDesc, next,
292 (RF_RaidReconDesc_t *));
293
294 reconDesc->raidPtr = raidPtr;
295 reconDesc->row = row;
296 reconDesc->col = col;
297 reconDesc->spareDiskPtr = spareDiskPtr;
298 reconDesc->numDisksDone = numDisksDone;
299 reconDesc->srow = srow;
300 reconDesc->scol = scol;
301 reconDesc->state = 0;
302 reconDesc->next = NULL;
303
304 return (reconDesc);
305 }
306
/*
 * Print the statistics recorded in a reconstruction descriptor and return
 * the descriptor to rf_recond_freelist.  The caller must not use reconDesc
 * afterwards.
 */
void
rf_FreeReconDesc(RF_RaidReconDesc_t *reconDesc)
{
#if RF_RECON_STATS > 0
	/* Wait/delay counters are only reported when recon stats are enabled. */
	printf("RAIDframe: %qu recon event waits, %qu recon delays.\n",
	    reconDesc->numReconEventWaits, reconDesc->numReconExecDelays);
#endif

	/* The maximum exec tick count is reported unconditionally. */
	printf("RAIDframe: %qu max exec ticks.\n",
	    reconDesc->maxReconExecTicks);

#if (RF_RECON_STATS > 0) || defined(_KERNEL)
	printf("\n");
#endif
	RF_FREELIST_FREE(rf_recond_freelist, reconDesc, next);
}
323
324
325
326
327
328
329
330
331
332 int
333 rf_ReconstructFailedDisk(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
334 {
335 RF_LayoutSW_t *lp;
336 int rc;
337
338 lp = raidPtr->Layout.map;
339 if (lp->SubmitReconBuffer) {
340
341
342
343
344 RF_LOCK_MUTEX(raidPtr->mutex);
345 while (raidPtr->reconInProgress) {
346 RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex);
347 }
348 raidPtr->reconInProgress++;
349 RF_UNLOCK_MUTEX(raidPtr->mutex);
350 rc = rf_ReconstructFailedDiskBasic(raidPtr, row, col);
351 RF_LOCK_MUTEX(raidPtr->mutex);
352 raidPtr->reconInProgress--;
353 RF_UNLOCK_MUTEX(raidPtr->mutex);
354 } else {
355 RF_ERRORMSG1("RECON: no way to reconstruct failed disk for"
356 " arch %c.\n", lp->parityConfig);
357 rc = EIO;
358 }
359 RF_SIGNAL_COND(raidPtr->waitForReconCond);
360 wakeup(&raidPtr->waitForReconCond);
361
362
363
364 return (rc);
365 }
366
367 int
368 rf_ReconstructFailedDiskBasic(RF_Raid_t *raidPtr, RF_RowCol_t row,
369 RF_RowCol_t col)
370 {
371 RF_ComponentLabel_t c_label;
372 RF_RaidDisk_t *spareDiskPtr = NULL;
373 RF_RaidReconDesc_t *reconDesc;
374 RF_RowCol_t srow, scol;
375 int numDisksDone = 0, rc;
376
377
378
379
380
381
382
383 RF_LOCK_MUTEX(raidPtr->mutex);
384 RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed);
385
386 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
387 if (raidPtr->status[row] != rf_rs_degraded) {
388 RF_ERRORMSG2("Unable to reconstruct disk at row %d"
389 " col %d because status not degraded.\n", row, col);
390 RF_UNLOCK_MUTEX(raidPtr->mutex);
391 return (EINVAL);
392 }
393 srow = row;
394 scol = (-1);
395 } else {
396 srow = 0;
397 for (scol = raidPtr->numCol;
398 scol < raidPtr->numCol + raidPtr->numSpare; scol++) {
399 if (raidPtr->Disks[srow][scol].status == rf_ds_spare) {
400 spareDiskPtr = &raidPtr->Disks[srow][scol];
401 spareDiskPtr->status = rf_ds_used_spare;
402 break;
403 }
404 }
405 if (!spareDiskPtr) {
406 RF_ERRORMSG2("Unable to reconstruct disk at row %d"
407 " col %d because no spares are available.\n",
408 row, col);
409 RF_UNLOCK_MUTEX(raidPtr->mutex);
410 return (ENOSPC);
411 }
412 printf("RECON: initiating reconstruction on row %d col %d"
413 " -> spare at row %d col %d.\n", row, col, srow, scol);
414 }
415 RF_UNLOCK_MUTEX(raidPtr->mutex);
416
417 reconDesc = rf_AllocRaidReconDesc((void *) raidPtr, row, col,
418 spareDiskPtr, numDisksDone, srow, scol);
419 raidPtr->reconDesc = (void *) reconDesc;
420 #if RF_RECON_STATS > 0
421 reconDesc->hsStallCount = 0;
422 reconDesc->numReconExecDelays = 0;
423 reconDesc->numReconEventWaits = 0;
424 #endif
425 reconDesc->reconExecTimerRunning = 0;
426 reconDesc->reconExecTicks = 0;
427 reconDesc->maxReconExecTicks = 0;
428 rc = rf_ContinueReconstructFailedDisk(reconDesc);
429
430 if (!rc) {
431
432
433 raidread_component_label(
434 raidPtr->raid_cinfo[srow][scol].ci_dev,
435 raidPtr->raid_cinfo[srow][scol].ci_vp,
436 &c_label);
437
438 raid_init_component_label(raidPtr, &c_label);
439 c_label.row = row;
440 c_label.column = col;
441 c_label.clean = RF_RAID_DIRTY;
442 c_label.status = rf_ds_optimal;
443
444
445
446 raidwrite_component_label(
447 raidPtr->raid_cinfo[srow][scol].ci_dev,
448 raidPtr->raid_cinfo[srow][scol].ci_vp,
449 &c_label);
450
451 }
452 return (rc);
453 }
454
455
456
457
458
459
460
461
462
463
464
465 int
466 rf_ReconstructInPlace(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
467 {
468 RF_RaidDisk_t *spareDiskPtr = NULL;
469 RF_RaidReconDesc_t *reconDesc;
470 RF_LayoutSW_t *lp;
471 RF_RaidDisk_t *badDisk;
472 RF_ComponentLabel_t c_label;
473 int numDisksDone = 0, rc;
474 struct partinfo dpart;
475 struct vnode *vp;
476 struct vattr va;
477 struct proc *proc;
478 int retcode;
479 int ac;
480
481 lp = raidPtr->Layout.map;
482 if (lp->SubmitReconBuffer) {
483
484
485
486
487 RF_LOCK_MUTEX(raidPtr->mutex);
488 if ((raidPtr->Disks[row][col].status == rf_ds_optimal) &&
489 (raidPtr->numFailures > 0)) {
490
491
492
493
494
495
496 #ifdef RAIDDEBUG
497 printf("RAIDFRAME: Unable to reconstruct to disk at:\n"
498 " Row: %d Col: %d Too many failures.\n",
499 row, col);
500 #endif
501 RF_UNLOCK_MUTEX(raidPtr->mutex);
502 return (EINVAL);
503 }
504 if (raidPtr->Disks[row][col].status == rf_ds_reconstructing) {
505 #ifdef RAIDDEBUG
506 printf("RAIDFRAME: Unable to reconstruct to disk at:\n"
507 " Row: %d Col: %d Reconstruction already"
508 " occurring !\n", row, col);
509 #endif
510
511 RF_UNLOCK_MUTEX(raidPtr->mutex);
512 return (EINVAL);
513 }
514
515
516 if (raidPtr->Disks[row][col].status != rf_ds_failed) {
517
518 raidPtr->numFailures++;
519 raidPtr->Disks[row][col].status = rf_ds_failed;
520 raidPtr->status[row] = rf_rs_degraded;
521 rf_update_component_labels(raidPtr,
522 RF_NORMAL_COMPONENT_UPDATE);
523 }
524
525 while (raidPtr->reconInProgress) {
526 RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex);
527 }
528
529 raidPtr->reconInProgress++;
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
547 RF_ERRORMSG2("Unable to reconstruct to disk at row %d"
548 " col %d: operation not supported for"
549 " RF_DISTRIBUTE_SPARE.\n", row, col);
550
551 raidPtr->reconInProgress--;
552 RF_UNLOCK_MUTEX(raidPtr->mutex);
553 return (EINVAL);
554 }
555
556
557
558
559
560
561 badDisk = &raidPtr->Disks[row][col];
562
563 proc = raidPtr->recon_thread;
564
565
566
567
568
569
570 if (raidPtr->raid_cinfo[row][col].ci_vp != NULL) {
571 printf("Closing the opened device: %s\n",
572 raidPtr->Disks[row][col].devname);
573 vp = raidPtr->raid_cinfo[row][col].ci_vp;
574 ac = raidPtr->Disks[row][col].auto_configured;
575 rf_close_component(raidPtr, vp, ac);
576 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
577 }
578
579
580
581 raidPtr->Disks[row][col].auto_configured = 0;
582
583 printf("About to (re-)open the device for rebuilding: %s\n",
584 raidPtr->Disks[row][col].devname);
585
586 retcode = raidlookup(raidPtr->Disks[row][col].devname,
587 proc, &vp);
588
589 if (retcode) {
590 printf("raid%d: rebuilding: raidlookup on device: %s"
591 " failed: %d !\n", raidPtr->raidid,
592 raidPtr->Disks[row][col].devname, retcode);
593
594
595
596
597
598 raidPtr->reconInProgress--;
599 RF_UNLOCK_MUTEX(raidPtr->mutex);
600 return(retcode);
601
602 } else {
603
604
605
606
607
608
609 if ((retcode =
610 VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
611 raidPtr->reconInProgress--;
612 RF_UNLOCK_MUTEX(raidPtr->mutex);
613 return(retcode);
614 }
615 retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
616 FREAD, proc->p_ucred, proc);
617 if (retcode) {
618 raidPtr->reconInProgress--;
619 RF_UNLOCK_MUTEX(raidPtr->mutex);
620 return(retcode);
621 }
622 raidPtr->Disks[row][col].blockSize =
623 dpart.disklab->d_secsize;
624
625 raidPtr->Disks[row][col].numBlocks =
626 DL_GETPSIZE(dpart.part) - rf_protectedSectors;
627
628 raidPtr->raid_cinfo[row][col].ci_vp = vp;
629 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
630
631 raidPtr->Disks[row][col].dev = va.va_rdev;
632
633
634
635
636
637
638 raidPtr->Disks[row][col].numBlocks =
639 raidPtr->Disks[row][col].numBlocks *
640 rf_sizePercentage / 100;
641 }
642
643 spareDiskPtr = &raidPtr->Disks[row][col];
644 spareDiskPtr->status = rf_ds_used_spare;
645
646 printf("RECON: Initiating in-place reconstruction on\n");
647 printf(" row %d col %d -> spare at row %d col %d.\n",
648 row, col, row, col);
649
650 RF_UNLOCK_MUTEX(raidPtr->mutex);
651
652 reconDesc = rf_AllocRaidReconDesc((void *) raidPtr, row, col,
653 spareDiskPtr, numDisksDone, row, col);
654 raidPtr->reconDesc = (void *) reconDesc;
655 #if RF_RECON_STATS > 0
656 reconDesc->hsStallCount = 0;
657 reconDesc->numReconExecDelays = 0;
658 reconDesc->numReconEventWaits = 0;
659 #endif
660 reconDesc->reconExecTimerRunning = 0;
661 reconDesc->reconExecTicks = 0;
662 reconDesc->maxReconExecTicks = 0;
663 rc = rf_ContinueReconstructFailedDisk(reconDesc);
664
665 RF_LOCK_MUTEX(raidPtr->mutex);
666 raidPtr->reconInProgress--;
667 RF_UNLOCK_MUTEX(raidPtr->mutex);
668
669 } else {
670 RF_ERRORMSG1("RECON: no way to reconstruct failed disk for"
671 " arch %c.\n", lp->parityConfig);
672 rc = EIO;
673 }
674 RF_LOCK_MUTEX(raidPtr->mutex);
675
676 if (!rc) {
677
678
679
680
681
682 raidPtr->Disks[row][col].status = rf_ds_optimal;
683 raidPtr->status[row] = rf_rs_optimal;
684
685
686
687 raidread_component_label(
688 raidPtr->raid_cinfo[row][col].ci_dev,
689 raidPtr->raid_cinfo[row][col].ci_vp,
690 &c_label);
691
692 raid_init_component_label(raidPtr, &c_label);
693
694 c_label.row = row;
695 c_label.column = col;
696
697 raidwrite_component_label(raidPtr->raid_cinfo[row][col].ci_dev,
698 raidPtr->raid_cinfo[row][col].ci_vp, &c_label);
699
700 }
701 RF_UNLOCK_MUTEX(raidPtr->mutex);
702 RF_SIGNAL_COND(raidPtr->waitForReconCond);
703 wakeup(&raidPtr->waitForReconCond);
704 return (rc);
705 }
706
707
/*
 * Drive a reconstruction described by reconDesc.
 *
 * The body is a switch on reconDesc->state whose cases deliberately fall
 * through into one another; each case stores the next state number in the
 * descriptor before or after doing its work, so that a later call with the
 * same descriptor resumes at the recorded state.  This function is also
 * handed to rf_GetNextReconEvent() as a callback together with reconDesc
 * (presumably so the event layer can re-enter here -- confirm in
 * rf_revent.c).
 *
 *   state 0: reset accumulated XOR time, allocate one trace record per
 *            column, suspend new requests, start user statistics.
 *   state 1: create the reconstruction control structure, mark the row and
 *            disk as reconstructing, record the spare location and start
 *            time, and issue the first read on every column except the
 *            failed one.
 *   state 2: resume new requests.
 *   state 3: process events until all numCol - 1 surviving disks have
 *            reported that they are done issuing reads.
 *   state 4: process events until no reconstruction units are left.
 *   state 5: suspend new requests again, stop and print user statistics.
 *   state 6: under the array mutex, drop the failure count and set the
 *            final disk/row status; compute the elapsed time.
 *   state 7: resume requests, print timing, free the control structure,
 *            the trace records and the descriptor.
 *
 * After the switch the registered recon-done callbacks are run.
 * Always returns 0.
 */
int
rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
{
	RF_Raid_t *raidPtr = reconDesc->raidPtr;
	RF_RowCol_t row = reconDesc->row;
	RF_RowCol_t col = reconDesc->col;
	RF_RowCol_t srow = reconDesc->srow;
	RF_RowCol_t scol = reconDesc->scol;
	RF_ReconMap_t *mapPtr;

	RF_ReconEvent_t *event;
	struct timeval etime, elpsd;
	unsigned long xor_s, xor_resid_us;
	int retcode, i, ds;

	switch (reconDesc->state) {
	case 0:
		raidPtr->accumXorTimeUs = 0;

		/* One access-trace record per column. */
		RF_Malloc(raidPtr->recon_tracerecs, raidPtr->numCol *
		    sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));

		reconDesc->state = 1;

		Dprintf("RECON: begin request suspend.\n");
		/* NOTE(review): retcode is assigned here but never examined. */
		retcode = rf_SuspendNewRequestsAndWait(raidPtr);
		Dprintf("RECON: end request suspend.\n");
		rf_StartUserStats(raidPtr);

		/* FALLTHROUGH */

	case 1:
		RF_LOCK_MUTEX(raidPtr->mutex);

		/*
		 * Build the reconstruction control structure and flag the
		 * row and the failed disk as being under reconstruction.
		 */
		raidPtr->reconControl[row] =
		    rf_MakeReconControl(reconDesc, row, col, srow, scol);
		mapPtr = raidPtr->reconControl[row]->reconMap;
		raidPtr->status[row] = rf_rs_reconstructing;
		raidPtr->Disks[row][col].status = rf_ds_reconstructing;
		raidPtr->Disks[row][col].spareRow = srow;
		raidPtr->Disks[row][col].spareCol = scol;

		RF_UNLOCK_MUTEX(raidPtr->mutex);

		RF_GETTIME(raidPtr->reconControl[row]->starttime);

		/*
		 * Kick off the first read on every surviving column.  A
		 * non-zero return means that column has nothing (more) to
		 * issue and counts as done.
		 */
		reconDesc->numDisksDone = 0;
		for (i = 0; i < raidPtr->numCol; i++) {
			if (i != col) {
				if (rf_IssueNextReadRequest(raidPtr, row, i)) {
					Dprintf2("RECON: done issuing for r%d"
					    " c%d.\n", row, i);
					reconDesc->numDisksDone++;
				}
			}
		}

		reconDesc->state = 2;

		/* FALLTHROUGH */

	case 2:
		Dprintf("RECON: resume requests.\n");
		rf_ResumeNewRequests(raidPtr);

		reconDesc->state = 3;

		/* FALLTHROUGH */

	case 3:
		/*
		 * Main loop: handle events until every surviving disk has
		 * finished issuing reads, updating the progress counters
		 * after each event.
		 */
		mapPtr = raidPtr->reconControl[row]->reconMap;

		while (reconDesc->numDisksDone < raidPtr->numCol - 1) {

			event = rf_GetNextReconEvent(reconDesc, row,
			    (void (*) (void *)) rf_ContinueReconstructFailedDisk,
			    reconDesc);
			RF_ASSERT(event);

			if (rf_ProcessReconEvent(raidPtr, row, event))
				reconDesc->numDisksDone++;
			raidPtr->reconControl[row]->numRUsTotal =
			    mapPtr->totalRUs;
			raidPtr->reconControl[row]->numRUsComplete =
			    mapPtr->totalRUs -
			    rf_UnitsLeftToReconstruct(mapPtr);

			raidPtr->reconControl[row]->percentComplete =
			    (raidPtr->reconControl[row]->numRUsComplete * 100 /
			     raidPtr->reconControl[row]->numRUsTotal);
			if (rf_prReconSched) {
				rf_PrintReconSchedule(
				    raidPtr->reconControl[row]->reconMap,
				    &(raidPtr->reconControl[row]->starttime));
			}
		}

		reconDesc->state = 4;

		/* FALLTHROUGH */

	case 4:
		mapPtr = raidPtr->reconControl[row]->reconMap;
		if (rf_reconDebug) {
			printf("RECON: all reads completed.\n");
		}

		/*
		 * All reads have been issued; keep processing events until
		 * the reconstruction map reports nothing left to rebuild.
		 */
		while (rf_UnitsLeftToReconstruct(
		    raidPtr->reconControl[row]->reconMap) > 0) {

			event = rf_GetNextReconEvent(reconDesc, row,
			    (void (*) (void *)) rf_ContinueReconstructFailedDisk,
			    reconDesc);
			RF_ASSERT(event);

			/* The return value is irrelevant in this phase. */
			(void) rf_ProcessReconEvent(raidPtr, row, event);
			raidPtr->reconControl[row]->percentComplete =
			    100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 /
			    mapPtr->totalRUs);
			if (rf_prReconSched) {
				rf_PrintReconSchedule(
				    raidPtr->reconControl[row]->reconMap,
				    &(raidPtr->reconControl[row]->starttime));
			}
		}
		reconDesc->state = 5;

		/* FALLTHROUGH */

	case 5:
		/*
		 * Quiesce the array again before switching the disk status,
		 * and report the user statistics gathered during recon.
		 */
		reconDesc->state = 6;

		retcode = rf_SuspendNewRequestsAndWait(raidPtr);
		rf_StopUserStats(raidPtr);
		rf_PrintUserStats(raidPtr);

		/* FALLTHROUGH */

	case 6:
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numFailures--;
		/* Final status depends on whether sparing is distributed. */
		ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE);
		raidPtr->Disks[row][col].status = (ds) ? rf_ds_dist_spared :
		    rf_ds_spared;
		raidPtr->status[row] = (ds) ? rf_rs_reconfigured :
		    rf_rs_optimal;
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		RF_GETTIME(etime);
		RF_TIMEVAL_DIFF(&(raidPtr->reconControl[row]->starttime),
		    &etime, &elpsd);

		/* FALLTHROUGH */

	case 7:

		rf_ResumeNewRequests(raidPtr);

		printf("Reconstruction of disk at row %d col %d completed.\n",
		    row, col);
		/* Split the accumulated XOR time into seconds + microseconds. */
		xor_s = raidPtr->accumXorTimeUs / 1000000;
		xor_resid_us = raidPtr->accumXorTimeUs % 1000000;
		printf("Recon time was %d.%06d seconds, accumulated XOR time"
		    " was %ld us (%ld.%06ld).\n", (int) elpsd.tv_sec,
		    (int) elpsd.tv_usec, raidPtr->accumXorTimeUs, xor_s,
		    xor_resid_us);
		printf(" (start time %d sec %d usec, end time %d sec %d"
		    " usec)\n",
		    (int) raidPtr->reconControl[row]->starttime.tv_sec,
		    (int) raidPtr->reconControl[row]->starttime.tv_usec,
		    (int) etime.tv_sec, (int) etime.tv_usec);

#if RF_RECON_STATS > 0
		printf("Total head-sep stall count was %d.\n",
		    (int) reconDesc->hsStallCount);
#endif
		/* Release everything that state 0/1 set up. */
		rf_FreeReconControl(raidPtr, row);
		RF_Free(raidPtr->recon_tracerecs, raidPtr->numCol *
		    sizeof(RF_AccTraceEntry_t));
		rf_FreeReconDesc(reconDesc);

	}

	/* reconDesc has been freed above; only raidPtr is used from here. */
	rf_SignalReconDone(raidPtr);
	return (0);
}
930
931
932
933
934
935
936
937 int
938 rf_ProcessReconEvent(RF_Raid_t *raidPtr, RF_RowCol_t frow,
939 RF_ReconEvent_t *event)
940 {
941 int retcode = 0, submitblocked;
942 RF_ReconBuffer_t *rbuf;
943 RF_SectorCount_t sectorsPerRU;
944
945 Dprintf1("RECON: rf_ProcessReconEvent type %d.\n", event->type);
946
947 switch (event->type) {
948
949
950 case RF_REVENT_READDONE:
951 rbuf = raidPtr->reconControl[frow]
952 ->perDiskInfo[event->col].rbuf;
953 Dprintf3("RECON: READDONE EVENT: row %d col %d psid %ld.\n",
954 frow, event->col, rbuf->parityStripeID);
955 Dprintf7("RECON: done read psid %ld buf %lx %02x %02x %02x"
956 " %02x %02x.\n", rbuf->parityStripeID, rbuf->buffer,
957 rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff,
958 rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff,
959 rbuf->buffer[4] & 0xff);
960 rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
961 submitblocked = rf_SubmitReconBuffer(rbuf, 0, 0);
962 Dprintf1("RECON: submitblocked=%d.\n", submitblocked);
963 if (!submitblocked)
964 retcode = rf_IssueNextReadRequest(raidPtr, frow,
965 event->col);
966 break;
967
968
969 case RF_REVENT_WRITEDONE:
970 if (rf_floatingRbufDebug) {
971 rf_CheckFloatingRbufCount(raidPtr, 1);
972 }
973 sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit *
974 raidPtr->Layout.SUsPerRU;
975 rbuf = (RF_ReconBuffer_t *) event->arg;
976 rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
977 Dprintf3("RECON: WRITEDONE EVENT: psid %d ru %d"
978 " (%d %% complete).\n",
979 rbuf->parityStripeID, rbuf->which_ru,
980 raidPtr->reconControl[frow]->percentComplete);
981 rf_ReconMapUpdate(raidPtr, raidPtr->reconControl[frow]
982 ->reconMap, rbuf->failedDiskSectorOffset,
983 rbuf->failedDiskSectorOffset + sectorsPerRU - 1);
984 rf_RemoveFromActiveReconTable(raidPtr, frow,
985 rbuf->parityStripeID, rbuf->which_ru);
986
987 if (rbuf->type == RF_RBUF_TYPE_FLOATING) {
988 RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex);
989 raidPtr->numFullReconBuffers--;
990 rf_ReleaseFloatingReconBuffer(raidPtr, frow, rbuf);
991 RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex);
992 } else
993 if (rbuf->type == RF_RBUF_TYPE_FORCED)
994 rf_FreeReconBuffer(rbuf);
995 else
996 RF_ASSERT(0);
997 break;
998
999
1000 case RF_REVENT_BUFCLEAR:
1001 Dprintf2("RECON: BUFCLEAR EVENT: row %d col %d.\n", frow,
1002 event->col);
1003 submitblocked = rf_SubmitReconBuffer(raidPtr
1004 ->reconControl[frow]->perDiskInfo[event->col].rbuf, 0,
1005 (int) (long) event->arg);
1006 RF_ASSERT(!submitblocked);
1007
1008
1009
1010
1011 retcode = rf_IssueNextReadRequest(raidPtr, frow, event->col);
1012 break;
1013
1014
1015 case RF_REVENT_BLOCKCLEAR:
1016 DDprintf2("RECON: BLOCKCLEAR EVENT: row %d col %d.\n",
1017 frow, event->col);
1018 retcode = rf_TryToRead(raidPtr, frow, event->col);
1019 break;
1020
1021
1022
1023
1024
1025 case RF_REVENT_HEADSEPCLEAR:
1026 Dprintf2("RECON: HEADSEPCLEAR EVENT: row %d col %d.\n",
1027 frow, event->col);
1028 retcode = rf_TryToRead(raidPtr, frow, event->col);
1029 break;
1030
1031
1032 case RF_REVENT_BUFREADY:
1033 Dprintf2("RECON: BUFREADY EVENT: row %d col %d.\n",
1034 frow, event->col);
1035 retcode = rf_IssueNextWriteRequest(raidPtr, frow);
1036 if (rf_floatingRbufDebug) {
1037 rf_CheckFloatingRbufCount(raidPtr, 1);
1038 }
1039 break;
1040
1041
1042
1043
1044
1045 case RF_REVENT_SKIP:
1046 DDprintf2("RECON: SKIP EVENT: row %d col %d.\n",
1047 frow, event->col);
1048 retcode = rf_IssueNextReadRequest(raidPtr, frow, event->col);
1049 break;
1050
1051
1052
1053
1054
1055 case RF_REVENT_FORCEDREADDONE:
1056 rbuf = (RF_ReconBuffer_t *) event->arg;
1057 rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
1058 DDprintf2("RECON: FORCEDREADDONE EVENT: row %d col %d.\n",
1059 frow, event->col);
1060 submitblocked = rf_SubmitReconBuffer(rbuf, 1, 0);
1061 RF_ASSERT(!submitblocked);
1062 break;
1063
1064 default:
1065 RF_PANIC();
1066 }
1067 rf_FreeReconEventDesc(event);
1068 return (retcode);
1069 }
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092 int
1093 rf_IssueNextReadRequest(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
1094 {
1095 RF_PerDiskReconCtrl_t *ctrl =
1096 &raidPtr->reconControl[row]->perDiskInfo[col];
1097 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
1098 RF_ReconBuffer_t *rbuf = ctrl->rbuf;
1099 RF_ReconUnitCount_t RUsPerPU =
1100 layoutPtr->SUsPerPU / layoutPtr->SUsPerRU;
1101 RF_SectorCount_t sectorsPerRU =
1102 layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU;
1103 int do_new_check = 0, retcode = 0, status;
1104
1105
1106
1107
1108
1109 if (ctrl->headSepCounter <=
1110 raidPtr->reconControl[row]->minHeadSepCounter)
1111 do_new_check = 1;
1112
1113 while (1) {
1114
1115 ctrl->ru_count++;
1116 if (ctrl->ru_count < RUsPerPU) {
1117 ctrl->diskOffset += sectorsPerRU;
1118 rbuf->failedDiskSectorOffset += sectorsPerRU;
1119 } else {
1120 ctrl->curPSID++;
1121 ctrl->ru_count = 0;
1122
1123
1124 if (ctrl->curPSID >=
1125 raidPtr->reconControl[row]->lastPSID) {
1126 rf_CheckForNewMinHeadSep(raidPtr, row,
1127 ++(ctrl->headSepCounter));
1128 return (1);
1129 }
1130
1131
1132
1133
1134
1135
1136 status = rf_ComputePSDiskOffsets(raidPtr,
1137 ctrl->curPSID, row, col, &ctrl->diskOffset,
1138 &rbuf->failedDiskSectorOffset, &rbuf->spRow,
1139 &rbuf->spCol, &rbuf->spOffset);
1140 if (status) {
1141 ctrl->ru_count = RUsPerPU - 1;
1142 continue;
1143 }
1144 }
1145 rbuf->which_ru = ctrl->ru_count;
1146
1147
1148 if (rf_CheckRUReconstructed(raidPtr->reconControl[row]
1149 ->reconMap, rbuf->failedDiskSectorOffset)) {
1150 Dprintf2("Skipping psid %ld ru %d: already"
1151 " reconstructed.\n", ctrl->curPSID, ctrl->ru_count);
1152 continue;
1153 }
1154 break;
1155 }
1156 ctrl->headSepCounter++;
1157 if (do_new_check)
1158 rf_CheckForNewMinHeadSep(raidPtr, row, ctrl->headSepCounter);
1159
1160
1161
1162
1163
1164
1165 rbuf->parityStripeID = ctrl->curPSID;
1166 rbuf->which_ru = ctrl->ru_count;
1167 bzero((char *) &raidPtr->recon_tracerecs[col],
1168 sizeof(raidPtr->recon_tracerecs[col]));
1169 raidPtr->recon_tracerecs[col].reconacc = 1;
1170 RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer);
1171 retcode = rf_TryToRead(raidPtr, row, col);
1172 return (retcode);
1173 }
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184 int
1185 rf_TryToRead(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
1186 {
1187 RF_PerDiskReconCtrl_t *ctrl =
1188 &raidPtr->reconControl[row]->perDiskInfo[col];
1189 RF_SectorCount_t sectorsPerRU =
1190 raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU;
1191 RF_StripeNum_t psid = ctrl->curPSID;
1192 RF_ReconUnitNum_t which_ru = ctrl->ru_count;
1193 RF_DiskQueueData_t *req;
1194 int status, created = 0;
1195 RF_ReconParityStripeStatus_t *pssPtr;
1196
1197
1198
1199
1200
1201 if (rf_CheckHeadSeparation(raidPtr, ctrl, row, col,
1202 ctrl->headSepCounter, which_ru))
1203 return (0);
1204 RF_LOCK_PSS_MUTEX(raidPtr, row, psid);
1205 pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]
1206 ->pssTable, psid, which_ru, RF_PSS_CREATE, &created);
1207
1208
1209
1210
1211
1212
1213 status = rf_CheckForcedOrBlockedReconstruction(raidPtr, pssPtr, ctrl,
1214 row, col, psid, which_ru);
1215 if (status == RF_PSS_RECON_BLOCKED) {
1216 Dprintf2("RECON: Stalling psid %ld ru %d: recon blocked.\n",
1217 psid, which_ru);
1218 goto out;
1219 } else
1220 if (status == RF_PSS_FORCED_ON_WRITE) {
1221 rf_CauseReconEvent(raidPtr, row, col, NULL,
1222 RF_REVENT_SKIP);
1223 goto out;
1224 }
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234 if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap,
1235 ctrl->rbuf->failedDiskSectorOffset)) {
1236 Dprintf2("RECON: Skipping psid %ld ru %d: prior recon after"
1237 " stall.\n", psid, which_ru);
1238 if (created)
1239 rf_PSStatusDelete(raidPtr,
1240 raidPtr->reconControl[row]->pssTable, pssPtr);
1241 rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP);
1242 goto out;
1243 }
1244
1245 Dprintf5("RECON: Read for psid %ld on row %d col %d offset %ld"
1246 " buf %lx.\n", psid, row, col, ctrl->diskOffset,
1247 ctrl->rbuf->buffer);
1248 RF_ETIMER_STOP(raidPtr->recon_tracerecs[col].recon_timer);
1249 RF_ETIMER_EVAL(raidPtr->recon_tracerecs[col].recon_timer);
1250 raidPtr->recon_tracerecs[col].specific.recon.recon_start_to_fetch_us =
1251 RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[col].recon_timer);
1252 RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer);
1253
1254
1255
1256
1257
1258 req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, ctrl->diskOffset,
1259 sectorsPerRU, ctrl->rbuf->buffer, psid, which_ru,
1260 rf_ReconReadDoneProc, (void *) ctrl, NULL,
1261 &raidPtr->recon_tracerecs[col], (void *) raidPtr, 0, NULL);
1262
1263 RF_ASSERT(req);
1264
1265 ctrl->rbuf->arg = (void *) req;
1266 rf_DiskIOEnqueue(&raidPtr->Queues[row][col], req, RF_IO_RECON_PRIORITY);
1267 pssPtr->issued[col] = 1;
1268
1269 out:
1270 RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
1271 return (0);
1272 }
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300 int
1301 rf_ComputePSDiskOffsets(
1302 RF_Raid_t *raidPtr,
1303 RF_StripeNum_t psid,
1304 RF_RowCol_t row,
1305
1306
1307
1308 RF_RowCol_t col,
1309 RF_SectorNum_t *outDiskOffset,
1310 RF_SectorNum_t *outFailedDiskSectorOffset,
1311 RF_RowCol_t *spRow,
1312
1313
1314
1315 RF_RowCol_t *spCol,
1316 RF_SectorNum_t *spOffset
1317
1318
1319
1320 )
1321 {
1322 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
1323 RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol;
1324 RF_RaidAddr_t sosRaidAddress;
1325 RF_RowCol_t *diskids;
1326 u_int i, j, k, i_offset, j_offset;
1327 RF_RowCol_t prow, pcol;
1328 int testcol, testrow;
1329 RF_RowCol_t stripe;
1330 RF_SectorNum_t poffset;
1331 char i_is_parity = 0, j_is_parity = 0;
1332 RF_RowCol_t stripeWidth =
1333 layoutPtr->numDataCol + layoutPtr->numParityCol;
1334
1335
1336 sosRaidAddress = rf_ParityStripeIDToRaidAddress(layoutPtr, psid);
1337 (layoutPtr->map->IdentifyStripe) (raidPtr, sosRaidAddress, &diskids,
1338 &stripe);
1339 RF_ASSERT(diskids);
1340
1341
1342
1343
1344
1345 if (row != stripe)
1346 goto skipit;
1347 for (i = 0; i < stripeWidth; i++) {
1348 if (col == diskids[i])
1349 break;
1350 }
1351 if (i == stripeWidth)
1352 goto skipit;
1353 for (j = 0; j < stripeWidth; j++) {
1354 if (fcol == diskids[j])
1355 break;
1356 }
1357 if (j == stripeWidth) {
1358 goto skipit;
1359 }
1360
1361 (layoutPtr->map->MapParity) (raidPtr, sosRaidAddress, &prow, &pcol,
1362 &poffset, RF_DONT_REMAP);
1363
1364
1365
1366
1367
1368
1369 for (k = 0; k < stripeWidth; k++)
1370 if (diskids[k] == pcol)
1371 break;
1372 RF_ASSERT(k < stripeWidth);
1373 i_offset = i;
1374 j_offset = j;
1375 if (k < i)
1376 i_offset--;
1377 else
1378 if (k == i) {
1379 i_is_parity = 1;
1380 i_offset = 0;
1381 }
1382
1383
1384
1385 if (k < j)
1386 j_offset--;
1387 else
1388 if (k == j) {
1389 j_is_parity = 1;
1390 j_offset = 0;
1391 }
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402 if (i_is_parity)
1403 layoutPtr->map->MapParity(raidPtr, sosRaidAddress + i_offset *
1404 layoutPtr->sectorsPerStripeUnit, &testrow, &testcol,
1405 outDiskOffset, RF_DONT_REMAP);
1406 else
1407 layoutPtr->map->MapSector(raidPtr, sosRaidAddress + i_offset *
1408 layoutPtr->sectorsPerStripeUnit, &testrow, &testcol,
1409 outDiskOffset, RF_DONT_REMAP);
1410
1411 RF_ASSERT(row == testrow && col == testcol);
1412
1413 if (j_is_parity)
1414 layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset *
1415 layoutPtr->sectorsPerStripeUnit, &testrow, &testcol,
1416 outFailedDiskSectorOffset, RF_DONT_REMAP);
1417 else
1418 layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset *
1419 layoutPtr->sectorsPerStripeUnit, &testrow, &testcol,
1420 outFailedDiskSectorOffset, RF_DONT_REMAP);
1421 RF_ASSERT(row == testrow && fcol == testcol);
1422
1423
1424 if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
1425 if (j_is_parity)
1426 layoutPtr->map->MapParity(raidPtr, sosRaidAddress +
1427 j_offset * layoutPtr->sectorsPerStripeUnit, spRow,
1428 spCol, spOffset, RF_REMAP);
1429 else
1430 layoutPtr->map->MapSector(raidPtr, sosRaidAddress +
1431 j_offset * layoutPtr->sectorsPerStripeUnit, spRow,
1432 spCol, spOffset, RF_REMAP);
1433 } else {
1434 *spRow = raidPtr->reconControl[row]->spareRow;
1435 *spCol = raidPtr->reconControl[row]->spareCol;
1436 *spOffset = *outFailedDiskSectorOffset;
1437 }
1438
1439 return (0);
1440
1441 skipit:
1442 Dprintf3("RECON: Skipping psid %ld: nothing needed from r%d c%d.\n",
1443 psid, row, col);
1444 return (1);
1445 }
1446
1447
1448
1449
1450
1451
1452 int
1453 rf_IssueNextWriteRequest(RF_Raid_t *raidPtr, RF_RowCol_t row)
1454 {
1455 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
1456 RF_SectorCount_t sectorsPerRU =
1457 layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU;
1458 RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol;
1459 RF_ReconBuffer_t *rbuf;
1460 RF_DiskQueueData_t *req;
1461
1462 rbuf = rf_GetFullReconBuffer(raidPtr->reconControl[row]);
1463 RF_ASSERT(rbuf);
1464
1465
1466
1467 RF_ASSERT(rbuf->pssPtr);
1468
1469 rbuf->pssPtr->writeRbuf = rbuf;
1470 rbuf->pssPtr = NULL;
1471
1472 Dprintf7("RECON: New write (r %d c %d offs %d) for psid %ld ru %d"
1473 " (failed disk offset %ld) buf %lx.\n",
1474 rbuf->spRow, rbuf->spCol, rbuf->spOffset, rbuf->parityStripeID,
1475 rbuf->which_ru, rbuf->failedDiskSectorOffset, rbuf->buffer);
1476 Dprintf6("RECON: new write psid %ld %02x %02x %02x %02x %02x.\n",
1477 rbuf->parityStripeID, rbuf->buffer[0] & 0xff,
1478 rbuf->buffer[1] & 0xff, rbuf->buffer[2] & 0xff,
1479 rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff);
1480
1481
1482
1483
1484
1485 req = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, rbuf->spOffset,
1486 sectorsPerRU, rbuf->buffer, rbuf->parityStripeID, rbuf->which_ru,
1487 rf_ReconWriteDoneProc, (void *) rbuf, NULL,
1488 &raidPtr->recon_tracerecs[fcol], (void *) raidPtr, 0, NULL);
1489
1490 RF_ASSERT(req);
1491
1492 rbuf->arg = (void *) req;
1493 rf_DiskIOEnqueue(&raidPtr->Queues[rbuf->spRow][rbuf->spCol], req,
1494 RF_IO_RECON_PRIORITY);
1495
1496 return (0);
1497 }
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507 int
1508 rf_ReconReadDoneProc(void *arg, int status)
1509 {
1510 RF_PerDiskReconCtrl_t *ctrl = (RF_PerDiskReconCtrl_t *) arg;
1511 RF_Raid_t *raidPtr = ctrl->reconCtrl->reconDesc->raidPtr;
1512
1513 if (status) {
1514
1515
1516
1517 printf("Recon read failed !\n");
1518 RF_PANIC();
1519 }
1520 RF_ETIMER_STOP(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
1521 RF_ETIMER_EVAL(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
1522 raidPtr->recon_tracerecs[ctrl->col].specific.recon.
1523 recon_fetch_to_return_us =
1524 RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
1525 RF_ETIMER_START(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
1526
1527 rf_CauseReconEvent(raidPtr, ctrl->row, ctrl->col, NULL,
1528 RF_REVENT_READDONE);
1529 return (0);
1530 }
1531
1532
1533
1534
1535
1536
1537
1538
1539 int
1540 rf_ReconWriteDoneProc(void *arg, int status)
1541 {
1542 RF_ReconBuffer_t *rbuf = (RF_ReconBuffer_t *) arg;
1543
1544 Dprintf2("Reconstruction completed on psid %ld ru %d.\n",
1545 rbuf->parityStripeID, rbuf->which_ru);
1546 if (status) {
1547
1548 printf("Recon write failed !\n");
1549 RF_PANIC();
1550 }
1551 rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col,
1552 arg, RF_REVENT_WRITEDONE);
1553 return (0);
1554 }
1555
1556
1557
1558
1559
1560
1561 void
1562 rf_CheckForNewMinHeadSep(RF_Raid_t *raidPtr, RF_RowCol_t row,
1563 RF_HeadSepLimit_t hsCtr)
1564 {
1565 RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row];
1566 RF_HeadSepLimit_t new_min;
1567 RF_RowCol_t i;
1568 RF_CallbackDesc_t *p;
1569
1570 RF_ASSERT(hsCtr >= reconCtrlPtr->minHeadSepCounter);
1571
1572
1573 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
1574
1575 new_min = ~(1L << (8 * sizeof(long) - 1));
1576 for (i = 0; i < raidPtr->numCol; i++)
1577 if (i != reconCtrlPtr->fcol) {
1578 if (reconCtrlPtr->perDiskInfo[i].headSepCounter <
1579 new_min)
1580 new_min =
1581 reconCtrlPtr->perDiskInfo[i].headSepCounter;
1582 }
1583
1584 if (new_min != reconCtrlPtr->minHeadSepCounter) {
1585 reconCtrlPtr->minHeadSepCounter = new_min;
1586 Dprintf1("RECON: new min head pos counter val is %ld.\n",
1587 new_min);
1588 while (reconCtrlPtr->headSepCBList) {
1589 if (reconCtrlPtr->headSepCBList->callbackArg.v >
1590 new_min)
1591 break;
1592 p = reconCtrlPtr->headSepCBList;
1593 reconCtrlPtr->headSepCBList = p->next;
1594 p->next = NULL;
1595 rf_CauseReconEvent(raidPtr, p->row, p->col, NULL,
1596 RF_REVENT_HEADSEPCLEAR);
1597 rf_FreeCallbackDesc(p);
1598 }
1599
1600 }
1601 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
1602 }
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616 int
1617 rf_CheckHeadSeparation(
1618 RF_Raid_t *raidPtr,
1619 RF_PerDiskReconCtrl_t *ctrl,
1620 RF_RowCol_t row,
1621 RF_RowCol_t col,
1622 RF_HeadSepLimit_t hsCtr,
1623 RF_ReconUnitNum_t which_ru
1624 )
1625 {
1626 RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row];
1627 RF_CallbackDesc_t *cb, *p, *pt;
1628 int retval = 0;
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
1639 if ((raidPtr->headSepLimit >= 0) &&
1640 ((ctrl->headSepCounter - reconCtrlPtr->minHeadSepCounter) >
1641 raidPtr->headSepLimit)) {
1642 Dprintf6("raid%d: RECON: head sep stall: row %d col %d hsCtr"
1643 " %ld minHSCtr %ld limit %ld.\n",
1644 raidPtr->raidid, row, col, ctrl->headSepCounter,
1645 reconCtrlPtr->minHeadSepCounter, raidPtr->headSepLimit);
1646 cb = rf_AllocCallbackDesc();
1647
1648
1649
1650
1651 cb->callbackArg.v = (ctrl->headSepCounter -
1652 raidPtr->headSepLimit + raidPtr->headSepLimit / 5);
1653 cb->row = row;
1654 cb->col = col;
1655 cb->next = NULL;
1656
1657
1658
1659
1660
1661 p = reconCtrlPtr->headSepCBList;
1662 if (!p)
1663 reconCtrlPtr->headSepCBList = cb;
1664 else
1665 if (cb->callbackArg.v < p->callbackArg.v) {
1666 cb->next = reconCtrlPtr->headSepCBList;
1667 reconCtrlPtr->headSepCBList = cb;
1668 } else {
1669 for (pt = p, p = p->next;
1670 p && (p->callbackArg.v < cb->callbackArg.v);
1671 pt = p, p = p->next);
1672 cb->next = p;
1673 pt->next = cb;
1674 }
1675 retval = 1;
1676 #if RF_RECON_STATS > 0
1677 ctrl->reconCtrl->reconDesc->hsStallCount++;
1678 #endif
1679 }
1680 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
1681
1682 return (retval);
1683 }
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694 int
1695 rf_CheckForcedOrBlockedReconstruction(
1696 RF_Raid_t *raidPtr,
1697 RF_ReconParityStripeStatus_t *pssPtr,
1698 RF_PerDiskReconCtrl_t *ctrl,
1699 RF_RowCol_t row,
1700 RF_RowCol_t col,
1701 RF_StripeNum_t psid,
1702 RF_ReconUnitNum_t which_ru
1703 )
1704 {
1705 RF_CallbackDesc_t *cb;
1706 int retcode = 0;
1707
1708 if ((pssPtr->flags & RF_PSS_FORCED_ON_READ) ||
1709 (pssPtr->flags & RF_PSS_FORCED_ON_WRITE))
1710 retcode = RF_PSS_FORCED_ON_WRITE;
1711 else
1712 if (pssPtr->flags & RF_PSS_RECON_BLOCKED) {
1713 Dprintf4("RECON: row %d col %d blocked at psid %ld"
1714 " ru %d.\n", row, col, psid, which_ru);
1715 cb = rf_AllocCallbackDesc();
1716
1717
1718
1719
1720 cb->row = row;
1721 cb->col = col;
1722 cb->next = pssPtr->blockWaitList;
1723 pssPtr->blockWaitList = cb;
1724 retcode = RF_PSS_RECON_BLOCKED;
1725 }
1726 if (!retcode)
1727 pssPtr->flags |= RF_PSS_UNDER_RECON;
1728
1729
1730
1731
1732 return (retcode);
1733 }
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744 int
1745 rf_ForceOrBlockRecon(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
1746 void (*cbFunc) (RF_Raid_t *, void *), void *cbArg)
1747 {
1748 RF_RowCol_t row = asmap->physInfo->row;
1749
1750
1751
1752 RF_StripeNum_t stripeID = asmap->stripeID;
1753
1754
1755
1756 RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit *
1757 raidPtr->Layout.SUsPerRU;
1758 RF_ReconParityStripeStatus_t *pssPtr;
1759
1760
1761
1762 RF_StripeNum_t psid;
1763 RF_SectorNum_t offset, fd_offset;
1764
1765
1766
1767 RF_RowCol_t *diskids;
1768 RF_RowCol_t stripe;
1769 RF_ReconUnitNum_t which_ru;
1770 RF_RowCol_t fcol, diskno, i;
1771 RF_ReconBuffer_t *new_rbuf;
1772 RF_DiskQueueData_t *req;
1773 RF_CallbackDesc_t *cb;
1774 int created = 0, nPromoted;
1775
1776 psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID,
1777 &which_ru);
1778
1779 RF_LOCK_PSS_MUTEX(raidPtr, row, psid);
1780
1781 pssPtr = rf_LookupRUStatus(raidPtr,
1782 raidPtr->reconControl[row]->pssTable, psid, which_ru,
1783 RF_PSS_CREATE | RF_PSS_RECON_BLOCKED, &created);
1784
1785
1786 if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) {
1787 RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
1788 return (0);
1789 }
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799 if (!(pssPtr->flags & RF_PSS_FORCED_ON_WRITE) &&
1800 !(pssPtr->flags & RF_PSS_FORCED_ON_READ)) {
1801 DDprintf1("Forcing recon on psid %ld.\n", psid);
1802
1803 pssPtr->flags |= RF_PSS_FORCED_ON_WRITE;
1804
1805 pssPtr->flags &= ~RF_PSS_RECON_BLOCKED;
1806 fcol = raidPtr->reconControl[row]->fcol;
1807
1808
1809
1810
1811 (raidPtr->Layout.map->IdentifyStripe) (raidPtr,
1812 asmap->raidAddress, &diskids, &stripe);
1813 RF_ASSERT(row == stripe);
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823 for (i = 0; i < raidPtr->Layout.numDataCol +
1824 raidPtr->Layout.numParityCol; i++)
1825 if ((diskno = diskids[i]) != fcol) {
1826 if (pssPtr->issued[diskno]) {
1827 nPromoted = rf_DiskIOPromote(&raidPtr
1828 ->Queues[row][diskno], psid,
1829 which_ru);
1830 if (rf_reconDebug && nPromoted)
1831 printf("raid%d: promoted read"
1832 " from row %d col %d.\n",
1833 raidPtr->raidid, row,
1834 diskno);
1835 } else {
1836
1837 new_rbuf = rf_MakeReconBuffer(raidPtr,
1838 row, diskno, RF_RBUF_TYPE_FORCED);
1839
1840 rf_ComputePSDiskOffsets(raidPtr, psid,
1841 row, diskno, &offset, &fd_offset,
1842 &new_rbuf->spRow, &new_rbuf->spCol,
1843 &new_rbuf->spOffset);
1844 new_rbuf->parityStripeID = psid;
1845
1846 new_rbuf->which_ru = which_ru;
1847 new_rbuf->failedDiskSectorOffset =
1848 fd_offset;
1849 new_rbuf->priority =
1850 RF_IO_NORMAL_PRIORITY;
1851
1852
1853
1854
1855
1856 req = rf_CreateDiskQueueData(
1857 RF_IO_TYPE_READ, offset +
1858 which_ru * sectorsPerRU,
1859 sectorsPerRU, new_rbuf->buffer,
1860 psid, which_ru, (int (*)
1861 (void *, int))
1862 rf_ForceReconReadDoneProc,
1863 (void *) new_rbuf, NULL,
1864 NULL, (void *) raidPtr, 0, NULL);
1865
1866 RF_ASSERT(req);
1867
1868
1869
1870
1871 new_rbuf->arg = req;
1872
1873 rf_DiskIOEnqueue(&raidPtr
1874 ->Queues[row][diskno], req,
1875 RF_IO_NORMAL_PRIORITY);
1876 Dprintf3("raid%d: Issued new read req"
1877 " on row %d col %d.\n",
1878 raidPtr->raidid, row, diskno);
1879 }
1880 }
1881
1882
1883
1884
1885 if (rf_DiskIOPromote(&raidPtr->Queues[row][fcol],
1886 psid, which_ru))
1887 printf("raid%d: promoted write to row %d col %d.\n",
1888 raidPtr->raidid, row, fcol);
1889 }
1890
1891
1892
1893
1894 cb = rf_AllocCallbackDesc();
1895
1896
1897
1898
1899 cb->callbackFunc = (void (*) (RF_CBParam_t)) cbFunc;
1900 cb->callbackArg.p = (void *) cbArg;
1901 cb->next = pssPtr->procWaitList;
1902 pssPtr->procWaitList = cb;
1903 DDprintf2("raid%d: Waiting for forced recon on psid %ld.\n",
1904 raidPtr->raidid, psid);
1905
1906 RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
1907 return (1);
1908 }
1909
1910
1911
1912
1913
1914
1915
1916 void
1917 rf_ForceReconReadDoneProc(void *arg, int status)
1918 {
1919 RF_ReconBuffer_t *rbuf = arg;
1920
1921 if (status) {
1922
1923 printf("Forced recon read failed !\n");
1924 RF_PANIC();
1925 }
1926 rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col,
1927 (void *) rbuf, RF_REVENT_FORCEDREADDONE);
1928 }
1929
1930
1931
1932 int
1933 rf_UnblockRecon(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
1934 {
1935 RF_RowCol_t row = asmap->origRow;
1936 RF_StripeNum_t stripeID = asmap->stripeID;
1937 RF_ReconParityStripeStatus_t *pssPtr;
1938 RF_ReconUnitNum_t which_ru;
1939 RF_StripeNum_t psid;
1940 int created = 0;
1941 RF_CallbackDesc_t *cb;
1942
1943 psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID,
1944 &which_ru);
1945 RF_LOCK_PSS_MUTEX(raidPtr, row, psid);
1946 pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]
1947 ->pssTable, psid, which_ru, RF_PSS_NONE, &created);
1948
1949
1950
1951
1952
1953
1954
1955 if (!pssPtr) {
1956
1957
1958
1959
1960 if (rf_reconDebug || rf_pssDebug)
1961 printf("Warning: no pss descriptor upon unblock on"
1962 " psid %ld RU %d.\n", (long) psid, which_ru);
1963 goto out;
1964 }
1965 pssPtr->blockCount--;
1966 Dprintf3("raid%d: unblocking recon on psid %ld: blockcount is %d.\n",
1967 raidPtr->raidid, psid, pssPtr->blockCount);
1968 if (pssPtr->blockCount == 0) {
1969
1970
1971
1972
1973
1974
1975
1976 pssPtr->flags &= ~RF_PSS_RECON_BLOCKED;
1977
1978
1979 while (pssPtr->blockWaitList) {
1980
1981
1982
1983
1984 cb = pssPtr->blockWaitList;
1985 pssPtr->blockWaitList = cb->next;
1986 cb->next = NULL;
1987 rf_CauseReconEvent(raidPtr, cb->row, cb->col, NULL,
1988 RF_REVENT_BLOCKCLEAR);
1989 rf_FreeCallbackDesc(cb);
1990 }
1991 if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) {
1992
1993 rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]
1994 ->pssTable, pssPtr);
1995 }
1996 }
1997 out:
1998 RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
1999 return (0);
2000 }