This source file includes the following definitions.
- RF_ParityLoggingConfigInfo_t
- rf_ConfigureParityLogging
- rf_FreeRegionInfo
- rf_FreeParityLogQueue
- rf_FreeRegionBufferQueue
- rf_ShutdownParityLoggingRegionInfo
- rf_ShutdownParityLoggingPool
- rf_ShutdownParityLoggingRegionBufferPool
- rf_ShutdownParityLoggingParityBufferPool
- rf_ShutdownParityLoggingDiskQueue
- rf_ShutdownParityLogging
- rf_GetDefaultNumFloatingReconBuffersParityLogging
- rf_GetDefaultHeadSepLimitParityLogging
- rf_MapRegionIDParityLogging
- rf_MapSectorParityLogging
- rf_MapParityParityLogging
- rf_MapLogParityLogging
- rf_MapRegionParity
- rf_IdentifyStripeParityLogging
- rf_MapSIDToPSIDParityLogging
- rf_ParityLoggingDagSelect
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36 #include "rf_archs.h"
37
38 #if RF_INCLUDE_PARITYLOGGING > 0
39
40 #include "rf_types.h"
41 #include "rf_raid.h"
42 #include "rf_dag.h"
43 #include "rf_dagutils.h"
44 #include "rf_dagfuncs.h"
45 #include "rf_dagffrd.h"
46 #include "rf_dagffwr.h"
47 #include "rf_dagdegrd.h"
48 #include "rf_dagdegwr.h"
49 #include "rf_paritylog.h"
50 #include "rf_paritylogDiskMgr.h"
51 #include "rf_paritylogging.h"
52 #include "rf_parityloggingdags.h"
53 #include "rf_general.h"
54 #include "rf_map.h"
55 #include "rf_utils.h"
56 #include "rf_shutdown.h"
57
58 typedef struct RF_ParityLoggingConfigInfo_s {
59 RF_RowCol_t **stripeIdentifier;
60
61
62
63 } RF_ParityLoggingConfigInfo_t;
64
65 void rf_FreeRegionInfo(RF_Raid_t *, RF_RegionId_t);
66 void rf_FreeParityLogQueue(RF_Raid_t *, RF_ParityLogQueue_t *);
67 void rf_FreeRegionBufferQueue(RF_RegionBufferQueue_t *);
68 void rf_ShutdownParityLogging(RF_ThreadArg_t);
69 void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t);
70 void rf_ShutdownParityLoggingPool(RF_ThreadArg_t);
71 void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t);
72 void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t);
73 void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t);
74
75
76 int
77 rf_ConfigureParityLogging(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
78 RF_Config_t *cfgPtr)
79 {
80 int i, j, startdisk, rc;
81 RF_SectorCount_t totalLogCapacity, fragmentation, lastRegionCapacity;
82 RF_SectorCount_t parityBufferCapacity, maxRegionParityRange;
83 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
84 RF_ParityLoggingConfigInfo_t *info;
85 RF_ParityLog_t *l = NULL, *next;
86 caddr_t lHeapPtr;
87
88 if (rf_numParityRegions <= 0)
89 return(EINVAL);
90
91
92
93
94
95
96
97
98 raidPtr->numSectorsPerLog = RF_DEFAULT_NUM_SECTORS_PER_LOG;
99
100
101 RF_MallocAndAdd(info, sizeof(RF_ParityLoggingConfigInfo_t),
102 (RF_ParityLoggingConfigInfo_t *),
103 raidPtr->cleanupList);
104 if (info == NULL)
105 return (ENOMEM);
106 layoutPtr->layoutSpecificInfo = (void *) info;
107
108 RF_ASSERT(raidPtr->numRow == 1);
109
110
111
112
113
114 info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol),
115 (raidPtr->numCol),
116 raidPtr->cleanupList);
117 if (info->stripeIdentifier == NULL)
118 return (ENOMEM);
119
120 startdisk = 0;
121 for (i = 0; i < (raidPtr->numCol); i++) {
122 for (j = 0; j < (raidPtr->numCol); j++) {
123 info->stripeIdentifier[i][j] = (startdisk + j) %
124 (raidPtr->numCol - 1);
125 }
126 if ((--startdisk) < 0)
127 startdisk = raidPtr->numCol - 1 - 1;
128 }
129
130
131 layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
132 layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit <<
133 raidPtr->logBytesPerSector;
134 layoutPtr->numParityCol = 1;
135 layoutPtr->numParityLogCol = 1;
136 layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol -
137 layoutPtr->numParityLogCol;
138 layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol *
139 layoutPtr->sectorsPerStripeUnit;
140 layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
141 raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk *
142 layoutPtr->sectorsPerStripeUnit;
143
144 raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk *
145 layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170 totalLogCapacity = layoutPtr->stripeUnitsPerDisk *
171 layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol;
172 raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions;
173 if (rf_parityLogDebug)
174 printf("bytes per sector %d\n", raidPtr->bytesPerSector);
175
176
177
178
179
180
181 fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog;
182 if (fragmentation > 0)
183 for (i = 1; i < (raidPtr->numSectorsPerLog / 2); i++) {
184 if (((totalLogCapacity / (rf_numParityRegions + i)) %
185 raidPtr->numSectorsPerLog) < fragmentation) {
186 rf_numParityRegions++;
187 raidPtr->regionLogCapacity = totalLogCapacity /
188 rf_numParityRegions;
189 fragmentation = raidPtr->regionLogCapacity %
190 raidPtr->numSectorsPerLog;
191 }
192 if (((totalLogCapacity / (rf_numParityRegions - i)) %
193 raidPtr->numSectorsPerLog) < fragmentation) {
194 rf_numParityRegions--;
195 raidPtr->regionLogCapacity = totalLogCapacity /
196 rf_numParityRegions;
197 fragmentation = raidPtr->regionLogCapacity %
198 raidPtr->numSectorsPerLog;
199 }
200 }
201
202 raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity /
203 raidPtr->numSectorsPerLog) * raidPtr->numSectorsPerLog;
204
205 raidPtr->numParityLogs = rf_totalInCoreLogCapacity /
206 (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog);
207
208
209
210
211 if (raidPtr->numParityLogs < rf_numParityRegions)
212 raidPtr->numParityLogs = rf_numParityRegions;
213
214
215 printf("Allocating %d bytes for in-core parity region info\n",
216 (int) (rf_numParityRegions * sizeof(RF_RegionInfo_t)));
217 RF_Malloc(raidPtr->regionInfo,
218 (rf_numParityRegions * sizeof(RF_RegionInfo_t)),
219 (RF_RegionInfo_t *));
220 if (raidPtr->regionInfo == NULL)
221 return (ENOMEM);
222
223
224 lastRegionCapacity = raidPtr->regionLogCapacity;
225 while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity +
226 lastRegionCapacity > totalLogCapacity)
227 lastRegionCapacity = lastRegionCapacity -
228 raidPtr->numSectorsPerLog;
229
230 raidPtr->regionParityRange = raidPtr->sectorsPerDisk /
231 rf_numParityRegions;
232 maxRegionParityRange = raidPtr->regionParityRange;
233
234
235
236
237
238
239 printf("Allocating %d bytes for %d parity logs\n",
240 raidPtr->numParityLogs * raidPtr->numSectorsPerLog *
241 raidPtr->bytesPerSector,
242 raidPtr->numParityLogs);
243 RF_Malloc(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs *
244 raidPtr->numSectorsPerLog * raidPtr->bytesPerSector,
245 (caddr_t));
246 if (raidPtr->parityLogBufferHeap == NULL)
247 return (ENOMEM);
248 lHeapPtr = raidPtr->parityLogBufferHeap;
249 rc = rf_mutex_init(&raidPtr->parityLogPool.mutex);
250 if (rc) {
251 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
252 __FILE__, __LINE__, rc);
253 RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs *
254 raidPtr->numSectorsPerLog * raidPtr->bytesPerSector);
255 return (ENOMEM);
256 }
257 for (i = 0; i < raidPtr->numParityLogs; i++) {
258 if (i == 0) {
259 RF_Calloc(raidPtr->parityLogPool.parityLogs, 1,
260 sizeof(RF_ParityLog_t), (RF_ParityLog_t *));
261 if (raidPtr->parityLogPool.parityLogs == NULL) {
262 RF_Free(raidPtr->parityLogBufferHeap,
263 raidPtr->numParityLogs *
264 raidPtr->numSectorsPerLog *
265 raidPtr->bytesPerSector);
266 return (ENOMEM);
267 }
268 l = raidPtr->parityLogPool.parityLogs;
269 } else {
270 RF_Calloc(l->next, 1, sizeof(RF_ParityLog_t),
271 (RF_ParityLog_t *));
272 if (l->next == NULL) {
273 RF_Free(raidPtr->parityLogBufferHeap,
274 raidPtr->numParityLogs *
275 raidPtr->numSectorsPerLog *
276 raidPtr->bytesPerSector);
277 for (l = raidPtr->parityLogPool.parityLogs;
278 l;
279 l = next) {
280 next = l->next;
281 if (l->records)
282 RF_Free(l->records,
283 (raidPtr->numSectorsPerLog *
284 sizeof(RF_ParityLogRecord_t)));
285 RF_Free(l, sizeof(RF_ParityLog_t));
286 }
287 return (ENOMEM);
288 }
289 l = l->next;
290 }
291 l->bufPtr = lHeapPtr;
292 lHeapPtr += raidPtr->numSectorsPerLog *
293 raidPtr->bytesPerSector;
294 RF_Malloc(l->records, (raidPtr->numSectorsPerLog *
295 sizeof(RF_ParityLogRecord_t)),
296 (RF_ParityLogRecord_t *));
297 if (l->records == NULL) {
298 RF_Free(raidPtr->parityLogBufferHeap,
299 raidPtr->numParityLogs *
300 raidPtr->numSectorsPerLog *
301 raidPtr->bytesPerSector);
302 for (l = raidPtr->parityLogPool.parityLogs;
303 l;
304 l = next) {
305 next = l->next;
306 if (l->records)
307 RF_Free(l->records,
308 (raidPtr->numSectorsPerLog *
309 sizeof(RF_ParityLogRecord_t)));
310 RF_Free(l, sizeof(RF_ParityLog_t));
311 }
312 return (ENOMEM);
313 }
314 }
315 rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingPool, raidPtr);
316 if (rc) {
317 RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
318 " rc=%d\n", __FILE__, __LINE__, rc);
319 rf_ShutdownParityLoggingPool(raidPtr);
320 return (rc);
321 }
322
323 rc = rf_mutex_init(&raidPtr->regionBufferPool.mutex);
324 if (rc) {
325 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
326 __FILE__, __LINE__, rc);
327 return (ENOMEM);
328 }
329 rc = rf_cond_init(&raidPtr->regionBufferPool.cond);
330 if (rc) {
331 RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n",
332 __FILE__, __LINE__, rc);
333 rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
334 return (ENOMEM);
335 }
336 raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity *
337 raidPtr->bytesPerSector;
338 printf("regionBufferPool.bufferSize %d\n",
339 raidPtr->regionBufferPool.bufferSize);
340
341
342 raidPtr->regionBufferPool.totalBuffers = 1;
343
344 raidPtr->regionBufferPool.availableBuffers =
345 raidPtr->regionBufferPool.totalBuffers;
346 raidPtr->regionBufferPool.availBuffersIndex = 0;
347 raidPtr->regionBufferPool.emptyBuffersIndex = 0;
348 printf("Allocating %d bytes for regionBufferPool\n",
349 (int) (raidPtr->regionBufferPool.totalBuffers *
350 sizeof(caddr_t)));
351 RF_Malloc(raidPtr->regionBufferPool.buffers,
352 raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t),
353 (caddr_t *));
354 if (raidPtr->regionBufferPool.buffers == NULL) {
355 rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
356 rf_cond_destroy(&raidPtr->regionBufferPool.cond);
357 return (ENOMEM);
358 }
359 for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) {
360 printf("Allocating %d bytes for regionBufferPool#%d\n",
361 (int) (raidPtr->regionBufferPool.bufferSize *
362 sizeof(char)), i);
363 RF_Malloc(raidPtr->regionBufferPool.buffers[i],
364 raidPtr->regionBufferPool.bufferSize * sizeof(char),
365 (caddr_t));
366 if (raidPtr->regionBufferPool.buffers[i] == NULL) {
367 rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
368 rf_cond_destroy(&raidPtr->regionBufferPool.cond);
369 for (j = 0; j < i; j++) {
370 RF_Free(raidPtr->regionBufferPool.buffers[i],
371 raidPtr->regionBufferPool.bufferSize *
372 sizeof(char));
373 }
374 RF_Free(raidPtr->regionBufferPool.buffers,
375 raidPtr->regionBufferPool.totalBuffers *
376 sizeof(caddr_t));
377 return (ENOMEM);
378 }
379 printf("raidPtr->regionBufferPool.buffers[%d] = %lx\n", i,
380 (long) raidPtr->regionBufferPool.buffers[i]);
381 }
382 rc = rf_ShutdownCreate(listp,
383 rf_ShutdownParityLoggingRegionBufferPool,
384 raidPtr);
385 if (rc) {
386 RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
387 " rc=%d\n", __FILE__, __LINE__, rc);
388 rf_ShutdownParityLoggingRegionBufferPool(raidPtr);
389 return (rc);
390 }
391
392 parityBufferCapacity = maxRegionParityRange;
393 rc = rf_mutex_init(&raidPtr->parityBufferPool.mutex);
394 if (rc) {
395 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
396 __FILE__, __LINE__, rc);
397 return (rc);
398 }
399 rc = rf_cond_init(&raidPtr->parityBufferPool.cond);
400 if (rc) {
401 RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n",
402 __FILE__, __LINE__, rc);
403 rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
404 return (ENOMEM);
405 }
406 raidPtr->parityBufferPool.bufferSize = parityBufferCapacity *
407 raidPtr->bytesPerSector;
408 printf("parityBufferPool.bufferSize %d\n",
409 raidPtr->parityBufferPool.bufferSize);
410
411
412 raidPtr->parityBufferPool.totalBuffers = 1;
413
414 raidPtr->parityBufferPool.availableBuffers =
415 raidPtr->parityBufferPool.totalBuffers;
416 raidPtr->parityBufferPool.availBuffersIndex = 0;
417 raidPtr->parityBufferPool.emptyBuffersIndex = 0;
418 printf("Allocating %d bytes for parityBufferPool of %d units\n",
419 (int) (raidPtr->parityBufferPool.totalBuffers *
420 sizeof(caddr_t)),
421 raidPtr->parityBufferPool.totalBuffers);
422 RF_Malloc(raidPtr->parityBufferPool.buffers,
423 raidPtr->parityBufferPool.totalBuffers * sizeof(caddr_t),
424 (caddr_t *));
425 if (raidPtr->parityBufferPool.buffers == NULL) {
426 rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
427 rf_cond_destroy(&raidPtr->parityBufferPool.cond);
428 return (ENOMEM);
429 }
430 for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) {
431 printf("Allocating %d bytes for parityBufferPool#%d\n",
432 (int) (raidPtr->parityBufferPool.bufferSize *
433 sizeof(char)), i);
434 RF_Malloc(raidPtr->parityBufferPool.buffers[i],
435 raidPtr->parityBufferPool.bufferSize * sizeof(char),
436 (caddr_t));
437 if (raidPtr->parityBufferPool.buffers == NULL) {
438 rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
439 rf_cond_destroy(&raidPtr->parityBufferPool.cond);
440 for (j = 0; j < i; j++) {
441 RF_Free(raidPtr->parityBufferPool.buffers[i],
442 raidPtr->regionBufferPool.bufferSize *
443 sizeof(char));
444 }
445 RF_Free(raidPtr->parityBufferPool.buffers,
446 raidPtr->regionBufferPool.totalBuffers *
447 sizeof(caddr_t));
448 return (ENOMEM);
449 }
450 printf("parityBufferPool.buffers[%d] = %lx\n", i,
451 (long) raidPtr->parityBufferPool.buffers[i]);
452 }
453 rc = rf_ShutdownCreate(listp,
454 rf_ShutdownParityLoggingParityBufferPool,
455 raidPtr);
456 if (rc) {
457 RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
458 " rc=%d\n", __FILE__, __LINE__, rc);
459 rf_ShutdownParityLoggingParityBufferPool(raidPtr);
460 return (rc);
461 }
462
463 rc = rf_create_managed_mutex(listp,
464 &raidPtr->parityLogDiskQueue.mutex);
465 if (rc) {
466 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
467 __FILE__, __LINE__, rc);
468 return (rc);
469 }
470 rc = rf_create_managed_cond(listp, &raidPtr->parityLogDiskQueue.cond);
471 if (rc) {
472 RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n",
473 __FILE__, __LINE__, rc);
474 return (rc);
475 }
476 raidPtr->parityLogDiskQueue.flushQueue = NULL;
477 raidPtr->parityLogDiskQueue.reintQueue = NULL;
478 raidPtr->parityLogDiskQueue.bufHead = NULL;
479 raidPtr->parityLogDiskQueue.bufTail = NULL;
480 raidPtr->parityLogDiskQueue.reintHead = NULL;
481 raidPtr->parityLogDiskQueue.reintTail = NULL;
482 raidPtr->parityLogDiskQueue.logBlockHead = NULL;
483 raidPtr->parityLogDiskQueue.logBlockTail = NULL;
484 raidPtr->parityLogDiskQueue.reintBlockHead = NULL;
485 raidPtr->parityLogDiskQueue.reintBlockTail = NULL;
486 raidPtr->parityLogDiskQueue.freeDataList = NULL;
487 raidPtr->parityLogDiskQueue.freeCommonList = NULL;
488
489 rc = rf_ShutdownCreate(listp,
490 rf_ShutdownParityLoggingDiskQueue,
491 raidPtr);
492 if (rc) {
493 RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
494 " rc=%d\n", __FILE__, __LINE__, rc);
495 return (rc);
496 }
497 for (i = 0; i < rf_numParityRegions; i++) {
498 rc = rf_mutex_init(&raidPtr->regionInfo[i].mutex);
499 if (rc) {
500 RF_ERRORMSG3("Unable to init mutex file %s line %d"
501 " rc=%d\n", __FILE__, __LINE__, rc);
502 for (j = 0; j < i; j++)
503 rf_FreeRegionInfo(raidPtr, j);
504 RF_Free(raidPtr->regionInfo,
505 (rf_numParityRegions *
506 sizeof(RF_RegionInfo_t)));
507 return (ENOMEM);
508 }
509 rc = rf_mutex_init(&raidPtr->regionInfo[i].reintMutex);
510 if (rc) {
511 RF_ERRORMSG3("Unable to init mutex file %s line %d"
512 " rc=%d\n", __FILE__, __LINE__, rc);
513 rf_mutex_destroy(&raidPtr->regionInfo[i].mutex);
514 for (j = 0; j < i; j++)
515 rf_FreeRegionInfo(raidPtr, j);
516 RF_Free(raidPtr->regionInfo,
517 (rf_numParityRegions *
518 sizeof(RF_RegionInfo_t)));
519 return (ENOMEM);
520 }
521 raidPtr->regionInfo[i].reintInProgress = RF_FALSE;
522 raidPtr->regionInfo[i].regionStartAddr =
523 raidPtr->regionLogCapacity * i;
524 raidPtr->regionInfo[i].parityStartAddr =
525 raidPtr->regionParityRange * i;
526 if (i < rf_numParityRegions - 1) {
527 raidPtr->regionInfo[i].capacity =
528 raidPtr->regionLogCapacity;
529 raidPtr->regionInfo[i].numSectorsParity =
530 raidPtr->regionParityRange;
531 } else {
532 raidPtr->regionInfo[i].capacity = lastRegionCapacity;
533 raidPtr->regionInfo[i].numSectorsParity =
534 raidPtr->sectorsPerDisk -
535 raidPtr->regionParityRange * i;
536 if (raidPtr->regionInfo[i].numSectorsParity >
537 maxRegionParityRange)
538 maxRegionParityRange =
539 raidPtr->regionInfo[i].numSectorsParity;
540 }
541 raidPtr->regionInfo[i].diskCount = 0;
542 RF_ASSERT(raidPtr->regionInfo[i].capacity +
543 raidPtr->regionInfo[i].regionStartAddr <=
544 totalLogCapacity);
545 RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr +
546 raidPtr->regionInfo[i].numSectorsParity <=
547 raidPtr->sectorsPerDisk);
548 printf("Allocating %d bytes for region %d\n",
549 (int) (raidPtr->regionInfo[i].capacity *
550 sizeof(RF_DiskMap_t)), i);
551 RF_Malloc(raidPtr->regionInfo[i].diskMap,
552 (raidPtr->regionInfo[i].capacity *
553 sizeof(RF_DiskMap_t)),
554 (RF_DiskMap_t *));
555 if (raidPtr->regionInfo[i].diskMap == NULL) {
556 rf_mutex_destroy(&raidPtr->regionInfo[i].mutex);
557 rf_mutex_destroy(&raidPtr->regionInfo[i].reintMutex);
558 for (j = 0; j < i; j++)
559 rf_FreeRegionInfo(raidPtr, j);
560 RF_Free(raidPtr->regionInfo,
561 (rf_numParityRegions *
562 sizeof(RF_RegionInfo_t)));
563 return (ENOMEM);
564 }
565 raidPtr->regionInfo[i].loggingEnabled = RF_FALSE;
566 raidPtr->regionInfo[i].coreLog = NULL;
567 }
568 rc = rf_ShutdownCreate(listp,
569 rf_ShutdownParityLoggingRegionInfo,
570 raidPtr);
571 if (rc) {
572 RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
573 " rc=%d\n", __FILE__, __LINE__, rc);
574 rf_ShutdownParityLoggingRegionInfo(raidPtr);
575 return (rc);
576 }
577 RF_ASSERT(raidPtr->parityLogDiskQueue.threadState == 0);
578 raidPtr->parityLogDiskQueue.threadState = RF_PLOG_CREATED;
579 rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle,
580 rf_ParityLoggingDiskManager, raidPtr, "rf_log");
581 if (rc) {
582 raidPtr->parityLogDiskQueue.threadState = 0;
583 RF_ERRORMSG3("Unable to create parity logging disk thread"
584 " file %s line %d rc=%d\n",
585 __FILE__, __LINE__, rc);
586 return (ENOMEM);
587 }
588
589 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
590 while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_RUNNING)) {
591 RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond,
592 raidPtr->parityLogDiskQueue.mutex);
593 }
594 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
595
596 rc = rf_ShutdownCreate(listp, rf_ShutdownParityLogging, raidPtr);
597 if (rc) {
598 RF_ERRORMSG1("Got rc=%d adding parity logging shutdown"
599 " event.\n", rc);
600 rf_ShutdownParityLogging(raidPtr);
601 return (rc);
602 }
603 if (rf_parityLogDebug) {
604 printf("\t\t\tsize of disk log in sectors: %d\n",
605 (int) totalLogCapacity);
606 printf("\t\t\ttotal number of parity regions is %d\n",
607 (int) rf_numParityRegions);
608 printf("\t\t\tnominal sectors of log per parity region is %d\n",
609 (int) raidPtr->regionLogCapacity);
610 printf("\t\t\tnominal region fragmentation is %d sectors\n",
611 (int) fragmentation);
612 printf("\t\t\ttotal number of parity logs is %d\n",
613 raidPtr->numParityLogs);
614 printf("\t\t\tparity log size is %d sectors\n",
615 raidPtr->numSectorsPerLog);
616 printf("\t\t\ttotal in-core log space is %d bytes\n",
617 (int) rf_totalInCoreLogCapacity);
618 }
619 rf_EnableParityLogging(raidPtr);
620
621 return (0);
622 }
623
624
625 void
626 rf_FreeRegionInfo(RF_Raid_t *raidPtr, RF_RegionId_t regionID)
627 {
628 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
629 RF_Free(raidPtr->regionInfo[regionID].diskMap,
630 (raidPtr->regionInfo[regionID].capacity *
631 sizeof(RF_DiskMap_t)));
632 if (!rf_forceParityLogReint && raidPtr->regionInfo[regionID].coreLog) {
633 rf_ReleaseParityLogs(raidPtr,
634 raidPtr->regionInfo[regionID].coreLog);
635 raidPtr->regionInfo[regionID].coreLog = NULL;
636 } else {
637 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog == NULL);
638 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == 0);
639 }
640 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
641 rf_mutex_destroy(&raidPtr->regionInfo[regionID].mutex);
642 rf_mutex_destroy(&raidPtr->regionInfo[regionID].reintMutex);
643 }
644
645
646 void
647 rf_FreeParityLogQueue(RF_Raid_t *raidPtr, RF_ParityLogQueue_t *queue)
648 {
649 RF_ParityLog_t *l1, *l2;
650
651 RF_LOCK_MUTEX(queue->mutex);
652 l1 = queue->parityLogs;
653 while (l1) {
654 l2 = l1;
655 l1 = l2->next;
656 RF_Free(l2->records, (raidPtr->numSectorsPerLog *
657 sizeof(RF_ParityLogRecord_t)));
658 RF_Free(l2, sizeof(RF_ParityLog_t));
659 }
660 RF_UNLOCK_MUTEX(queue->mutex);
661 rf_mutex_destroy(&queue->mutex);
662 }
663
664
665 void
666 rf_FreeRegionBufferQueue(RF_RegionBufferQueue_t *queue)
667 {
668 int i;
669
670 RF_LOCK_MUTEX(queue->mutex);
671 if (queue->availableBuffers != queue->totalBuffers) {
672 printf("Attempt to free region queue that is still in use !\n");
673 RF_ASSERT(0);
674 }
675 for (i = 0; i < queue->totalBuffers; i++)
676 RF_Free(queue->buffers[i], queue->bufferSize);
677 RF_Free(queue->buffers, queue->totalBuffers * sizeof(caddr_t));
678 RF_UNLOCK_MUTEX(queue->mutex);
679 rf_mutex_destroy(&queue->mutex);
680 }
681
682
683 void
684 rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg)
685 {
686 RF_Raid_t *raidPtr;
687 RF_RegionId_t i;
688
689 raidPtr = (RF_Raid_t *) arg;
690 if (rf_parityLogDebug) {
691 printf("raid%d: ShutdownParityLoggingRegionInfo\n",
692 raidPtr->raidid);
693 }
694
695 for (i = 0; i < rf_numParityRegions; i++)
696 rf_FreeRegionInfo(raidPtr, i);
697 RF_Free(raidPtr->regionInfo, (rf_numParityRegions *
698 sizeof(raidPtr->regionInfo)));
699 raidPtr->regionInfo = NULL;
700 }
701
702
703 void
704 rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg)
705 {
706 RF_Raid_t *raidPtr;
707
708 raidPtr = (RF_Raid_t *) arg;
709 if (rf_parityLogDebug) {
710 printf("raid%d: ShutdownParityLoggingPool\n", raidPtr->raidid);
711 }
712
713 rf_FreeParityLogQueue(raidPtr, &raidPtr->parityLogPool);
714 RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs *
715 raidPtr->numSectorsPerLog * raidPtr->bytesPerSector);
716 }
717
718
719 void
720 rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg)
721 {
722 RF_Raid_t *raidPtr;
723
724 raidPtr = (RF_Raid_t *) arg;
725 if (rf_parityLogDebug) {
726 printf("raid%d: ShutdownParityLoggingRegionBufferPool\n",
727 raidPtr->raidid);
728 }
729 rf_FreeRegionBufferQueue(&raidPtr->regionBufferPool);
730 }
731
732
733 void
734 rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg)
735 {
736 RF_Raid_t *raidPtr;
737
738 raidPtr = (RF_Raid_t *) arg;
739 if (rf_parityLogDebug) {
740 printf("raid%d: ShutdownParityLoggingParityBufferPool\n",
741 raidPtr->raidid);
742 }
743 rf_FreeRegionBufferQueue(&raidPtr->parityBufferPool);
744 }
745
746
747 void
748 rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg)
749 {
750 RF_ParityLogData_t *d;
751 RF_CommonLogData_t *c;
752 RF_Raid_t *raidPtr;
753
754 raidPtr = (RF_Raid_t *) arg;
755 if (rf_parityLogDebug) {
756 printf("raid%d: ShutdownParityLoggingDiskQueue\n",
757 raidPtr->raidid);
758 }
759
760 RF_ASSERT(raidPtr->parityLogDiskQueue.bufHead == NULL);
761 RF_ASSERT(raidPtr->parityLogDiskQueue.bufTail == NULL);
762 RF_ASSERT(raidPtr->parityLogDiskQueue.reintHead == NULL);
763 RF_ASSERT(raidPtr->parityLogDiskQueue.reintTail == NULL);
764 while (raidPtr->parityLogDiskQueue.freeDataList) {
765 d = raidPtr->parityLogDiskQueue.freeDataList;
766 raidPtr->parityLogDiskQueue.freeDataList =
767 raidPtr->parityLogDiskQueue.freeDataList->next;
768 RF_Free(d, sizeof(RF_ParityLogData_t));
769 }
770 while (raidPtr->parityLogDiskQueue.freeCommonList) {
771 c = raidPtr->parityLogDiskQueue.freeCommonList;
772 rf_mutex_destroy(&c->mutex);
773 raidPtr->parityLogDiskQueue.freeCommonList =
774 raidPtr->parityLogDiskQueue.freeCommonList->next;
775 RF_Free(c, sizeof(RF_CommonLogData_t));
776 }
777 }
778
779
780 void
781 rf_ShutdownParityLogging(RF_ThreadArg_t arg)
782 {
783 RF_Raid_t *raidPtr;
784
785 raidPtr = (RF_Raid_t *) arg;
786 if (rf_parityLogDebug) {
787 printf("raid%d: ShutdownParityLogging\n", raidPtr->raidid);
788 }
789
790
791
792
793
794
795
796 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
797 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_TERMINATE;
798 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
799 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
800
801
802
803
804 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
805 while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_SHUTDOWN)) {
806 RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond,
807 raidPtr->parityLogDiskQueue.mutex);
808 }
809 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
810 if (rf_parityLogDebug) {
811 printf("raid%d: ShutdownParityLogging done"
812 " (thread completed)\n", raidPtr->raidid);
813 }
814 }
815
816
817 int
818 rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t *raidPtr)
819 {
820 return (20);
821 }
822
823
824 RF_HeadSepLimit_t
825 rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t *raidPtr)
826 {
827 return (10);
828 }
829
830
831
832 RF_RegionId_t
833 rf_MapRegionIDParityLogging(RF_Raid_t *raidPtr, RF_SectorNum_t address)
834 {
835 RF_RegionId_t regionID;
836
837
838
839 regionID = address / raidPtr->regionParityRange;
840 if (regionID == rf_numParityRegions) {
841
842 regionID--;
843 }
844 RF_ASSERT(address >= raidPtr->regionInfo[regionID].parityStartAddr);
845 RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr +
846 raidPtr->regionInfo[regionID].numSectorsParity);
847 RF_ASSERT(regionID < rf_numParityRegions);
848 return (regionID);
849 }
850
851
852
853 void
854 rf_MapSectorParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
855 RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
856 {
857 RF_StripeNum_t SUID = raidSector /
858 raidPtr->Layout.sectorsPerStripeUnit;
859 *row = 0;
860
861
862 *col = SUID % raidPtr->Layout.numDataCol;
863 *diskSector = (SUID / (raidPtr->Layout.numDataCol)) *
864 raidPtr->Layout.sectorsPerStripeUnit +
865 (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
866 }
867
868
869
870 void
871 rf_MapParityParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
872 RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
873 {
874 RF_StripeNum_t SUID = raidSector /
875 raidPtr->Layout.sectorsPerStripeUnit;
876
877 *row = 0;
878
879
880
881 *col = raidPtr->Layout.numDataCol;
882 *diskSector = (SUID / (raidPtr->Layout.numDataCol)) *
883 raidPtr->Layout.sectorsPerStripeUnit +
884 (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
885 }
886
887
888
889
890
891
892 void
893 rf_MapLogParityLogging(RF_Raid_t *raidPtr, RF_RegionId_t regionID,
894 RF_SectorNum_t regionOffset, RF_RowCol_t *row, RF_RowCol_t *col,
895 RF_SectorNum_t *startSector)
896 {
897 *row = 0;
898 *col = raidPtr->numCol - 1;
899 *startSector =
900 raidPtr->regionInfo[regionID].regionStartAddr + regionOffset;
901 }
902
903
904
905
906
907
908 void
909 rf_MapRegionParity(RF_Raid_t *raidPtr, RF_RegionId_t regionID,
910 RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *startSector,
911 RF_SectorCount_t *numSector)
912 {
913 *row = 0;
914 *col = raidPtr->numCol - 2;
915 *startSector = raidPtr->regionInfo[regionID].parityStartAddr;
916 *numSector = raidPtr->regionInfo[regionID].numSectorsParity;
917 }
918
919
920
921
922
923
924 void
925 rf_IdentifyStripeParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
926 RF_RowCol_t **diskids, RF_RowCol_t *outRow)
927 {
928 RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout,
929 addr);
930 RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *)
931 raidPtr->Layout.layoutSpecificInfo;
932 *outRow = 0;
933 *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol];
934 }
935
936
937 void
938 rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t *layoutPtr,
939 RF_StripeNum_t stripeID, RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru)
940 {
941 *which_ru = 0;
942 *psID = stripeID;
943 }
944
945
946
947
948
949
950
951 void
952 rf_ParityLoggingDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
953 RF_AccessStripeMap_t *asmp, RF_VoidFuncPtr *createFunc)
954 {
955 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
956 RF_PhysDiskAddr_t *failedPDA = NULL;
957 RF_RowCol_t frow, fcol;
958 RF_RowStatus_t rstat;
959 int prior_recon;
960
961 RF_ASSERT(RF_IO_IS_R_OR_W(type));
962
963 if (asmp->numDataFailed + asmp->numParityFailed > 1) {
964 RF_ERRORMSG("Multiple disks failed in a single group !"
965 " Aborting I/O operation.\n");
966 *createFunc = NULL;
967 return;
968 } else
969 if (asmp->numDataFailed + asmp->numParityFailed == 1) {
970
971
972
973
974
975
976 failedPDA = asmp->failedPDAs[0];
977 frow = failedPDA->row;
978 fcol = failedPDA->col;
979 rstat = raidPtr->status[failedPDA->row];
980 prior_recon = (rstat == rf_rs_reconfigured) ||
981 ((rstat == rf_rs_reconstructing) ?
982 rf_CheckRUReconstructed(raidPtr->reconControl[frow]
983 ->reconMap, failedPDA->startSector) : 0);
984 if (prior_recon) {
985 RF_RowCol_t or = failedPDA->row;
986 RF_RowCol_t oc = failedPDA->col;
987 RF_SectorNum_t oo = failedPDA->startSector;
988 if (layoutPtr->map->flags &
989 RF_DISTRIBUTE_SPARE) {
990
991
992 if (failedPDA == asmp->parityInfo) {
993
994
995 (layoutPtr->map->MapParity)
996 (raidPtr,
997 failedPDA->raidAddress,
998 &failedPDA->row,
999 &failedPDA->col,
1000 &failedPDA->startSector,
1001 RF_REMAP);
1002
1003 if (asmp->parityInfo->next) {
1004
1005
1006
1007
1008 RF_PhysDiskAddr_t *p =
1009 asmp->parityInfo->next;
1010 RF_SectorNum_t SUoffs =
1011 p->startSector %
1012 layoutPtr->sectorsPerStripeUnit;
1013 p->row = failedPDA->row;
1014 p->col = failedPDA->col;
1015
1016
1017
1018
1019
1020
1021 p->startSector =
1022 rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
1023 failedPDA->startSector) + SUoffs;
1024 }
1025 } else
1026 if (asmp->parityInfo->next &&
1027 failedPDA ==
1028 asmp->parityInfo->next) {
1029
1030
1031
1032
1033 RF_ASSERT(0);
1034 } else {
1035
1036 (layoutPtr->map
1037 ->MapSector)
1038 (raidPtr,
1039 failedPDA->raidAddress,
1040 &failedPDA->row,
1041 &failedPDA->col,
1042 &failedPDA->startSector,
1043 RF_REMAP);
1044 }
1045
1046 } else {
1047
1048
1049 failedPDA->row =
1050 raidPtr->Disks[frow][fcol].spareRow;
1051 failedPDA->col =
1052 raidPtr->Disks[frow][fcol].spareCol;
1053
1054
1055
1056
1057
1058
1059 if (asmp->parityInfo->next) {
1060 if (failedPDA ==
1061 asmp->parityInfo) {
1062 failedPDA->next->row =
1063 failedPDA->row;
1064 failedPDA->next->col =
1065 failedPDA->col;
1066 } else {
1067 if (failedPDA ==
1068 asmp->parityInfo
1069 ->next) {
1070
1071
1072
1073
1074
1075 asmp->parityInfo
1076 ->row =
1077 failedPDA->row;
1078 asmp->parityInfo
1079 ->col =
1080 failedPDA->col;
1081 }
1082 }
1083 }
1084 }
1085
1086 RF_ASSERT(failedPDA->col != -1);
1087
1088 if (rf_dagDebug || rf_mapDebug) {
1089 printf("raid%d: Redirected type '%c'"
1090 " r %d c %d o %ld -> r %d c %d"
1091 " o %ld\n", raidPtr->raidid,
1092 type, or, oc, (long) oo,
1093 failedPDA->row, failedPDA->col,
1094 (long) failedPDA->startSector);
1095 }
1096 asmp->numDataFailed = asmp->numParityFailed = 0;
1097 }
1098 }
1099 if (type == RF_IO_TYPE_READ) {
1100
1101 if (asmp->numDataFailed == 0)
1102 *createFunc =
1103 (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG;
1104 else
1105 *createFunc =
1106 (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG;
1107
1108 } else {
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121 if ((asmp->numDataFailed + asmp->numParityFailed) == 0) {
1122 if (((asmp->numStripeUnitsAccessed <=
1123 (layoutPtr->numDataCol / 2)) &&
1124 (layoutPtr->numDataCol != 1)) ||
1125 (asmp->parityInfo->next != NULL) ||
1126 rf_CheckStripeForFailures(raidPtr, asmp)) {
1127 *createFunc = (RF_VoidFuncPtr)
1128 rf_CreateParityLoggingSmallWriteDAG;
1129 } else
1130 *createFunc = (RF_VoidFuncPtr)
1131 rf_CreateParityLoggingLargeWriteDAG;
1132 } else
1133 if (asmp->numParityFailed == 1)
1134 *createFunc = (RF_VoidFuncPtr)
1135 rf_CreateNonRedundantWriteDAG;
1136 else
1137 if (asmp->numStripeUnitsAccessed != 1 &&
1138 failedPDA->numSector !=
1139 layoutPtr->sectorsPerStripeUnit)
1140 *createFunc = NULL;
1141 else
1142 *createFunc = (RF_VoidFuncPtr)
1143 rf_CreateDegradedWriteDAG;
1144 }
1145 }
1146 #endif