1 /* $OpenBSD: rf_paritylog.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */
2 /* $NetBSD: rf_paritylog.c,v 1.5 2000/01/07 03:41:01 oster Exp $ */
3
4 /*
5 * Copyright (c) 1995 Carnegie-Mellon University.
6 * All rights reserved.
7 *
8 * Author: William V. Courtright II
9 *
10 * Permission to use, copy, modify and distribute this software and
11 * its documentation is hereby granted, provided that both the copyright
12 * notice and this permission notice appear in all copies of the
13 * software, derivative works or modified versions, and any portions
14 * thereof, and that both notices appear in supporting documentation.
15 *
16 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19 *
20 * Carnegie Mellon requests users of this software to return to
21 *
22 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
23 * School of Computer Science
24 * Carnegie Mellon University
25 * Pittsburgh PA 15213-3890
26 *
27 * any improvements or extensions that they make and grant Carnegie the
28 * rights to redistribute these changes.
29 */
30
31 /*
32 * Code for manipulating in-core parity logs.
33 */
34
35 #include "rf_archs.h"
36
37 #if RF_INCLUDE_PARITYLOGGING > 0
38
39 /*
40 * Append-only log for recording parity "update" and "overwrite" records.
41 */
42
43 #include "rf_types.h"
44 #include "rf_threadstuff.h"
45 #include "rf_mcpair.h"
46 #include "rf_raid.h"
47 #include "rf_dag.h"
48 #include "rf_dagfuncs.h"
49 #include "rf_desc.h"
50 #include "rf_layout.h"
51 #include "rf_diskqueue.h"
52 #include "rf_etimer.h"
53 #include "rf_paritylog.h"
54 #include "rf_general.h"
55 #include "rf_map.h"
56 #include "rf_paritylogging.h"
57 #include "rf_paritylogDiskMgr.h"
58
59 RF_CommonLogData_t *rf_AllocParityLogCommonData(RF_Raid_t *);
60 void rf_FreeParityLogCommonData(RF_CommonLogData_t *);
61 RF_ParityLogData_t *rf_AllocParityLogData(RF_Raid_t *);
62 void rf_FreeParityLogData(RF_ParityLogData_t *);
63 void rf_EnqueueParityLogData(RF_ParityLogData_t *, RF_ParityLogData_t **,
64 RF_ParityLogData_t **);
65 RF_ParityLogData_t *rf_DequeueParityLogData(RF_Raid_t *, RF_ParityLogData_t **,
66 RF_ParityLogData_t **, int);
67 void rf_RequeueParityLogData(RF_ParityLogData_t *, RF_ParityLogData_t **,
68 RF_ParityLogData_t **);
69 RF_ParityLogData_t *rf_DequeueMatchingLogData(RF_Raid_t *,
70 RF_ParityLogData_t **, RF_ParityLogData_t **);
71 RF_ParityLog_t *rf_AcquireParityLog(RF_ParityLogData_t *, int);
72 void rf_ReintLog(RF_Raid_t *, int, RF_ParityLog_t *);
73 void rf_FlushLog(RF_Raid_t *, RF_ParityLog_t *);
74 int rf_DumpParityLogToDisk(int, RF_ParityLogData_t *);
75
76 RF_CommonLogData_t *
77 rf_AllocParityLogCommonData(RF_Raid_t *raidPtr)
78 {
79 RF_CommonLogData_t *common = NULL;
80 int rc;
81
82 /*
83 * Return a struct for holding common parity log information from the
84 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list
85 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING
86 */
87
88 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
89 if (raidPtr->parityLogDiskQueue.freeCommonList) {
90 common = raidPtr->parityLogDiskQueue.freeCommonList;
91 raidPtr->parityLogDiskQueue.freeCommonList =
92 raidPtr->parityLogDiskQueue.freeCommonList->next;
93 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
94 } else {
95 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
96 RF_Malloc(common, sizeof(RF_CommonLogData_t),
97 (RF_CommonLogData_t *));
98 rc = rf_mutex_init(&common->mutex);
99 if (rc) {
100 RF_ERRORMSG3("Unable to init mutex file %s line %d"
101 " rc=%d\n", __FILE__, __LINE__, rc);
102 RF_Free(common, sizeof(RF_CommonLogData_t));
103 common = NULL;
104 }
105 }
106 common->next = NULL;
107 return (common);
108 }
109
110 void
111 rf_FreeParityLogCommonData(RF_CommonLogData_t *common)
112 {
113 RF_Raid_t *raidPtr;
114
115 /*
116 * Insert a single struct for holding parity log information (data)
117 * into the free list (rf_parityLogDiskQueue.freeCommonList).
118 * NON-BLOCKING
119 */
120
121 raidPtr = common->raidPtr;
122 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
123 common->next = raidPtr->parityLogDiskQueue.freeCommonList;
124 raidPtr->parityLogDiskQueue.freeCommonList = common;
125 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
126 }
127
128 RF_ParityLogData_t *
129 rf_AllocParityLogData(RF_Raid_t *raidPtr)
130 {
131 RF_ParityLogData_t *data = NULL;
132
133 /*
134 * Return a struct for holding parity log information from the free
135 * list (rf_parityLogDiskQueue.freeList). If the free list is empty,
136 * call RF_Malloc to create a new structure. NON-BLOCKING
137 */
138
139 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
140 if (raidPtr->parityLogDiskQueue.freeDataList) {
141 data = raidPtr->parityLogDiskQueue.freeDataList;
142 raidPtr->parityLogDiskQueue.freeDataList =
143 raidPtr->parityLogDiskQueue.freeDataList->next;
144 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
145 } else {
146 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
147 RF_Malloc(data, sizeof(RF_ParityLogData_t),
148 (RF_ParityLogData_t *));
149 }
150 data->next = NULL;
151 data->prev = NULL;
152 return (data);
153 }
154
155
156 void
157 rf_FreeParityLogData(RF_ParityLogData_t *data)
158 {
159 RF_ParityLogData_t *nextItem;
160 RF_Raid_t *raidPtr;
161
162 /*
163 * Insert a linked list of structs for holding parity log information
164 * (data) into the free list (parityLogDiskQueue.freeList).
165 * NON-BLOCKING
166 */
167
168 raidPtr = data->common->raidPtr;
169 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
170 while (data) {
171 nextItem = data->next;
172 data->next = raidPtr->parityLogDiskQueue.freeDataList;
173 raidPtr->parityLogDiskQueue.freeDataList = data;
174 data = nextItem;
175 }
176 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
177 }
178
179
180 void
181 rf_EnqueueParityLogData(RF_ParityLogData_t *data, RF_ParityLogData_t **head,
182 RF_ParityLogData_t **tail)
183 {
184 RF_Raid_t *raidPtr;
185
186 /*
187 * Insert an in-core parity log (*data) into the head of a disk queue
188 * (*head, *tail). NON-BLOCKING
189 */
190
191 raidPtr = data->common->raidPtr;
192 if (rf_parityLogDebug)
193 printf("[enqueueing parity log data, region %d,"
194 " raidAddress %d, numSector %d]\n", data->regionID,
195 (int) data->diskAddress.raidAddress,
196 (int) data->diskAddress.numSector);
197 RF_ASSERT(data->prev == NULL);
198 RF_ASSERT(data->next == NULL);
199 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
200 if (*head) {
201 /* Insert into head of queue. */
202 RF_ASSERT((*head)->prev == NULL);
203 RF_ASSERT((*tail)->next == NULL);
204 data->next = *head;
205 (*head)->prev = data;
206 *head = data;
207 } else {
208 /* Insert into empty list. */
209 RF_ASSERT(*head == NULL);
210 RF_ASSERT(*tail == NULL);
211 *head = data;
212 *tail = data;
213 }
214 RF_ASSERT((*head)->prev == NULL);
215 RF_ASSERT((*tail)->next == NULL);
216 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
217 }
218
219 RF_ParityLogData_t *
220 rf_DequeueParityLogData(RF_Raid_t *raidPtr, RF_ParityLogData_t **head,
221 RF_ParityLogData_t **tail, int ignoreLocks)
222 {
223 RF_ParityLogData_t *data;
224
225 /*
226 * Remove and return an in-core parity log from the tail of a disk
227 * queue (*head, *tail). NON-BLOCKING
228 */
229
230 /* Remove from tail, preserving FIFO order. */
231 if (!ignoreLocks)
232 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
233 data = *tail;
234 if (data) {
235 if (*head == *tail) {
236 /* Removing last item from queue. */
237 *head = NULL;
238 *tail = NULL;
239 } else {
240 *tail = (*tail)->prev;
241 (*tail)->next = NULL;
242 RF_ASSERT((*head)->prev == NULL);
243 RF_ASSERT((*tail)->next == NULL);
244 }
245 data->next = NULL;
246 data->prev = NULL;
247 if (rf_parityLogDebug)
248 printf("[dequeueing parity log data, region %d,"
249 " raidAddress %d, numSector %d]\n", data->regionID,
250 (int) data->diskAddress.raidAddress,
251 (int) data->diskAddress.numSector);
252 }
253 if (*head) {
254 RF_ASSERT((*head)->prev == NULL);
255 RF_ASSERT((*tail)->next == NULL);
256 }
257 if (!ignoreLocks)
258 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
259 return (data);
260 }
261
262
263 void
264 rf_RequeueParityLogData(RF_ParityLogData_t *data, RF_ParityLogData_t **head,
265 RF_ParityLogData_t **tail)
266 {
267 RF_Raid_t *raidPtr;
268
269 /*
270 * Insert an in-core parity log (*data) into the tail of a disk queue
271 * (*head, *tail). NON-BLOCKING
272 */
273
274 raidPtr = data->common->raidPtr;
275 RF_ASSERT(data);
276 if (rf_parityLogDebug)
277 printf("[requeueing parity log data, region %d,"
278 " raidAddress %d, numSector %d]\n", data->regionID,
279 (int) data->diskAddress.raidAddress,
280 (int) data->diskAddress.numSector);
281 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
282 if (*tail) {
283 /* Append to tail of list. */
284 data->prev = *tail;
285 data->next = NULL;
286 (*tail)->next = data;
287 *tail = data;
288 } else {
289 /* Inserting into an empty list. */
290 *head = data;
291 *tail = data;
292 (*head)->prev = NULL;
293 (*tail)->next = NULL;
294 }
295 RF_ASSERT((*head)->prev == NULL);
296 RF_ASSERT((*tail)->next == NULL);
297 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
298 }
299
300 RF_ParityLogData_t *
301 rf_CreateParityLogData(RF_ParityRecordType_t operation, RF_PhysDiskAddr_t *pda,
302 caddr_t bufPtr, RF_Raid_t *raidPtr,
303 int (*wakeFunc) (RF_DagNode_t * node, int status),
304 void *wakeArg, RF_AccTraceEntry_t *tracerec, RF_Etimer_t startTime)
305 {
306 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
307 RF_CommonLogData_t *common;
308 RF_PhysDiskAddr_t *diskAddress;
309 int boundary, offset = 0;
310
311 /*
312 * Return an initialized struct of info to be logged. Build one item
313 * per physical disk address, one item per region.
314 *
315 * NON-BLOCKING
316 */
317
318 diskAddress = pda;
319 common = rf_AllocParityLogCommonData(raidPtr);
320 RF_ASSERT(common);
321
322 common->operation = operation;
323 common->bufPtr = bufPtr;
324 common->raidPtr = raidPtr;
325 common->wakeFunc = wakeFunc;
326 common->wakeArg = wakeArg;
327 common->tracerec = tracerec;
328 common->startTime = startTime;
329 common->cnt = 0;
330
331 if (rf_parityLogDebug)
332 printf("[entering CreateParityLogData]\n");
333 while (diskAddress) {
334 common->cnt++;
335 data = rf_AllocParityLogData(raidPtr);
336 RF_ASSERT(data);
337 data->common = common;
338 data->next = NULL;
339 data->prev = NULL;
340 data->regionID = rf_MapRegionIDParityLogging(raidPtr,
341 diskAddress->startSector);
342 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr,
343 diskAddress->startSector + diskAddress->numSector - 1)) {
344 /* Disk address does not cross a region boundary. */
345 data->diskAddress = *diskAddress;
346 data->bufOffset = offset;
347 offset = offset + diskAddress->numSector;
348 rf_EnqueueParityLogData(data, &resultHead, &resultTail);
349 /* Adjust disk address. */
350 diskAddress = diskAddress->next;
351 } else {
352 /* Disk address crosses a region boundary. */
353 /* Find address where region is crossed. */
354 boundary = 0;
355 while (data->regionID ==
356 rf_MapRegionIDParityLogging(raidPtr,
357 diskAddress->startSector + boundary))
358 boundary++;
359
360 /* Enter data before the boundary. */
361 data->diskAddress = *diskAddress;
362 data->diskAddress.numSector = boundary;
363 data->bufOffset = offset;
364 offset += boundary;
365 rf_EnqueueParityLogData(data, &resultHead, &resultTail);
366 /* Adjust disk address. */
367 diskAddress->startSector += boundary;
368 diskAddress->numSector -= boundary;
369 }
370 }
371 if (rf_parityLogDebug)
372 printf("[leaving CreateParityLogData]\n");
373 return (resultHead);
374 }
375
376
377 RF_ParityLogData_t *
378 rf_SearchAndDequeueParityLogData(RF_Raid_t *raidPtr, int regionID,
379 RF_ParityLogData_t **head, RF_ParityLogData_t **tail, int ignoreLocks)
380 {
381 RF_ParityLogData_t *w;
382
383 /*
384 * Remove and return an in-core parity log from a specified region
385 * (regionID). If a matching log is not found, return NULL.
386 *
387 * NON-BLOCKING
388 */
389
390 /*
391 * walk backward through a list, looking for an entry with a matching
392 * region ID.
393 */
394 if (!ignoreLocks)
395 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
396 w = (*tail);
397 while (w) {
398 if (w->regionID == regionID) {
399 /* Remove an element from the list. */
400 if (w == *tail) {
401 if (*head == *tail) {
402 /* Removing only element in the list. */
403 *head = NULL;
404 *tail = NULL;
405 } else {
406 /* Removing last item in the list. */
407 *tail = (*tail)->prev;
408 (*tail)->next = NULL;
409 RF_ASSERT((*head)->prev == NULL);
410 RF_ASSERT((*tail)->next == NULL);
411 }
412 } else {
413 if (w == *head) {
414 /* Removing first item in the list. */
415 *head = (*head)->next;
416 (*head)->prev = NULL;
417 RF_ASSERT((*head)->prev == NULL);
418 RF_ASSERT((*tail)->next == NULL);
419 } else {
420 /*
421 * Removing an item from the middle of
422 * the list.
423 */
424 w->prev->next = w->next;
425 w->next->prev = w->prev;
426 RF_ASSERT((*head)->prev == NULL);
427 RF_ASSERT((*tail)->next == NULL);
428 }
429 }
430 w->prev = NULL;
431 w->next = NULL;
432 if (rf_parityLogDebug)
433 printf("[dequeueing parity log data,"
434 " region %d, raidAddress %d,"
435 " numSector %d]\n", w->regionID,
436 (int) w->diskAddress.raidAddress,
437 (int) w->diskAddress.numSector);
438 return (w);
439 } else
440 w = w->prev;
441 }
442 if (!ignoreLocks)
443 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
444 return (NULL);
445 }
446
447 RF_ParityLogData_t *
448 rf_DequeueMatchingLogData(RF_Raid_t *raidPtr, RF_ParityLogData_t **head,
449 RF_ParityLogData_t **tail)
450 {
451 RF_ParityLogData_t *logDataList, *logData;
452 int regionID;
453
454 /*
455 * Remove and return an in-core parity log from the tail of a disk
456 * queue (*head, *tail). Then remove all matching (identical
457 * regionIDs) logData and return as a linked list.
458 *
459 * NON-BLOCKING
460 */
461
462 logDataList = rf_DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
463 if (logDataList) {
464 regionID = logDataList->regionID;
465 logData = logDataList;
466 logData->next = rf_SearchAndDequeueParityLogData(raidPtr,
467 regionID, head, tail, RF_TRUE);
468 while (logData->next) {
469 logData = logData->next;
470 logData->next =
471 rf_SearchAndDequeueParityLogData(raidPtr, regionID,
472 head, tail, RF_TRUE);
473 }
474 }
475 return (logDataList);
476 }
477
478
479 RF_ParityLog_t *
480 rf_AcquireParityLog(RF_ParityLogData_t *logData, int finish)
481 {
482 RF_ParityLog_t *log = NULL;
483 RF_Raid_t *raidPtr;
484
485 /*
486 * Grab a log buffer from the pool and return it. If no buffers are
487 * available, return NULL. NON-BLOCKING
488 */
489 raidPtr = logData->common->raidPtr;
490 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
491 if (raidPtr->parityLogPool.parityLogs) {
492 log = raidPtr->parityLogPool.parityLogs;
493 raidPtr->parityLogPool.parityLogs =
494 raidPtr->parityLogPool.parityLogs->next;
495 log->regionID = logData->regionID;
496 log->numRecords = 0;
497 log->next = NULL;
498 raidPtr->logsInUse++;
499 RF_ASSERT(raidPtr->logsInUse >= 0 &&
500 raidPtr->logsInUse <= raidPtr->numParityLogs);
501 } else {
502 /*
503 * No logs available, so place ourselves on the queue of work
504 * waiting on log buffers this is done while
505 * parityLogPool.mutex is held, to ensure synchronization with
506 * ReleaseParityLogs.
507 */
508 if (rf_parityLogDebug)
509 printf("[blocked on log, region %d, finish %d]\n",
510 logData->regionID, finish);
511 if (finish)
512 rf_RequeueParityLogData(logData,
513 &raidPtr->parityLogDiskQueue.logBlockHead,
514 &raidPtr->parityLogDiskQueue.logBlockTail);
515 else
516 rf_EnqueueParityLogData(logData,
517 &raidPtr->parityLogDiskQueue.logBlockHead,
518 &raidPtr->parityLogDiskQueue.logBlockTail);
519 }
520 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
521 return (log);
522 }
523
524 void
525 rf_ReleaseParityLogs(RF_Raid_t *raidPtr, RF_ParityLog_t *firstLog)
526 {
527 RF_ParityLogData_t *logDataList;
528 RF_ParityLog_t *log, *lastLog;
529 int cnt;
530
531 /*
532 * Insert a linked list of parity logs (firstLog) to the free list
533 * (parityLogPool.parityLogPool)
534 *
535 * NON-BLOCKING
536 */
537
538 RF_ASSERT(firstLog);
539
540 /*
541 * Before returning logs to global free list, service all requests
542 * which are blocked on logs. Holding mutexes for parityLogPool and
543 * parityLogDiskQueue forces synchronization with rf_AcquireParityLog().
544 */
545 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
546 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
547 logDataList = rf_DequeueMatchingLogData(raidPtr,
548 &raidPtr->parityLogDiskQueue.logBlockHead,
549 &raidPtr->parityLogDiskQueue.logBlockTail);
550 log = firstLog;
551 if (firstLog)
552 firstLog = firstLog->next;
553 log->numRecords = 0;
554 log->next = NULL;
555 while (logDataList && log) {
556 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
557 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
558 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
559 if (rf_parityLogDebug)
560 printf("[finishing up buf-blocked log data,"
561 " region %d]\n", logDataList->regionID);
562 if (log == NULL) {
563 log = firstLog;
564 if (firstLog) {
565 firstLog = firstLog->next;
566 log->numRecords = 0;
567 log->next = NULL;
568 }
569 }
570 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
571 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
572 if (log)
573 logDataList = rf_DequeueMatchingLogData(raidPtr,
574 &raidPtr->parityLogDiskQueue.logBlockHead,
575 &raidPtr->parityLogDiskQueue.logBlockTail);
576 }
577 /* Return remaining logs to pool. */
578 if (log) {
579 log->next = firstLog;
580 firstLog = log;
581 }
582 if (firstLog) {
583 lastLog = firstLog;
584 raidPtr->logsInUse--;
585 RF_ASSERT(raidPtr->logsInUse >= 0 &&
586 raidPtr->logsInUse <= raidPtr->numParityLogs);
587 while (lastLog->next) {
588 lastLog = lastLog->next;
589 raidPtr->logsInUse--;
590 RF_ASSERT(raidPtr->logsInUse >= 0 &&
591 raidPtr->logsInUse <= raidPtr->numParityLogs);
592 }
593 lastLog->next = raidPtr->parityLogPool.parityLogs;
594 raidPtr->parityLogPool.parityLogs = firstLog;
595 cnt = 0;
596 log = raidPtr->parityLogPool.parityLogs;
597 while (log) {
598 cnt++;
599 log = log->next;
600 }
601 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
602 }
603 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
604 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
605 }
606
607 void
608 rf_ReintLog(RF_Raid_t *raidPtr, int regionID, RF_ParityLog_t *log)
609 {
610 RF_ASSERT(log);
611
612 /*
613 * Insert an in-core parity log (log) into the disk queue of
614 * reintegration work. Set the flag (reintInProgress) for the
615 * specified region (regionID) to indicate that reintegration is in
616 * progress for this region. NON-BLOCKING
617 */
618
619 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
620 /* Cleared when reint complete. */
621 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;
622
623 if (rf_parityLogDebug)
624 printf("[requesting reintegration of region %d]\n",
625 log->regionID);
626 /* Move record to reintegration queue. */
627 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
628 log->next = raidPtr->parityLogDiskQueue.reintQueue;
629 raidPtr->parityLogDiskQueue.reintQueue = log;
630 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
631 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
632 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
633 }
634
635 void
636 rf_FlushLog(RF_Raid_t *raidPtr, RF_ParityLog_t *log)
637 {
638 /*
639 * Insert a core log (log) into a list of logs
640 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
641 * NON-BLOCKING
642 */
643
644 RF_ASSERT(log);
645 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
646 RF_ASSERT(log->next == NULL);
647 /* Move log to flush queue. */
648 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
649 log->next = raidPtr->parityLogDiskQueue.flushQueue;
650 raidPtr->parityLogDiskQueue.flushQueue = log;
651 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
652 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
653 }
654
655 int
656 rf_DumpParityLogToDisk(int finish, RF_ParityLogData_t *logData)
657 {
658 int i, diskCount, regionID = logData->regionID;
659 RF_ParityLog_t *log;
660 RF_Raid_t *raidPtr;
661
662 raidPtr = logData->common->raidPtr;
663
664 /*
665 * Move a core log to disk. If the log disk is full, initiate
666 * reintegration.
667 *
668 * Return (0) if we can enqueue the dump immediately, otherwise return
669 * (1) to indicate we are blocked on reintegration and control of the
670 * thread should be relinquished.
671 *
672 * Caller must hold regionInfo[regionID].mutex.
673 *
674 * NON-BLOCKING
675 */
676
677 if (rf_parityLogDebug)
678 printf("[dumping parity log to disk, region %d]\n", regionID);
679 log = raidPtr->regionInfo[regionID].coreLog;
680 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
681 RF_ASSERT(log->next == NULL);
682
683 /* If reintegration is in progress, must queue work. */
684 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
685 if (raidPtr->regionInfo[regionID].reintInProgress) {
686 /*
687 * Can not proceed since this region is currently being
688 * reintegrated. We can not block, so queue remaining work and
689 * return.
690 */
691 if (rf_parityLogDebug)
692 printf("[region %d waiting on reintegration]\n",
693 regionID);
694 /*
695 * XXX Not sure about the use of finish - shouldn't this
696 * always be "Enqueue" ?
697 */
698 if (finish)
699 rf_RequeueParityLogData(logData,
700 &raidPtr->parityLogDiskQueue.reintBlockHead,
701 &raidPtr->parityLogDiskQueue.reintBlockTail);
702 else
703 rf_EnqueueParityLogData(logData,
704 &raidPtr->parityLogDiskQueue.reintBlockHead,
705 &raidPtr->parityLogDiskQueue.reintBlockTail);
706 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
707 return (1); /* Relenquish control of this thread. */
708 }
709 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
710 raidPtr->regionInfo[regionID].coreLog = NULL;
711 if ((raidPtr->regionInfo[regionID].diskCount) <
712 raidPtr->regionInfo[regionID].capacity)
713 /*
714 * IMPORTANT !!! This loop bound assumes region disk holds an
715 * integral number of core logs.
716 */
717 {
718 /* Update disk map for this region. */
719 diskCount = raidPtr->regionInfo[regionID].diskCount;
720 for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
721 raidPtr->regionInfo[regionID].diskMap[i + diskCount]
722 .operation = log->records[i].operation;
723 raidPtr->regionInfo[regionID].diskMap[i + diskCount]
724 .parityAddr = log->records[i].parityAddr;
725 }
726 log->diskOffset = diskCount;
727 raidPtr->regionInfo[regionID].diskCount +=
728 raidPtr->numSectorsPerLog;
729 rf_FlushLog(raidPtr, log);
730 } else {
731 /*
732 * No room for log on disk, send it to disk manager and
733 * request reintegration.
734 */
735 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount ==
736 raidPtr->regionInfo[regionID].capacity);
737 rf_ReintLog(raidPtr, regionID, log);
738 }
739 if (rf_parityLogDebug)
740 printf("[finished dumping parity log to disk, region %d]\n",
741 regionID);
742 return (0);
743 }
744
745 int
746 rf_ParityLogAppend(RF_ParityLogData_t *logData, int finish,
747 RF_ParityLog_t **incomingLog, int clearReintFlag)
748 {
749 int regionID, logItem, itemDone;
750 RF_ParityLogData_t *item;
751 int punt, done = RF_FALSE;
752 RF_ParityLog_t *log;
753 RF_Raid_t *raidPtr;
754 RF_Etimer_t timer;
755 int (*wakeFunc) (RF_DagNode_t * node, int status);
756 void *wakeArg;
757
758 /*
759 * Add parity to the appropriate log, one sector at a time. This
760 * routine is called is called by dag functions ParityLogUpdateFunc
761 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
762 *
763 * Parity to be logged is contained in a linked-list (logData). When
764 * this routine returns, every sector in the list will be in one of
765 * three places: 1) entered into the parity log 2) queued, waiting on
766 * reintegration 3) queued, waiting on a core log.
767 *
768 * Blocked work is passed to the ParityLoggingDiskManager for
769 * completion. Later, as conditions which required the block are
770 * removed, the work reenters this routine with the "finish" parameter
771 * set to "RF_TRUE."
772 *
773 * NON-BLOCKING
774 */
775
776 raidPtr = logData->common->raidPtr;
777 /* Lock the region for the first item in logData. */
778 RF_ASSERT(logData != NULL);
779 regionID = logData->regionID;
780 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
781 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
782
783 if (clearReintFlag) {
784 /*
785 * Enable flushing for this region. Holding both locks
786 * provides a synchronization barrier with
787 * rf_DumpParityLogToDisk.
788 */
789 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
790 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
791 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress ==
792 RF_TRUE);
793 raidPtr->regionInfo[regionID].diskCount = 0;
794 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
795 /* Flushing is now enabled. */
796 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
797 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
798 }
799 /* Process each item in logData. */
800 while (logData) {
801 /* Remove an item from logData. */
802 item = logData;
803 logData = logData->next;
804 item->next = NULL;
805 item->prev = NULL;
806
807 if (rf_parityLogDebug)
808 printf("[appending parity log data, region %d,"
809 " raidAddress %d, numSector %d]\n", item->regionID,
810 (int) item->diskAddress.raidAddress,
811 (int) item->diskAddress.numSector);
812
813 /* See if we moved to a new region. */
814 if (regionID != item->regionID) {
815 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
816 regionID = item->regionID;
817 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
818 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
819 }
820 punt = RF_FALSE;/*
821 * Set to RF_TRUE if work is blocked. This
822 * can happen in one of two ways: 1) no core
823 * log (rf_AcquireParityLog) 2) waiting on
824 * reintegration (rf_DumpParityLogToDisk).
825 * If punt is RF_TRUE, the dataItem was queued,
826 * so skip to next item.
827 */
828
829 /*
830 * Process item, one sector at a time, until all sectors
831 * processed or we punt.
832 */
833 if (item->diskAddress.numSector > 0)
834 done = RF_FALSE;
835 else
836 RF_ASSERT(0);
837 while (!punt && !done) {
838 /* Verify that a core log exists for this region. */
839 if (!raidPtr->regionInfo[regionID].coreLog) {
840 /*
841 * Attempt to acquire a parity log. If
842 * acquisition fails, queue remaining work in
843 * data item and move to nextItem.
844 */
845 if (incomingLog) {
846 if (*incomingLog) {
847 RF_ASSERT((*incomingLog)->next
848 == NULL);
849 raidPtr->regionInfo[regionID]
850 .coreLog = *incomingLog;
851 raidPtr->regionInfo[regionID]
852 .coreLog->regionID =
853 regionID;
854 *incomingLog = NULL;
855 } else
856 raidPtr->regionInfo[regionID]
857 .coreLog =
858 rf_AcquireParityLog(item,
859 finish);
860 } else
861 raidPtr->regionInfo[regionID].coreLog =
862 rf_AcquireParityLog(item, finish);
863 /*
864 * Note: rf_AcquireParityLog either returns
865 * a log or enqueues currentItem.
866 */
867 }
868 if (!raidPtr->regionInfo[regionID].coreLog)
869 punt = RF_TRUE; /* Failed to find a core log. */
870 else {
871 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog
872 ->next == NULL);
873 /*
874 * Verify that the log has room for new
875 * entries.
876 */
877 /*
878 * If log is full, dump it to disk and grab a
879 * new log.
880 */
881 if (raidPtr->regionInfo[regionID].coreLog
882 ->numRecords == raidPtr->numSectorsPerLog)
883 {
884 /* Log is full, dump it to disk. */
885 if (rf_DumpParityLogToDisk(finish,
886 item))
887 /*
888 * Dump unsuccessful, blocked
889 * on reintegration.
890 */
891 punt = RF_TRUE;
892 else {
893 /* Dump was successful. */
894 if (incomingLog) {
895 if (*incomingLog) {
896 RF_ASSERT(
897 (*incomingLog)->next ==
898 NULL);
899 raidPtr->
900 regionInfo[regionID].coreLog =
901 *incomingLog;
902 raidPtr->
903 regionInfo[regionID].coreLog->
904 regionID = regionID;
905 *incomingLog =
906 NULL;
907 } else
908 raidPtr->
909 regionInfo[regionID].coreLog =
910 rf_AcquireParityLog(item,
911 finish);
912 } else
913 raidPtr->regionInfo
914 [regionID].coreLog =
915 rf_AcquireParityLog(item,
916 finish);
917 /*
918 * If a core log is not
919 * available, must queue work
920 * and return.
921 */
922 if (!raidPtr->regionInfo
923 [regionID].coreLog)
924 /*
925 * Blocked on log
926 * availability.
927 */
928 punt = RF_TRUE;
929 }
930 }
931 }
932 /*
933 * If we didn't punt on this item, attempt to add a
934 * sector to the core log.
935 */
936 if (!punt) {
937 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog
938 ->next == NULL);
939 /*
940 * At this point, we have a core log with
941 * enough room for a sector.
942 */
943 /* Copy a sector into the log. */
944 log = raidPtr->regionInfo[regionID].coreLog;
945 RF_ASSERT(log->numRecords <
946 raidPtr->numSectorsPerLog);
947 logItem = log->numRecords++;
948 log->records[logItem].parityAddr =
949 item->diskAddress;
950 RF_ASSERT(log->records[logItem].parityAddr
951 .startSector >=
952 raidPtr->regionInfo[regionID]
953 .parityStartAddr);
954 RF_ASSERT(log->records[logItem].parityAddr
955 .startSector <
956 raidPtr->regionInfo[regionID]
957 .parityStartAddr +
958 raidPtr->regionInfo[regionID]
959 .numSectorsParity);
960 log->records[logItem].parityAddr.numSector = 1;
961 log->records[logItem].operation =
962 item->common->operation;
963 bcopy((item->common->bufPtr +
964 (item->bufOffset++ * (1 <<
965 item->common->raidPtr->logBytesPerSector))),
966 log->bufPtr + (logItem * (1 <<
967 item->common->raidPtr->logBytesPerSector)),
968 (1 << item->common->raidPtr
969 ->logBytesPerSector));
970 item->diskAddress.numSector--;
971 item->diskAddress.startSector++;
972 if (item->diskAddress.numSector == 0)
973 done = RF_TRUE;
974 }
975 }
976
977 if (!punt) {
978 /*
979 * Processed this item completely, decrement count of
980 * items to be processed.
981 */
982 RF_ASSERT(item->diskAddress.numSector == 0);
983 RF_LOCK_MUTEX(item->common->mutex);
984 item->common->cnt--;
985 if (item->common->cnt == 0)
986 itemDone = RF_TRUE;
987 else
988 itemDone = RF_FALSE;
989 RF_UNLOCK_MUTEX(item->common->mutex);
990 if (itemDone) {
991 /*
992 * Finished processing all log data for this
993 * IO Return structs to free list and invoke
994 * wakeup function.
995 */
996 /* Grab initial value of timer. */
997 timer = item->common->startTime;
998 RF_ETIMER_STOP(timer);
999 RF_ETIMER_EVAL(timer);
1000 item->common->tracerec->plog_us +=
1001 RF_ETIMER_VAL_US(timer);
1002 if (rf_parityLogDebug)
1003 printf("[waking process for region"
1004 " %d]\n", item->regionID);
1005 wakeFunc = item->common->wakeFunc;
1006 wakeArg = item->common->wakeArg;
1007 rf_FreeParityLogCommonData(item->common);
1008 rf_FreeParityLogData(item);
1009 (wakeFunc) (wakeArg, 0);
1010 } else
1011 rf_FreeParityLogData(item);
1012 }
1013 }
1014 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1015 if (rf_parityLogDebug)
1016 printf("[exiting ParityLogAppend]\n");
1017 return (0);
1018 }
1019
1020
1021 void
1022 rf_EnableParityLogging(RF_Raid_t *raidPtr)
1023 {
1024 int regionID;
1025
1026 for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
1027 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1028 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
1029 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1030 }
1031 if (rf_parityLogDebug)
1032 printf("[parity logging enabled]\n");
1033 }
1034 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */