/*	$OpenBSD: rf_reconbuffer.c,v 1.4 2002/12/16 07:01:05 tdeval Exp $	*/
/*	$NetBSD: rf_reconbuffer.c,v 1.4 2000/03/13 23:52:36 soren Exp $	*/

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*****************************************************
 *
 * rf_reconbuffer.c -- Reconstruction buffer manager.
 *
 *****************************************************/

#include "rf_raid.h"
#include "rf_reconbuffer.h"
#include "rf_acctrace.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_debugprint.h"
#include "rf_revent.h"
#include "rf_reconutil.h"
#include "rf_nwayxor.h"

#define	Dprintf1(s,a)		if (rf_reconbufferDebug) printf(s, a)
#define	Dprintf2(s,a,b)		if (rf_reconbufferDebug) printf(s, a, b)
#define	Dprintf3(s,a,b,c)	if (rf_reconbufferDebug) printf(s, a, b, c)
#define	Dprintf4(s,a,b,c,d)	if (rf_reconbufferDebug) printf(s, a, b, c, d)
#define	Dprintf5(s,a,b,c,d,e)	if (rf_reconbufferDebug) printf(s, a, b, c, d, e)

/*****************************************************************************
 *
 * Submit a reconstruction buffer to the manager for XOR.
 * We can only submit a buffer if (1) we can xor into an existing buffer,
 * which means we don't have to acquire a new one, (2) we can acquire a
 * floating recon buffer, or (3) the caller has indicated that we are allowed
 * to keep the submitted buffer.
 *
 * Returns non-zero if and only if we were not able to submit.
 * In this case, we append the current disk ID to the wait list on the
 * indicated RU, so that it will be re-enabled when we acquire a buffer for
 * this RU.
 *
 *****************************************************************************/
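
/*
 * Note: the three cases above correspond to the three ways
 * rf_SubmitReconBufferBasic() can dispose of the submitted data: XOR it
 * directly into the existing destination buffer, swap it into a committed
 * or floating rbuf, or simply keep the submitted buffer itself when
 * keep_it is set.
 */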

/* Just to make the code below more readable. */
#define	BUFWAIT_APPEND(_cb_,_pssPtr_,_row_,_col_)	\
do {							\
	_cb_ = rf_AllocCallbackDesc();			\
	(_cb_)->row = (_row_);				\
	(_cb_)->col = (_col_);				\
	(_cb_)->next = (_pssPtr_)->bufWaitList;		\
	(_pssPtr_)->bufWaitList = (_cb_);		\
} while (0)

/*
 * rf_nWayXorFuncs[i] is a pointer to a function that will xor "i"
 * bufs into the accumulating sum.
 */
static RF_VoidFuncPtr rf_nWayXorFuncs[] = {
	NULL,
	(RF_VoidFuncPtr) rf_nWayXor1,
	(RF_VoidFuncPtr) rf_nWayXor2,
	(RF_VoidFuncPtr) rf_nWayXor3,
	(RF_VoidFuncPtr) rf_nWayXor4,
	(RF_VoidFuncPtr) rf_nWayXor5,
	(RF_VoidFuncPtr) rf_nWayXor6,
	(RF_VoidFuncPtr) rf_nWayXor7,
	(RF_VoidFuncPtr) rf_nWayXor8,
	(RF_VoidFuncPtr) rf_nWayXor9
};
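
/*
 * The table only provides 1- through 9-way XOR routines (entry 0 is a
 * placeholder); rf_MultiWayReconXor() indexes it directly with the number
 * of accumulated buffers, so that count must stay within this range
 * (see the XXX note there).
 */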


int
rf_SubmitReconBuffer(
	RF_ReconBuffer_t *rbuf,		/* The recon buffer to submit. */
	int		  keep_it,	/*
					 * Whether we can keep this buffer or
					 * we have to return it.
					 */
	int		  use_committed	/*
					 * Whether to use a committed or an
					 * available recon buffer.
					 */
)
{
	RF_LayoutSW_t *lp;
	int rc;

	lp = rbuf->raidPtr->Layout.map;
	rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
	return (rc);
}
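
/*
 * The wrapper above merely dispatches through the per-layout switch
 * (Layout.map); rf_SubmitReconBufferBasic() below does the real work, so
 * layouts that need special handling can substitute their own routine.
 */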

int
rf_SubmitReconBufferBasic(
	RF_ReconBuffer_t *rbuf,		/* The recon buffer to submit. */
	int		  keep_it,	/*
					 * Whether we can keep this buffer
					 * or we have to return it.
					 */
	int		  use_committed	/*
					 * Whether to use a committed or
					 * an available recon buffer.
					 */
)
{
	RF_Raid_t *raidPtr = rbuf->raidPtr;
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row];
	RF_ReconParityStripeStatus_t *pssPtr;
	/* Temporary rbuf pointers. */
	RF_ReconBuffer_t *targetRbuf, *t = NULL;
	/* Temporary data buffer pointer. */
	caddr_t ta;
	RF_CallbackDesc_t *cb, *p;
	int retcode = 0, created = 0;

	RF_Etimer_t timer;

	/* Makes no sense to have a submission from the failed disk. */
	RF_ASSERT(rbuf);
	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);

	Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d"
	    " (failed offset %ld).\n", rbuf->row, rbuf->col,
	    (long) rbuf->parityStripeID, rbuf->which_ru,
	    (long) rbuf->failedDiskSectorOffset);

	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);

	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);

	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable,
	    rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
	RF_ASSERT(pssPtr);	/*
				 * If it didn't exist, we wouldn't have gotten
				 * an rbuf for it.
				 */

	/*
	 * Check to see if enough buffers have accumulated to do an XOR. If
	 * so, there's no need to acquire a floating rbuf. Before we can do
	 * any XORing, we must have acquired a destination buffer. If we
	 * have, then we can go ahead and do the XOR if (1) including this
	 * buffer, enough bufs have accumulated, or (2) this is the last
	 * submission for this stripe. Otherwise, we have to go acquire a
	 * floating rbuf.
	 */
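	/*
	 * For illustration (hypothetical values): with numDataCol == 4 and
	 * rf_numBufsToAccumulate == 3, two buffers already pending
	 * (xorBufCount == 2) means this submission is the third, so clause
	 * (1) below fires; alternatively, if the destination buffer already
	 * accounts for three columns (count == 3) with nothing pending,
	 * this submission completes the RU and clause (2) fires.
	 */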

	targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
	if ((targetRbuf != NULL) &&
	    ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) ||
	     (targetRbuf->count + pssPtr->xorBufCount + 1 ==
	      layoutPtr->numDataCol))) {
		/* Install this buffer. */
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;
		Dprintf3("RECON: row %d col %d invoking a %d-way XOR.\n",
		    rbuf->row, rbuf->col, pssPtr->xorBufCount);
		RF_ETIMER_START(timer);
		rf_MultiWayReconXor(raidPtr, pssPtr);
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
		if (!keep_it) {
			raidPtr->recon_tracerecs[rbuf->col].xor_us =
			    RF_ETIMER_VAL_US(timer);
			RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col]
			    .recon_timer);
			RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col]
			    .recon_timer);
			raidPtr->recon_tracerecs[rbuf->col]
			    .specific.recon.recon_return_to_submit_us +=
			    RF_ETIMER_VAL_US(raidPtr
			    ->recon_tracerecs[rbuf->col].recon_timer);
			RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col]
			    .recon_timer);

			rf_LogTraceRec(raidPtr,
			    &raidPtr->recon_tracerecs[rbuf->col]);
		}
		rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr,
		    layoutPtr->numDataCol);

		/*
		 * If use_committed is on, we _must_ consume a buffer off the
		 * committed list.
		 */
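		/*
		 * The committed buffer is not actually needed here (the XOR
		 * above went into the existing destination buffer), so pull
		 * it off the committed list and hand it straight back via
		 * rf_ReleaseFloatingReconBuffer().
		 */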
		if (use_committed) {
			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t);
		}
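		/*
		 * If we were allowed to keep the caller's buffer, its data
		 * has already been folded into the XOR above, so just free
		 * it and return.
		 */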
		if (keep_it) {
			RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row,
			    rbuf->parityStripeID);
			RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
			rf_FreeReconBuffer(rbuf);
			return (retcode);
		}
		goto out;
	}
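	/*
	 * No XOR was performed above (either there is no destination buffer
	 * yet, or not enough buffers have accumulated), so the submitted
	 * data must be held on to: either by keeping the caller's buffer
	 * itself, or by swapping its data into a committed or floating rbuf
	 * acquired below.
	 */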
	/* Set the value of "t", which we'll use as the rbuf from here on. */
	if (keep_it) {
		t = rbuf;
	} else {
		if (use_committed) {
			/* If a buffer has been committed to us, use it. */

			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			t->next = NULL;
		} else
			if (reconCtrlPtr->floatingRbufs) {
				t = reconCtrlPtr->floatingRbufs;
				reconCtrlPtr->floatingRbufs = t->next;
				t->next = NULL;
			}
	}

	/*
	 * If we weren't able to acquire a buffer, append to the end of the
	 * buf list in the recon ctrl struct.
	 */
	if (!t) {
		RF_ASSERT(!keep_it && !use_committed);
		Dprintf2("RECON: row %d col %d failed to acquire floating"
		    " rbuf.\n", rbuf->row, rbuf->col);

		raidPtr->procsInBufWait++;
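		/*
		 * Deadlock check: counting ourselves, every surviving column
		 * (numCol - 1, since one column has failed) is now blocked
		 * waiting for a buffer, and there is no full buffer whose
		 * completion could ever free one up.
		 */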
		if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) &&
		    (raidPtr->numFullReconBuffers == 0)) {
			printf("Buffer wait deadlock detected. Exiting.\n");
			rf_PrintPSStatusTable(raidPtr, rbuf->row);
			RF_PANIC();
		}
		pssPtr->flags |= RF_PSS_BUFFERWAIT;
		/* Append to buf wait list in recon ctrl structure. */
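		/*
		 * The psid and RU number are stashed in the callback args so
		 * that rf_ReleaseBufferWaiter() can later match this entry
		 * against a particular reconstruction unit.
		 */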
		cb = rf_AllocCallbackDesc();
		cb->row = rbuf->row;
		cb->col = rbuf->col;
		cb->callbackArg.v = rbuf->parityStripeID;
		cb->callbackArg2.v = rbuf->which_ru;
		cb->next = NULL;
		if (!reconCtrlPtr->bufferWaitList)
			reconCtrlPtr->bufferWaitList = cb;
		else {
			/*
			 * Might want to maintain head/tail pointers
			 * here rather than search for end of list.
			 */
			for (p = reconCtrlPtr->bufferWaitList; p->next;
			     p = p->next);
			p->next = cb;
		}
		retcode = 1;
		goto out;
	}
	Dprintf2("RECON: row %d col %d acquired rbuf.\n", rbuf->row, rbuf->col);
	RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	raidPtr->recon_tracerecs[rbuf->col]
	    .specific.recon.recon_return_to_submit_us +=
	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

	rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);

	/* Initialize the buffer. */
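	/*
	 * Rather than copying the data, trade buffer pointers below: "t"
	 * takes over the freshly read data and the caller's rbuf gets t's
	 * old buffer back for reuse.
	 */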
	if (t != rbuf) {
		t->row = rbuf->row;
		t->col = reconCtrlPtr->fcol;
		t->parityStripeID = rbuf->parityStripeID;
		t->which_ru = rbuf->which_ru;
		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
		t->spRow = rbuf->spRow;
		t->spCol = rbuf->spCol;
		t->spOffset = rbuf->spOffset;

		ta = t->buffer;
		t->buffer = rbuf->buffer;
		rbuf->buffer = ta;	/* Swap buffers. */
	}
	/*
	 * The first installation always gets installed as the destination
	 * buffer. Subsequent installations get stacked up to allow for
	 * multi-way XOR.
	 */
	if (!pssPtr->rbuf) {
		pssPtr->rbuf = t;
		t->count = 1;
	} else
		/* Install this buffer. */
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;

	/*
	 * The buffer is full if G=2 (i.e. there is only one surviving
	 * column, so the single installation above already completes it).
	 */
	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr,
	    layoutPtr->numDataCol);

out:
	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
	return (retcode);
}


int
rf_MultiWayReconXor(
	RF_Raid_t		     *raidPtr,
	RF_ReconParityStripeStatus_t *pssPtr	/*
						 * The pss descriptor for this
						 * parity stripe.
						 */
)
{
	int i, numBufs = pssPtr->xorBufCount;
	int numBytes = rf_RaidAddressToByte(raidPtr,
	    raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
	RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
	RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;

	RF_ASSERT(pssPtr->rbuf != NULL);
	RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
#ifdef _KERNEL
#if !defined(__NetBSD__) && !defined(__OpenBSD__)
	/* Yield the processor before doing a big XOR. */
	thread_block();
#endif
#endif	/* _KERNEL */
	/*
	 * XXX
	 *
	 * What if more than 9 bufs?
	 */
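	/*
	 * numBytes covers one full reconstruction unit; the n-way XOR
	 * routines work a long word at a time, hence the length below is
	 * given in longs.
	 */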
	rf_nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf,
	    numBytes / sizeof(long));

	/*
	 * Release all the reconstruction buffers except the last one, which
	 * belongs to the disk whose submission caused this XOR to take place.
	 */
	for (i = 0; i < numBufs - 1; i++) {
		if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
			rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row,
			    rbufs[i]);
		else
			if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
				rf_FreeReconBuffer(rbufs[i]);
			else
				RF_ASSERT(0);
	}
	targetRbuf->count += pssPtr->xorBufCount;
	pssPtr->xorBufCount = 0;
	return (0);
}


/*
 * Removes one full buffer from one of the full-buffer lists and returns it.
 *
 * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
 */
RF_ReconBuffer_t *
rf_GetFullReconBuffer(RF_ReconCtrl_t *reconCtrlPtr)
{
	RF_ReconBuffer_t *p;

	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);

	if ((p = reconCtrlPtr->priorityList) != NULL) {
		reconCtrlPtr->priorityList = p->next;
		p->next = NULL;
		goto out;
	}
	if ((p = reconCtrlPtr->fullBufferList) != NULL) {
		reconCtrlPtr->fullBufferList = p->next;
		p->next = NULL;
		goto out;
	}
out:
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
	return (p);
}


/*
 * If the reconstruction buffer is full, move it to the full list, which
 * is maintained sorted by failed disk sector offset.
 *
 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.
 */
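/*
 * Keeping the full-buffer list sorted by failedDiskSectorOffset means
 * rf_GetFullReconBuffer() always hands back the lowest-offset buffer first,
 * presumably to keep writes to the replacement disk close to sequential.
 */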
int
rf_CheckForFullRbuf(RF_Raid_t *raidPtr, RF_ReconCtrl_t *reconCtrl,
    RF_ReconParityStripeStatus_t *pssPtr, int numDataCol)
{
	RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;

	if (rbuf->count == numDataCol) {
		raidPtr->numFullReconBuffers++;
		Dprintf2("RECON: rbuf for psid %ld ru %d has filled.\n",
		    (long) rbuf->parityStripeID, rbuf->which_ru);
		if (!reconCtrl->fullBufferList ||
		    (rbuf->failedDiskSectorOffset <
		     reconCtrl->fullBufferList->failedDiskSectorOffset)) {
			Dprintf2("RECON: rbuf for psid %ld ru %d is head of"
			    " list.\n", (long) rbuf->parityStripeID,
			    rbuf->which_ru);
			rbuf->next = reconCtrl->fullBufferList;
			reconCtrl->fullBufferList = rbuf;
		} else {
			for (pt = reconCtrl->fullBufferList, p = pt->next;
			     p && p->failedDiskSectorOffset <
			     rbuf->failedDiskSectorOffset;
			     pt = p, p = p->next);
			rbuf->next = p;
			pt->next = rbuf;
			Dprintf2("RECON: rbuf for psid %ld ru %d is in list.\n",
			    (long) rbuf->parityStripeID, rbuf->which_ru);
		}
#if 0
		pssPtr->writeRbuf = pssPtr->rbuf;	/*
							 * DEBUG ONLY: We like
							 * to be able to find
							 * this rbuf while it's
							 * awaiting write.
							 */
#else
		rbuf->pssPtr = pssPtr;
#endif
		pssPtr->rbuf = NULL;
		rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL,
		    RF_REVENT_BUFREADY);
	}
	return (0);
}


/*
 * Release a floating recon buffer for someone else to use.
 * Assumes the rb_mutex is LOCKED at entry.
 */
void
rf_ReleaseFloatingReconBuffer(RF_Raid_t *raidPtr, RF_RowCol_t row,
    RF_ReconBuffer_t *rbuf)
{
	RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row];
	RF_CallbackDesc_t *cb;

	Dprintf2("RECON: releasing rbuf for psid %ld ru %d.\n",
	    (long) rbuf->parityStripeID, rbuf->which_ru);

	/*
	 * If anyone is waiting on buffers, wake one of them up. They will
	 * subsequently wake up anyone else waiting on their RU.
	 */
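	/*
	 * Note that in this case the buffer is parked on the committed list
	 * rather than the floating list, so that it is reserved for a
	 * resubmission with use_committed set (see the use_committed
	 * handling in rf_SubmitReconBufferBasic()) instead of being claimed
	 * by an arbitrary reader.
	 */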
	if (rcPtr->bufferWaitList) {
		rbuf->next = rcPtr->committedRbufs;
		rcPtr->committedRbufs = rbuf;
		cb = rcPtr->bufferWaitList;
		rcPtr->bufferWaitList = cb->next;
		/* arg==1 => We've committed a buffer. */
		rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1,
		    RF_REVENT_BUFCLEAR);
		rf_FreeCallbackDesc(cb);
		raidPtr->procsInBufWait--;
	} else {
		rbuf->next = rcPtr->floatingRbufs;
		rcPtr->floatingRbufs = rbuf;
	}
}


/*
 * Release any disk that is waiting on a buffer for the indicated RU.
 * Assumes the rb_mutex is LOCKED at entry.
 */
void
rf_ReleaseBufferWaiters(
	RF_Raid_t		     *raidPtr,
	RF_ReconParityStripeStatus_t *pssPtr
)
{
	RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList;

	Dprintf2("RECON: releasing buf waiters for psid %ld ru %d.\n",
	    (long) pssPtr->parityStripeID, pssPtr->which_ru);
	pssPtr->flags &= ~RF_PSS_BUFFERWAIT;
	while (cb) {
		cb1 = cb->next;
		cb->next = NULL;
		/* arg==0 => We haven't committed a buffer. */
		rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0,
		    RF_REVENT_BUFCLEAR);
		rf_FreeCallbackDesc(cb);
		cb = cb1;
	}
	pssPtr->bufWaitList = NULL;
}


/*
 * When reconstruction is forced on an RU, there may be some disks waiting to
 * acquire a buffer for that RU. Since we allocate a new buffer as part of
 * the forced-reconstruction process, we no longer have to wait for any
 * buffers, so we wake up any waiter that we find in the bufferWaitList.
 *
 * Assumes the rb_mutex is LOCKED at entry.
 */
void
rf_ReleaseBufferWaiter(RF_ReconCtrl_t *rcPtr, RF_ReconBuffer_t *rbuf)
{
	RF_CallbackDesc_t *cb, *cbt;

	for (cbt = NULL, cb = rcPtr->bufferWaitList; cb;
	     cbt = cb, cb = cb->next) {
		if ((cb->callbackArg.v == rbuf->parityStripeID) &&
		    (cb->callbackArg2.v == rbuf->which_ru)) {
			Dprintf2("RECON: Dropping row %d col %d from buffer"
			    " wait list.\n", cb->row, cb->col);
			if (cbt)
				cbt->next = cb->next;
			else
				rcPtr->bufferWaitList = cb->next;

			/* arg==0 => No committed buffer. */
			rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr,
			    cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY);
			rf_FreeCallbackDesc(cb);
			return;
		}
	}
}