1 /* $OpenBSD: rf_stripelocks.c,v 1.5 2002/12/16 07:01:05 tdeval Exp $ */
2 /* $NetBSD: rf_stripelocks.c,v 1.5 2000/01/08 23:45:05 oster Exp $ */
3
4 /*
5 * Copyright (c) 1995 Carnegie-Mellon University.
6 * All rights reserved.
7 *
8 * Authors: Mark Holland, Jim Zelenka
9 *
10 * Permission to use, copy, modify and distribute this software and
11 * its documentation is hereby granted, provided that both the copyright
12 * notice and this permission notice appear in all copies of the
13 * software, derivative works or modified versions, and any portions
14 * thereof, and that both notices appear in supporting documentation.
15 *
16 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19 *
20 * Carnegie Mellon requests users of this software to return to
21 *
22 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
23 * School of Computer Science
24 * Carnegie Mellon University
25 * Pittsburgh PA 15213-3890
26 *
27 * any improvements or extensions that they make and grant Carnegie the
28 * rights to redistribute these changes.
29 */
30
31 /*
32 * stripelocks.c -- Code to lock stripes for read and write access.
33 *
34 * The code distinguishes between read locks and write locks. There can be
35 * as many readers to given stripe as desired. When a write request comes
36 * in, no further readers are allowed to enter, and all subsequent requests
37 * are queued in FIFO order. When the number of readers goes to zero, the
38 * writer is given the lock. When a writer releases the lock, the list of
39 * queued requests is scanned, and all readers up to the next writer are
40 * given the lock.
41 *
42 * The lock table size must be one less than a power of two, but HASH_STRIPEID
43 * is the only function that requires this.
44 *
45 * The code now supports "range locks". When you ask to lock a stripe, you
46 * specify a range of addresses in that stripe that you want to lock. When
47 * you acquire the lock, you've locked only this range of addresses, and
48 * other threads can concurrently read/write any non-overlapping portions
49 * of the stripe. The "addresses" that you lock are abstract in that you
50 * can pass in anything you like. The expectation is that you'll pass in
51 * the range of physical disk offsets of the parity bits you're planning
52 * to update. The idea behind this, of course, is to allow sub-stripe
53 * locking. The implementation is perhaps not the best imaginable; in the
54 * worst case a lock release is O(n^2) in the total number of outstanding
55 * requests to a given stripe. Note that if you're striping with a
56 * stripe unit size equal to an entire disk (i.e. not striping), there will
57 * be only one stripe and you may spend some significant number of cycles
58 * searching through stripe lock descriptors.
59 */
60
61 #include "rf_types.h"
62 #include "rf_raid.h"
63 #include "rf_stripelocks.h"
64 #include "rf_alloclist.h"
65 #include "rf_general.h"
66 #include "rf_freelist.h"
67 #include "rf_debugprint.h"
68 #include "rf_driver.h"
69 #include "rf_shutdown.h"
70
71 #define Dprintf1(s,a) \
72 rf_debug_printf(s, (void *)((unsigned long)a), \
73 NULL, NULL, NULL, NULL, NULL, NULL, NULL)
74 #define Dprintf2(s,a,b) \
75 rf_debug_printf(s, (void *)((unsigned long)a), \
76 (void *)((unsigned long)b), NULL, NULL, NULL, NULL, NULL, NULL)
77 #define Dprintf3(s,a,b,c) \
78 rf_debug_printf(s, (void *)((unsigned long)a), \
79 (void *)((unsigned long)b), (void *)((unsigned long)c), \
80 NULL, NULL, NULL, NULL, NULL)
81 #define Dprintf4(s,a,b,c,d) \
82 rf_debug_printf(s, (void *)((unsigned long)a), \
83 (void *)((unsigned long)b), (void *)((unsigned long)c), \
84 (void *)((unsigned long)d), NULL, NULL, NULL, NULL)
85 #define Dprintf5(s,a,b,c,d,e) \
86 rf_debug_printf(s, (void *)((unsigned long)a), \
87 (void *)((unsigned long)b), (void *)((unsigned long)c), \
88 (void *)((unsigned long)d), (void *)((unsigned long)e), \
89 NULL, NULL, NULL)
90 #define Dprintf6(s,a,b,c,d,e,f) \
91 rf_debug_printf(s, (void *)((unsigned long)a), \
92 (void *)((unsigned long)b), (void *)((unsigned long)c), \
93 (void *)((unsigned long)d), (void *)((unsigned long)e), \
94 (void *)((unsigned long)f), NULL, NULL)
95 #define Dprintf7(s,a,b,c,d,e,f,g) \
96 rf_debug_printf(s, (void *)((unsigned long)a), \
97 (void *)((unsigned long)b), (void *)((unsigned long)c), \
98 (void *)((unsigned long)d), (void *)((unsigned long)e), \
99 (void *)((unsigned long)f), (void *)((unsigned long)g), NULL)
100 #define Dprintf8(s,a,b,c,d,e,f,g,h) \
101 rf_debug_printf(s, (void *)((unsigned long)a), \
102 (void *)((unsigned long)b), (void *)((unsigned long)c), \
103 (void *)((unsigned long)d), (void *)((unsigned long)e), \
104 (void *)((unsigned long)f), (void *)((unsigned long)g), \
105 (void *)((unsigned long)h))
106
107 #define FLUSH
108
109 #define HASH_STRIPEID(_sid_) ((_sid_) & (rf_lockTableSize-1))
110
111 void rf_AddToWaitersQueue(RF_LockTableEntry_t *, RF_StripeLockDesc_t *,
112 RF_LockReqDesc_t *);
113 RF_StripeLockDesc_t *rf_AllocStripeLockDesc(RF_StripeNum_t);
114 void rf_FreeStripeLockDesc(RF_StripeLockDesc_t *);
115 void rf_PrintLockedStripes(RF_LockTableEntry_t *);
116
117 /*
118 * Determines if two ranges overlap. Always yields false if either start
119 * value is negative.
120 */
121 #define SINGLE_RANGE_OVERLAP(_strt1,_stop1,_strt2,_stop2) \
122 ((_strt1 >= 0) && (_strt2 >= 0) && \
123 (RF_MAX(_strt1, _strt2) <= RF_MIN(_stop1, _stop2)))
124
125 /*
126 * Determines if any of the ranges specified in the two lock descriptors
127 * overlap each other.
128 */
129 #define RANGE_OVERLAP(_cand,_pred) \
130 (SINGLE_RANGE_OVERLAP((_cand)->start, (_cand)->stop, \
131 (_pred)->start, (_pred)->stop) || \
132 SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2, \
133 (_pred)->start, (_pred)->stop) || \
134 SINGLE_RANGE_OVERLAP((_cand)->start, (_cand)->stop, \
135 (_pred)->start2, (_pred)->stop2) || \
136 SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2, \
137 (_pred)->start2, (_pred)->stop2))
138
139 /*
140 * Determines if a candidate lock request conflicts with a predecessor
141 * lock req. Note that the arguments are not interchangeable.
142 * The rules are:
143 * A candidate read conflicts with a predecessor write if any ranges overlap.
144 * A candidate write conflicts with a predecessor read if any ranges overlap.
145 * A candidate write conflicts with a predecessor write if any ranges overlap.
146 */
147 #define STRIPELOCK_CONFLICT(_cand,_pred) \
148 (RANGE_OVERLAP((_cand), (_pred)) && \
149 (((((_cand)->type == RF_IO_TYPE_READ) && \
150 ((_pred)->type == RF_IO_TYPE_WRITE)) || \
151 (((_cand)->type == RF_IO_TYPE_WRITE) && \
152 ((_pred)->type == RF_IO_TYPE_READ)) || \
153 (((_cand)->type == RF_IO_TYPE_WRITE) && \
154 ((_pred)->type == RF_IO_TYPE_WRITE)))))
155
156 static RF_FreeList_t *rf_stripelock_freelist;
157 #define RF_MAX_FREE_STRIPELOCK 128
158 #define RF_STRIPELOCK_INC 8
159 #define RF_STRIPELOCK_INITIAL 32
160
161 void rf_ShutdownStripeLockFreeList(void *);
162 void rf_RaidShutdownStripeLocks(void *);
163
164 void
165 rf_ShutdownStripeLockFreeList(void *ignored)
166 {
167 RF_FREELIST_DESTROY(rf_stripelock_freelist, next,
168 (RF_StripeLockDesc_t *));
169 }
170
171 int
172 rf_ConfigureStripeLockFreeList(RF_ShutdownList_t **listp)
173 {
174 unsigned mask;
175 int rc;
176
177 RF_FREELIST_CREATE(rf_stripelock_freelist, RF_MAX_FREE_STRIPELOCK,
178 RF_STRIPELOCK_INITIAL, sizeof(RF_StripeLockDesc_t));
179 rc = rf_ShutdownCreate(listp, rf_ShutdownStripeLockFreeList, NULL);
180 if (rc) {
181 RF_ERRORMSG3("Unable to add to shutdown list file %s"
182 " line %d rc=%d.\n", __FILE__, __LINE__, rc);
183 rf_ShutdownStripeLockFreeList(NULL);
184 return (rc);
185 }
186 RF_FREELIST_PRIME(rf_stripelock_freelist, RF_STRIPELOCK_INITIAL, next,
187 (RF_StripeLockDesc_t *));
188 for (mask = 0x1; mask; mask <<= 1)
189 if (rf_lockTableSize == mask)
190 break;
191 if (!mask) {
192 printf("[WARNING: lock table size must be a power of two."
193 " Setting to %d.]\n", RF_DEFAULT_LOCK_TABLE_SIZE);
194 rf_lockTableSize = RF_DEFAULT_LOCK_TABLE_SIZE;
195 }
196 return (0);
197 }
198
199 RF_LockTableEntry_t *
200 rf_MakeLockTable(void)
201 {
202 RF_LockTableEntry_t *lockTable;
203 int i, rc;
204
205 RF_Calloc(lockTable, ((int) rf_lockTableSize),
206 sizeof(RF_LockTableEntry_t), (RF_LockTableEntry_t *));
207 if (lockTable == NULL)
208 return (NULL);
209 for (i = 0; i < rf_lockTableSize; i++) {
210 rc = rf_mutex_init(&lockTable[i].mutex);
211 if (rc) {
212 RF_ERRORMSG3("Unable to init mutex file %s line %d"
213 " rc=%d.\n", __FILE__, __LINE__, rc);
214 /* XXX Clean up other mutexes. */
215 return (NULL);
216 }
217 }
218 return (lockTable);
219 }
220
221 void
222 rf_ShutdownStripeLocks(RF_LockTableEntry_t *lockTable)
223 {
224 int i;
225
226 if (rf_stripeLockDebug) {
227 rf_PrintLockedStripes(lockTable);
228 }
229 for (i = 0; i < rf_lockTableSize; i++) {
230 rf_mutex_destroy(&lockTable[i].mutex);
231 }
232 RF_Free(lockTable, rf_lockTableSize * sizeof(RF_LockTableEntry_t));
233 }
234
235 void
236 rf_RaidShutdownStripeLocks(void *arg)
237 {
238 RF_Raid_t *raidPtr = (RF_Raid_t *) arg;
239 rf_ShutdownStripeLocks(raidPtr->lockTable);
240 }
241
242 int
243 rf_ConfigureStripeLocks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
244 RF_Config_t *cfgPtr)
245 {
246 int rc;
247
248 raidPtr->lockTable = rf_MakeLockTable();
249 if (raidPtr->lockTable == NULL)
250 return (ENOMEM);
251 rc = rf_ShutdownCreate(listp, rf_RaidShutdownStripeLocks, raidPtr);
252 if (rc) {
253 RF_ERRORMSG3("Unable to add to shutdown list file %s line %d"
254 " rc=%d.\n", __FILE__, __LINE__, rc);
255 rf_ShutdownStripeLocks(raidPtr->lockTable);
256 return (rc);
257 }
258 return (0);
259 }
260
261 /*
262 * Returns 0 if you've got the lock, and non-zero if you have to wait.
263 * If and only if you have to wait, we'll cause cbFunc to get invoked
264 * with cbArg when you are granted the lock. We store a tag in *releaseTag
265 * that you need to give back to us when you release the lock.
266 */
267 int
268 rf_AcquireStripeLock(RF_LockTableEntry_t *lockTable, RF_StripeNum_t stripeID,
269 RF_LockReqDesc_t *lockReqDesc)
270 {
271 RF_StripeLockDesc_t *lockDesc;
272 RF_LockReqDesc_t *p;
273 int tid = 0, hashval = HASH_STRIPEID(stripeID);
274 int retcode = 0;
275
276 RF_ASSERT(RF_IO_IS_R_OR_W(lockReqDesc->type));
277
278 if (rf_stripeLockDebug) {
279 if (stripeID == -1)
280 Dprintf1("[%d] Lock acquisition supressed"
281 " (stripeID == -1).\n", tid);
282 else {
283 Dprintf8("[%d] Trying to acquire stripe lock table"
284 " 0x%lx SID %ld type %c range %ld-%ld, range2"
285 " %ld-%ld hashval %d.\n", tid,
286 (unsigned long) lockTable, stripeID,
287 lockReqDesc->type, lockReqDesc->start,
288 lockReqDesc->stop, lockReqDesc->start2,
289 lockReqDesc->stop2);
290 Dprintf3("[%d] lock %ld hashval %d.\n", tid, stripeID,
291 hashval);
292 FLUSH;
293 }
294 }
295 if (stripeID == -1)
296 return (0);
297 lockReqDesc->next = NULL; /* Just to be sure. */
298
299 RF_LOCK_MUTEX(lockTable[hashval].mutex);
300 for (lockDesc = lockTable[hashval].descList; lockDesc;
301 lockDesc = lockDesc->next) {
302 if (lockDesc->stripeID == stripeID)
303 break;
304 }
305
306 if (!lockDesc) {
307 /* No entry in table => no one reading or writing. */
308 lockDesc = rf_AllocStripeLockDesc(stripeID);
309 lockDesc->next = lockTable[hashval].descList;
310 lockTable[hashval].descList = lockDesc;
311 if (lockReqDesc->type == RF_IO_TYPE_WRITE)
312 lockDesc->nWriters++;
313 lockDesc->granted = lockReqDesc;
314 if (rf_stripeLockDebug) {
315 Dprintf7("[%d] no one waiting: lock %ld %c %ld-%ld"
316 " %ld-%ld granted.\n", tid, stripeID,
317 lockReqDesc->type,
318 lockReqDesc->start, lockReqDesc->stop,
319 lockReqDesc->start2, lockReqDesc->stop2);
320 FLUSH;
321 }
322 } else {
323
324 if (lockReqDesc->type == RF_IO_TYPE_WRITE)
325 lockDesc->nWriters++;
326
327 if (lockDesc->nWriters == 0) {
328 /*
329 * No need to search any lists if there are no writers
330 * anywhere.
331 */
332 lockReqDesc->next = lockDesc->granted;
333 lockDesc->granted = lockReqDesc;
334 if (rf_stripeLockDebug) {
335 Dprintf7("[%d] no writers: lock %ld %c %ld-%ld"
336 " %ld-%ld granted.\n", tid,
337 stripeID, lockReqDesc->type,
338 lockReqDesc->start, lockReqDesc->stop,
339 lockReqDesc->start2, lockReqDesc->stop2);
340 FLUSH;
341 }
342 } else {
343 /*
344 * Search the granted & waiting lists for a conflict.
345 * Stop searching as soon as we find one.
346 */
347 retcode = 0;
348 for (p = lockDesc->granted; p; p = p->next)
349 if (STRIPELOCK_CONFLICT(lockReqDesc, p)) {
350 retcode = 1;
351 break;
352 }
353 if (!retcode)
354 for (p = lockDesc->waitersH; p; p = p->next)
355 if (STRIPELOCK_CONFLICT(lockReqDesc, p))
356 {
357 retcode = 2;
358 break;
359 }
360 if (!retcode) {
361 /* No conflicts found => grant lock */
362 lockReqDesc->next = lockDesc->granted;
363 lockDesc->granted = lockReqDesc;
364 if (rf_stripeLockDebug) {
365 Dprintf7("[%d] no conflicts: lock %ld"
366 " %c %ld-%ld %ld-%ld granted.\n",
367 tid, stripeID, lockReqDesc->type,
368 lockReqDesc->start,
369 lockReqDesc->stop,
370 lockReqDesc->start2,
371 lockReqDesc->stop2);
372 FLUSH;
373 }
374 } else {
375 if (rf_stripeLockDebug) {
376 Dprintf6("[%d] conflict: lock %ld %c"
377 " %ld-%ld hashval=%d not"
378 " granted.\n", tid, stripeID,
379 lockReqDesc->type,
380 lockReqDesc->start,
381 lockReqDesc->stop,
382 hashval);
383 Dprintf3("[%d] lock %ld retcode=%d.\n",
384 tid, stripeID, retcode);
385 FLUSH;
386 }
387 /* Conflict => the current access must wait. */
388 rf_AddToWaitersQueue(lockTable, lockDesc,
389 lockReqDesc);
390 }
391 }
392 }
393
394 RF_UNLOCK_MUTEX(lockTable[hashval].mutex);
395 return (retcode);
396 }
397
398 void
399 rf_ReleaseStripeLock(RF_LockTableEntry_t *lockTable, RF_StripeNum_t stripeID,
400 RF_LockReqDesc_t *lockReqDesc)
401 {
402 RF_StripeLockDesc_t *lockDesc, *ld_t;
403 RF_LockReqDesc_t *lr, *lr_t, *callbacklist, *t;
404 RF_IoType_t type = lockReqDesc->type;
405 int tid = 0, hashval = HASH_STRIPEID(stripeID);
406 int release_it, consider_it;
407 RF_LockReqDesc_t *candidate, *candidate_t, *predecessor;
408
409 RF_ASSERT(RF_IO_IS_R_OR_W(type));
410
411 if (rf_stripeLockDebug) {
412 if (stripeID == -1)
413 Dprintf1("[%d] Lock release supressed"
414 " (stripeID == -1).\n", tid);
415 else {
416 Dprintf8("[%d] Releasing stripe lock on stripe ID %ld,"
417 " type %c range %ld-%ld %ld-%ld table 0x%lx.\n",
418 tid, stripeID, lockReqDesc->type,
419 lockReqDesc->start, lockReqDesc->stop,
420 lockReqDesc->start2, lockReqDesc->stop2,
421 lockTable);
422 FLUSH;
423 }
424 }
425 if (stripeID == -1)
426 return;
427
428 RF_LOCK_MUTEX(lockTable[hashval].mutex);
429
430 /* Find the stripe lock descriptor. */
431 for (ld_t = NULL, lockDesc = lockTable[hashval].descList;
432 lockDesc; ld_t = lockDesc, lockDesc = lockDesc->next) {
433 if (lockDesc->stripeID == stripeID)
434 break;
435 }
436 RF_ASSERT(lockDesc); /*
437 * Major error to release a lock that doesn't
438 * exist.
439 */
440
441 /* Find the stripe lock request descriptor & delete it from the list. */
442 for (lr_t = NULL, lr = lockDesc->granted; lr; lr_t = lr, lr = lr->next)
443 if (lr == lockReqDesc)
444 break;
445
446 RF_ASSERT(lr && (lr == lockReqDesc)); /*
447 * Major error to release a
448 * lock that hasn't been
449 * granted.
450 */
451 if (lr_t)
452 lr_t->next = lr->next;
453 else {
454 RF_ASSERT(lr == lockDesc->granted);
455 lockDesc->granted = lr->next;
456 }
457 lr->next = NULL;
458
459 if (lockReqDesc->type == RF_IO_TYPE_WRITE)
460 lockDesc->nWriters--;
461
462 /*
463 * Search through the waiters list to see if anyone needs to be woken
464 * up. For each such descriptor in the wait list, we check it against
465 * everything granted and against everything _in front_ of it in the
466 * waiters queue. If it conflicts with none of these, we release it.
467 *
468 * DON'T TOUCH THE TEMPLINK POINTER OF ANYTHING IN THE GRANTED LIST
469 * HERE.
470 * This will roach the case where the callback tries to acquire a new
471 * lock in the same stripe. There are some asserts to try and detect
472 * this.
473 *
474 * We apply 2 performance optimizations:
475 * (1) If releasing this lock results in no more writers to this
476 * stripe, we just release everybody waiting, since we place no
477 * restrictions on the number of concurrent reads.
478 * (2) We consider as candidates for wakeup only those waiters that
479 * have a range overlap with either the descriptor being woken up
480 * or with something in the callbacklist (i.e. something we've
481 * just now woken up).
482 * This allows us to avoid the long evaluation for some descriptors.
483 */
484
485 callbacklist = NULL;
486 if (lockDesc->nWriters == 0) { /* Performance tweak (1). */
487 while (lockDesc->waitersH) {
488
489 lr = lockDesc->waitersH; /*
490 * Delete from waiters
491 * list.
492 */
493 lockDesc->waitersH = lr->next;
494
495 RF_ASSERT(lr->type == RF_IO_TYPE_READ);
496
497 lr->next = lockDesc->granted; /*
498 * Add to granted list.
499 */
500 lockDesc->granted = lr;
501
502 RF_ASSERT(!lr->templink);
503 lr->templink = callbacklist; /*
504 * Put on callback list
505 * so that we'll invoke
506 * callback below.
507 */
508 callbacklist = lr;
509 if (rf_stripeLockDebug) {
510 Dprintf8("[%d] No writers: granting lock"
511 " stripe ID %ld, type %c range %ld-%l"
512 "d %ld-%ld table 0x%lx.\n", tid, stripeID,
513 lr->type, lr->start, lr->stop,
514 lr->start2, lr->stop2,
515 (unsigned long) lockTable);
516 FLUSH;
517 }
518 }
519 lockDesc->waitersT = NULL; /*
520 * We've purged the whole
521 * waiters list.
522 */
523
524 } else
525 for (candidate_t = NULL, candidate = lockDesc->waitersH;
526 candidate;) {
527
528 /* Performance tweak (2). */
529 consider_it = 0;
530 if (RANGE_OVERLAP(lockReqDesc, candidate))
531 consider_it = 1;
532 else
533 for (t = callbacklist; t; t = t->templink)
534 if (RANGE_OVERLAP(t, candidate)) {
535 consider_it = 1;
536 break;
537 }
538 if (!consider_it) {
539 if (rf_stripeLockDebug) {
540 Dprintf8("[%d] No overlap: rejecting"
541 " candidate stripeID %ld, type %c"
542 " range %ld-%ld %ld-%ld table"
543 " 0x%lx.\n", tid, stripeID,
544 candidate->type,
545 candidate->start, candidate->stop,
546 candidate->start2, candidate->stop2,
547 (unsigned long) lockTable);
548 FLUSH;
549 }
550 candidate_t = candidate;
551 candidate = candidate->next;
552 continue;
553 }
554 /*
555 * We have a candidate for release. Check to make
556 * sure it is not blocked by any granted locks.
557 */
558 release_it = 1;
559 for (predecessor = lockDesc->granted; predecessor;
560 predecessor = predecessor->next) {
561 if (STRIPELOCK_CONFLICT(candidate, predecessor))
562 {
563 if (rf_stripeLockDebug) {
564 Dprintf8("[%d] Conflicts with"
565 " granted lock: rejecting"
566 " candidate stripeID %ld,"
567 " type %c range %ld-%ld"
568 " %ld-%ld table 0x%lx.\n",
569 tid, stripeID,
570 candidate->type,
571 candidate->start,
572 candidate->stop,
573 candidate->start2,
574 candidate->stop2,
575 (unsigned long) lockTable);
576 FLUSH;
577 }
578 release_it = 0;
579 break;
580 }
581 }
582
583 /*
584 * Now check to see if the candidate is blocked by any
585 * waiters that occur before it in the wait queue.
586 */
587 if (release_it)
588 for (predecessor = lockDesc->waitersH;
589 predecessor != candidate;
590 predecessor = predecessor->next) {
591 if (STRIPELOCK_CONFLICT(candidate,
592 predecessor)) {
593 if (rf_stripeLockDebug) {
594 Dprintf8("[%d]"
595 " Conflicts with"
596 " waiting lock:"
597 " rejecting"
598 " candidate"
599 " stripeID %ld,"
600 " type %c"
601 " range %ld-%ld"
602 " %ld-%ld"
603 " table 0x%lx.\n",
604 tid, stripeID,
605 candidate->type,
606 candidate->start,
607 candidate->stop,
608 candidate->start2,
609 candidate->stop2,
610 (unsigned long)
611 lockTable);
612 FLUSH;
613 }
614 release_it = 0;
615 break;
616 }
617 }
618
619 /* Release it if indicated. */
620 if (release_it) {
621 if (rf_stripeLockDebug) {
622 Dprintf8("[%d] Granting lock to"
623 " candidate stripeID %ld, type %c"
624 " range %ld-%ld %ld-%ld table"
625 " 0x%lx.\n", tid, stripeID,
626 candidate->type,
627 candidate->start, candidate->stop,
628 candidate->start2, candidate->stop2,
629 (unsigned long) lockTable);
630 FLUSH;
631 }
632 if (candidate_t) {
633 candidate_t->next = candidate->next;
634 if (lockDesc->waitersT == candidate)
635 /*
636 * Cannot be waitersH
637 * since candidate_t is
638 * not NULL.
639 */
640 lockDesc->waitersT =
641 candidate_t;
642 } else {
643 RF_ASSERT(candidate ==
644 lockDesc->waitersH);
645 lockDesc->waitersH =
646 lockDesc->waitersH->next;
647 if (!lockDesc->waitersH)
648 lockDesc->waitersT = NULL;
649 }
650 /* Move it to the granted list. */
651 candidate->next = lockDesc->granted;
652 lockDesc->granted = candidate;
653
654 RF_ASSERT(!candidate->templink);
655 /*
656 * Put it on the list of things to be called
657 * after we release the mutex.
658 */
659 candidate->templink = callbacklist;
660 callbacklist = candidate;
661
662 if (!candidate_t)
663 candidate = lockDesc->waitersH;
664 else
665 /*
666 * Continue with the rest of the list.
667 */
668 candidate = candidate_t->next;
669 } else {
670 candidate_t = candidate;
671 /* Continue with the rest of the list. */
672 candidate = candidate->next;
673 }
674 }
675
676 /* Delete the descriptor if no one is waiting or active. */
677 if (!lockDesc->granted && !lockDesc->waitersH) {
678 RF_ASSERT(lockDesc->nWriters == 0);
679 if (rf_stripeLockDebug) {
680 Dprintf3("[%d] Last lock released (table 0x%lx):"
681 " deleting desc for stripeID %ld.\n", tid,
682 (unsigned long) lockTable, stripeID);
683 FLUSH;
684 }
685 if (ld_t)
686 ld_t->next = lockDesc->next;
687 else {
688 RF_ASSERT(lockDesc == lockTable[hashval].descList);
689 lockTable[hashval].descList = lockDesc->next;
690 }
691 rf_FreeStripeLockDesc(lockDesc);
692 lockDesc = NULL; /* Only for the ASSERT below. */
693 }
694 RF_UNLOCK_MUTEX(lockTable[hashval].mutex);
695
696 /*
697 * Now that we've unlocked the mutex, invoke the callback on all the
698 * descriptors in the list.
699 */
700 RF_ASSERT(!((callbacklist) && (!lockDesc))); /*
701 * If we deleted the
702 * descriptor, we should
703 * have no callbacks to
704 * do.
705 */
706 for (candidate = callbacklist; candidate;) {
707 t = candidate;
708 candidate = candidate->templink;
709 t->templink = NULL;
710 (t->cbFunc) (t->cbArg);
711 }
712 }
713
714 /* Must have the indicated lock table mutex upon entry. */
715 void
716 rf_AddToWaitersQueue(RF_LockTableEntry_t *lockTable,
717 RF_StripeLockDesc_t *lockDesc, RF_LockReqDesc_t *lockReqDesc)
718 {
719 int tid;
720
721 if (rf_stripeLockDebug) {
722 Dprintf3("[%d] Waiting on lock for stripe %ld table 0x%lx.\n",
723 tid, lockDesc->stripeID, (unsigned long) lockTable);
724 FLUSH;
725 }
726 if (!lockDesc->waitersH) {
727 lockDesc->waitersH = lockDesc->waitersT = lockReqDesc;
728 } else {
729 lockDesc->waitersT->next = lockReqDesc;
730 lockDesc->waitersT = lockReqDesc;
731 }
732 }
733
734 RF_StripeLockDesc_t *
735 rf_AllocStripeLockDesc(RF_StripeNum_t stripeID)
736 {
737 RF_StripeLockDesc_t *p;
738
739 RF_FREELIST_GET(rf_stripelock_freelist, p, next,
740 (RF_StripeLockDesc_t *));
741 if (p) {
742 p->stripeID = stripeID;
743 }
744 return (p);
745 }
746
747 void
748 rf_FreeStripeLockDesc(RF_StripeLockDesc_t *p)
749 {
750 RF_FREELIST_FREE(rf_stripelock_freelist, p, next);
751 }
752
753 void
754 rf_PrintLockedStripes(RF_LockTableEntry_t *lockTable)
755 {
756 int i, j, foundone = 0, did;
757 RF_StripeLockDesc_t *p;
758 RF_LockReqDesc_t *q;
759
760 RF_LOCK_MUTEX(rf_printf_mutex);
761 printf("Locked stripes:\n");
762 for (i = 0; i < rf_lockTableSize; i++)
763 if (lockTable[i].descList) {
764 foundone = 1;
765 for (p = lockTable[i].descList; p; p = p->next) {
766 printf("Stripe ID 0x%lx (%d) nWriters %d\n",
767 (long) p->stripeID, (int) p->stripeID,
768 p->nWriters);
769
770 if (!(p->granted))
771 printf("Granted: (none)\n");
772 else
773 printf("Granted:\n");
774 for (did = 1, j = 0, q = p->granted; q;
775 j++, q = q->next) {
776 printf(" %c(%ld-%ld", q->type,
777 (long) q->start, (long) q->stop);
778 if (q->start2 != -1)
779 printf(",%ld-%ld) ",
780 (long) q->start2,
781 (long) q->stop2);
782 else
783 printf(") ");
784 if (j && !(j % 4)) {
785 printf("\n");
786 did = 1;
787 } else
788 did = 0;
789 }
790 if (!did)
791 printf("\n");
792
793 if (!(p->waitersH))
794 printf("Waiting: (none)\n");
795 else
796 printf("Waiting:\n");
797 for (did = 1, j = 0, q = p->waitersH; q;
798 j++, q = q->next) {
799 printf("%c(%ld-%ld", q->type,
800 (long) q->start, (long) q->stop);
801 if (q->start2 != -1)
802 printf(",%ld-%ld) ",
803 (long) q->start2,
804 (long) q->stop2);
805 else
806 printf(") ");
807 if (j && !(j % 4)) {
808 printf("\n ");
809 did = 1;
810 } else
811 did = 0;
812 }
813 if (!did)
814 printf("\n");
815 }
816 }
817 if (!foundone)
818 printf("(none)\n");
819 else
820 printf("\n");
821 RF_UNLOCK_MUTEX(rf_printf_mutex);
822 }