root/dev/raidframe/rf_driver.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rf_BootRaidframe
  2. rf_UnbootRaidframe
  3. rf_UnconfigureArray
  4. rf_Shutdown
  5. rf_Configure
  6. rf_init_rad
  7. rf_clean_rad
  8. rf_ShutdownRDFreeList
  9. rf_ConfigureRDFreeList
  10. rf_AllocRaidAccDesc
  11. rf_FreeRaidAccDesc
  12. rf_DoAccess
  13. rf_SetReconfiguredMode
  14. rf_FailDisk
  15. rf_SignalQuiescenceLock
  16. rf_SuspendNewRequestsAndWait
  17. rf_ResumeNewRequests
  18. rf_set_debug_option
  19. rf_ConfigureDebug
  20. rf_InitThroughputStats
  21. rf_StartThroughputStats
  22. rf_StopThroughputStats
  23. rf_PrintThroughputStats
  24. rf_StartUserStats
  25. rf_StopUserStats
  26. rf_UpdateUserStats
  27. rf_PrintUserStats

    1 /*      $OpenBSD: rf_driver.c,v 1.11 2002/12/16 07:01:03 tdeval Exp $   */
    2 /*      $NetBSD: rf_driver.c,v 1.37 2000/06/04 02:05:13 oster Exp $     */
    3 
    4 /*
    5  * Copyright (c) 1999 The NetBSD Foundation, Inc.
    6  * All rights reserved.
    7  *
    8  * This code is derived from software contributed to The NetBSD Foundation
    9  * by Greg Oster
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 3. All advertising materials mentioning features or use of this software
   20  *    must display the following acknowledgement:
   21  *      This product includes software developed by the NetBSD
   22  *      Foundation, Inc. and its contributors.
   23  * 4. Neither the name of The NetBSD Foundation nor the names of its
   24  *    contributors may be used to endorse or promote products derived
   25  *    from this software without specific prior written permission.
   26  *
   27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   37  * POSSIBILITY OF SUCH DAMAGE.
   38  */
   39 
   40 /*
   41  * Copyright (c) 1995 Carnegie-Mellon University.
   42  * All rights reserved.
   43  *
   44  * Author:      Mark Holland, Khalil Amiri, Claudson Bornstein,
   45  *              William V. Courtright II, Robby Findler, Daniel Stodolsky,
   46  *              Rachad Youssef, Jim Zelenka
   47  *
   48  * Permission to use, copy, modify and distribute this software and
   49  * its documentation is hereby granted, provided that both the copyright
   50  * notice and this permission notice appear in all copies of the
   51  * software, derivative works or modified versions, and any portions
   52  * thereof, and that both notices appear in supporting documentation.
   53  *
   54  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   55  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   56  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   57  *
   58  * Carnegie Mellon requests users of this software to return to
   59  *
   60  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   61  *  School of Computer Science
   62  *  Carnegie Mellon University
   63  *  Pittsburgh PA 15213-3890
   64  *
   65  * any improvements or extensions that they make and grant Carnegie the
   66  * rights to redistribute these changes.
   67  */
   68 
   69 /*****************************************************************************
   70  *
   71  * rf_driver.c -- Main setup, teardown, and access routines for the RAID
   72  *                driver
   73  *
   74  * All routines are prefixed with rf_ (RAIDframe), to avoid conflicts.
   75  *
   76  *****************************************************************************/
   77 
   78 #include <sys/types.h>
   79 #include <sys/param.h>
   80 #include <sys/systm.h>
   81 #include <sys/ioctl.h>
   82 #include <sys/fcntl.h>
   83 #ifdef  __NetBSD__
   84 #include <sys/vnode.h>
   85 #endif
   86 
   87 
   88 #include "rf_archs.h"
   89 #include "rf_threadstuff.h"
   90 
   91 
   92 #include <sys/errno.h>
   93 
   94 #include "rf_raid.h"
   95 #include "rf_dag.h"
   96 #include "rf_aselect.h"
   97 #include "rf_diskqueue.h"
   98 #include "rf_parityscan.h"
   99 #include "rf_alloclist.h"
  100 #include "rf_dagutils.h"
  101 #include "rf_utils.h"
  102 #include "rf_etimer.h"
  103 #include "rf_acctrace.h"
  104 #include "rf_configure.h"
  105 #include "rf_general.h"
  106 #include "rf_desc.h"
  107 #include "rf_states.h"
  108 #include "rf_freelist.h"
  109 #include "rf_decluster.h"
  110 #include "rf_map.h"
  111 #include "rf_revent.h"
  112 #include "rf_callback.h"
  113 #include "rf_engine.h"
  114 #include "rf_memchunk.h"
  115 #include "rf_mcpair.h"
  116 #include "rf_nwayxor.h"
  117 #include "rf_debugprint.h"
  118 #include "rf_copyback.h"
  119 #include "rf_driver.h"
  120 #include "rf_options.h"
  121 #include "rf_shutdown.h"
  122 #include "rf_kintf.h"
  123 
  124 #include <sys/buf.h>
  125 
  126 /* rad == RF_RaidAccessDesc_t */
  127 static RF_FreeList_t *rf_rad_freelist;
  128 #define RF_MAX_FREE_RAD         128
  129 #define RF_RAD_INC               16
  130 #define RF_RAD_INITIAL           32
  131 
  132 /* Debug variables. */
  133 char    rf_panicbuf[2048];      /*
  134                                  * A buffer to hold an error msg when we panic.
  135                                  */
  136 
  137 /* Main configuration routines. */
  138 static int raidframe_booted = 0;
  139 
  140 void rf_ConfigureDebug(RF_Config_t *);
  141 void rf_set_debug_option(char *, long);
  142 void rf_UnconfigureArray(void);
  143 int  rf_init_rad(RF_RaidAccessDesc_t *);
  144 void rf_clean_rad(RF_RaidAccessDesc_t *);
  145 void rf_ShutdownRDFreeList(void *);
  146 int  rf_ConfigureRDFreeList(RF_ShutdownList_t **);
  147 
  148 RF_DECLARE_MUTEX(rf_printf_mutex);      /*
  149                                          * Debug only: Avoids interleaved
  150                                          * printfs by different stripes.
  151                                          */
  152 
  153 #define SIGNAL_QUIESCENT_COND(_raid_)   wakeup(&((_raid_)->accesses_suspended))
  154 #define WAIT_FOR_QUIESCENCE(_raid_)                                     \
  155         tsleep(&((_raid_)->accesses_suspended), PRIBIO, "RAIDframe quiesce", 0);
  156 
  157 #define IO_BUF_ERR(bp, err)                                             \
  158 do {                                                                    \
  159         bp->b_flags |= B_ERROR;                                         \
  160         bp->b_resid = bp->b_bcount;                                     \
  161         bp->b_error = err;                                              \
  162         biodone(bp);                                                    \
  163 } while (0)
  164 
  165 static int configureCount = 0;  /* Number of active configurations. */
  166 static int isconfigged = 0;     /*
  167                                  * Is basic RAIDframe (non per-array)
  168                                  * stuff configured ?
  169                                  */
  170 RF_DECLARE_STATIC_MUTEX(configureMutex);        /*
  171                                                  * Used to lock the
  172                                                  * configuration stuff.
  173                                                  */
  174 static RF_ShutdownList_t *globalShutdown;       /* Non array-specific stuff. */
  175 int  rf_ConfigureRDFreeList(RF_ShutdownList_t **);
  176 
  177 
  178 /* Called at system boot time. */
  179 int
  180 rf_BootRaidframe(void)
  181 {
  182         int rc;
  183 
  184         if (raidframe_booted)
  185                 return (EBUSY);
  186         raidframe_booted = 1;
  187 
  188         rc = rf_mutex_init(&configureMutex);
  189         if (rc) {
  190                 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n",
  191                     __FILE__, __LINE__, rc);
  192                 RF_PANIC();
  193         }
  194         configureCount = 0;
  195         isconfigged = 0;
  196         globalShutdown = NULL;
  197         return (0);
  198 }
  199 
  200 
  201 /*
  202  * This function is really just for debugging user-level stuff: It
  203  * frees up all memory, other RAIDframe resources that might otherwise
  204  * be kept around. This is used with systems like "sentinel" to detect
  205  * memory leaks.
  206  */
  207 int
  208 rf_UnbootRaidframe(void)
  209 {
  210         int rc;
  211 
  212         RF_LOCK_MUTEX(configureMutex);
  213         if (configureCount) {
  214                 RF_UNLOCK_MUTEX(configureMutex);
  215                 return (EBUSY);
  216         }
  217         raidframe_booted = 0;
  218         RF_UNLOCK_MUTEX(configureMutex);
  219         rc = rf_mutex_destroy(&configureMutex);
  220         if (rc) {
  221                 RF_ERRORMSG3("Unable to destroy mutex file %s line %d"
  222                     " rc=%d.\n", __FILE__, __LINE__, rc);
  223                 RF_PANIC();
  224         }
  225         return (0);
  226 }
  227 
  228 
  229 /*
  230  * Called whenever an array is shutdown.
  231  */
  232 void
  233 rf_UnconfigureArray(void)
  234 {
  235         int rc;
  236 
  237         RF_LOCK_MUTEX(configureMutex);
  238         if (--configureCount == 0) {    /*
  239                                          * If no active configurations, shut
  240                                          * everything down.
  241                                          */
  242                 isconfigged = 0;
  243 
  244                 rc = rf_ShutdownList(&globalShutdown);
  245                 if (rc) {
  246                         RF_ERRORMSG1("RAIDFRAME: unable to do global shutdown,"
  247                             " rc=%d.\n", rc);
  248                 }
  249 
  250                 /*
  251                  * We must wait until now, because the AllocList module
  252                  * uses the DebugMem module.
  253                  */
  254                 if (rf_memDebug)
  255                         rf_print_unfreed();
  256         }
  257         RF_UNLOCK_MUTEX(configureMutex);
  258 }
  259 
  260 
  261 /*
  262  * Called to shut down an array.
  263  */
  264 int
  265 rf_Shutdown(RF_Raid_t *raidPtr)
  266 {
  267         if (!raidPtr->valid) {
  268                 RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe"
  269                     " driver. Aborting shutdown.\n");
  270                 return (EINVAL);
  271         }
  272         /*
  273          * Wait for outstanding IOs to land.
  274          * As described in rf_raid.h, we use the rad_freelist lock
  275          * to protect the per-array info about outstanding descs,
  276          * since we need to do freelist locking anyway, and this
  277          * cuts down on the amount of serialization we've got going
  278          * on.
  279          */
  280         RF_FREELIST_DO_LOCK(rf_rad_freelist);
  281         if (raidPtr->waitShutdown) {
  282                 RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
  283                 return (EBUSY);
  284         }
  285         raidPtr->waitShutdown = 1;
  286         while (raidPtr->nAccOutstanding) {
  287                 RF_WAIT_COND(raidPtr->outstandingCond, RF_FREELIST_MUTEX_OF(rf_rad_freelist));
  288         }
  289         RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
  290 
  291         /* Wait for any parity re-writes to stop... */
  292         while (raidPtr->parity_rewrite_in_progress) {
  293                 printf("Waiting for parity re-write to exit...\n");
  294                 tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO,
  295                        "rfprwshutdown", 0);
  296         }
  297 
  298         raidPtr->valid = 0;
  299 
  300         rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE);
  301 
  302         rf_UnconfigureVnodes(raidPtr);
  303 
  304         rf_ShutdownList(&raidPtr->shutdownList);
  305 
  306         rf_UnconfigureArray();
  307 
  308         return (0);
  309 }
  310 
  311 #define DO_INIT_CONFIGURE(f)                                            \
  312 do {                                                                    \
  313         rc = f (&globalShutdown);                                       \
  314         if (rc) {                                                       \
  315                 RF_ERRORMSG2("RAIDFRAME: failed %s with %d.\n",         \
  316                     RF_STRING(f), rc);                                  \
  317                 rf_ShutdownList(&globalShutdown);                       \
  318                 configureCount--;                                       \
  319                 RF_UNLOCK_MUTEX(configureMutex);                        \
  320                 return(rc);                                             \
  321         }                                                               \
  322 } while (0)
  323 
  324 #define DO_RAID_FAIL()                                                  \
  325 do {                                                                    \
  326         rf_UnconfigureVnodes(raidPtr);                                  \
  327         rf_ShutdownList(&raidPtr->shutdownList);                        \
  328         rf_UnconfigureArray();                                          \
  329 } while (0)
  330 
  331 #define DO_RAID_INIT_CONFIGURE(f)                                       \
  332 do {                                                                    \
  333         rc = (f)(&raidPtr->shutdownList, raidPtr, cfgPtr);              \
  334         if (rc) {                                                       \
  335                 RF_ERRORMSG2("RAIDFRAME: failed %s with %d.\n",         \
  336                     RF_STRING(f), rc);                                  \
  337                 DO_RAID_FAIL();                                         \
  338                 return(rc);                                             \
  339         }                                                               \
  340 } while (0)
  341 
  342 #define DO_RAID_MUTEX(_m_)                                              \
  343 do {                                                                    \
  344         rc = rf_create_managed_mutex(&raidPtr->shutdownList, (_m_));    \
  345         if (rc) {                                                       \
  346                 RF_ERRORMSG3("Unable to init mutex file %s line %d"     \
  347                     " rc=%d.\n", __FILE__, __LINE__, rc);               \
  348                 DO_RAID_FAIL();                                         \
  349                 return(rc);                                             \
  350         }                                                               \
  351 } while (0)
  352 
  353 #define DO_RAID_COND(_c_)                                               \
  354 do {                                                                    \
  355         rc = rf_create_managed_cond(&raidPtr->shutdownList, (_c_));     \
  356         if (rc) {                                                       \
  357                 RF_ERRORMSG3("Unable to init cond file %s line %d"      \
  358                     " rc=%d.\n", __FILE__, __LINE__, rc);               \
  359                 DO_RAID_FAIL();                                         \
  360                 return(rc);                                             \
  361         }                                                               \
  362 } while (0)
  363 
  364 int
  365 rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *ac)
  366 {
  367         RF_RowCol_t row, col;
  368         int i, rc;
  369 
  370         /*
  371          * XXX This check can probably be removed now, since
  372          * RAIDFRAME_CONFIGURE now checks to make sure that the
  373          * RAID set is not already valid.
  374          */
  375         if (raidPtr->valid) {
  376                 RF_ERRORMSG("RAIDframe configuration not shut down."
  377                     " Aborting configure.\n");
  378                 return (EINVAL);
  379         }
  380         RF_LOCK_MUTEX(configureMutex);
  381         configureCount++;
  382         if (isconfigged == 0) {
  383                 rc = rf_create_managed_mutex(&globalShutdown, &rf_printf_mutex);
  384                 if (rc) {
  385                         RF_ERRORMSG3("Unable to init mutex file %s line %d"
  386                             " rc=%d.\n", __FILE__, __LINE__, rc);
  387                         rf_ShutdownList(&globalShutdown);
  388                         return (rc);
  389                 }
  390                 /* Initialize globals. */
  391 #ifdef  RAIDDEBUG
  392                 printf("RAIDFRAME: protectedSectors is %ld.\n",
  393                        rf_protectedSectors);
  394 #endif  /* RAIDDEBUG */
  395 
  396                 rf_clear_debug_print_buffer();
  397 
  398                 DO_INIT_CONFIGURE(rf_ConfigureAllocList);
  399 
  400                 /*
  401                  * Yes, this does make debugging general to the whole
  402                  * system instead of being array specific. Bummer, drag.
  403                  */
  404                 rf_ConfigureDebug(cfgPtr);
  405                 DO_INIT_CONFIGURE(rf_ConfigureDebugMem);
  406                 DO_INIT_CONFIGURE(rf_ConfigureAccessTrace);
  407                 DO_INIT_CONFIGURE(rf_ConfigureMapModule);
  408                 DO_INIT_CONFIGURE(rf_ConfigureReconEvent);
  409                 DO_INIT_CONFIGURE(rf_ConfigureCallback);
  410                 DO_INIT_CONFIGURE(rf_ConfigureMemChunk);
  411                 DO_INIT_CONFIGURE(rf_ConfigureRDFreeList);
  412                 DO_INIT_CONFIGURE(rf_ConfigureNWayXor);
  413                 DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList);
  414                 DO_INIT_CONFIGURE(rf_ConfigureMCPair);
  415                 DO_INIT_CONFIGURE(rf_ConfigureDAGs);
  416                 DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs);
  417                 DO_INIT_CONFIGURE(rf_ConfigureDebugPrint);
  418                 DO_INIT_CONFIGURE(rf_ConfigureReconstruction);
  419                 DO_INIT_CONFIGURE(rf_ConfigureCopyback);
  420                 DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem);
  421                 isconfigged = 1;
  422         }
  423         RF_UNLOCK_MUTEX(configureMutex);
  424 
  425         DO_RAID_MUTEX(&raidPtr->mutex);
  426         /*
  427          * Set up the cleanup list. Do this after ConfigureDebug so that
  428          * value of memDebug will be set.
  429          */
  430 
  431         rf_MakeAllocList(raidPtr->cleanupList);
  432         if (raidPtr->cleanupList == NULL) {
  433                 DO_RAID_FAIL();
  434                 return (ENOMEM);
  435         }
  436         rc = rf_ShutdownCreate(&raidPtr->shutdownList,
  437             (void (*) (void *)) rf_FreeAllocList, raidPtr->cleanupList);
  438         if (rc) {
  439                 RF_ERRORMSG3("Unable to add to shutdown list file %s line %d"
  440                     " rc=%d.\n", __FILE__, __LINE__, rc);
  441                 DO_RAID_FAIL();
  442                 return (rc);
  443         }
  444         raidPtr->numRow = cfgPtr->numRow;
  445         raidPtr->numCol = cfgPtr->numCol;
  446         raidPtr->numSpare = cfgPtr->numSpare;
  447 
  448         /*
  449          * XXX We don't even pretend to support more than one row in the
  450          * kernel...
  451          */
  452         if (raidPtr->numRow != 1) {
  453                 RF_ERRORMSG("Only one row supported in kernel.\n");
  454                 DO_RAID_FAIL();
  455                 return (EINVAL);
  456         }
  457         RF_CallocAndAdd(raidPtr->status, raidPtr->numRow,
  458             sizeof(RF_RowStatus_t), (RF_RowStatus_t *), raidPtr->cleanupList);
  459         if (raidPtr->status == NULL) {
  460                 DO_RAID_FAIL();
  461                 return (ENOMEM);
  462         }
  463         RF_CallocAndAdd(raidPtr->reconControl, raidPtr->numRow,
  464             sizeof(RF_ReconCtrl_t *), (RF_ReconCtrl_t **), raidPtr->cleanupList);
  465         if (raidPtr->reconControl == NULL) {
  466                 DO_RAID_FAIL();
  467                 return (ENOMEM);
  468         }
  469         for (i = 0; i < raidPtr->numRow; i++) {
  470                 raidPtr->status[i] = rf_rs_optimal;
  471                 raidPtr->reconControl[i] = NULL;
  472         }
  473 
  474         DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine);
  475         DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks);
  476 
  477         DO_RAID_COND(&raidPtr->outstandingCond);
  478 
  479         raidPtr->nAccOutstanding = 0;
  480         raidPtr->waitShutdown = 0;
  481 
  482         DO_RAID_MUTEX(&raidPtr->access_suspend_mutex);
  483         DO_RAID_COND(&raidPtr->quiescent_cond);
  484 
  485         DO_RAID_COND(&raidPtr->waitForReconCond);
  486 
  487         DO_RAID_MUTEX(&raidPtr->recon_done_proc_mutex);
  488 
  489         if (ac != NULL) {
  490                 /*
  491                  * We have an AutoConfig structure... Don't do the
  492                  * normal disk configuration... call the auto config
  493                  * stuff.
  494                  */
  495                 rf_AutoConfigureDisks(raidPtr, cfgPtr, ac);
  496         } else {
  497                 DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks);
  498                 DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks);
  499         }
  500         /*
  501          * Do this after ConfigureDisks & ConfigureSpareDisks to be sure
  502          * devno is set.
  503          */
  504         DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues);
  505 
  506         DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout);
  507 
  508         DO_RAID_INIT_CONFIGURE(rf_ConfigurePSStatus);
  509 
  510         for (row = 0; row < raidPtr->numRow; row++) {
  511                 for (col = 0; col < raidPtr->numCol; col++) {
  512                         /*
  513                          * XXX Better distribution.
  514                          */
  515                         raidPtr->hist_diskreq[row][col] = 0;
  516                 }
  517         }
  518 
  519         raidPtr->numNewFailures = 0;
  520         raidPtr->copyback_in_progress = 0;
  521         raidPtr->parity_rewrite_in_progress = 0;
  522         raidPtr->recon_in_progress = 0;
  523         raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs;
  524 
  525         /*
  526          * Autoconfigure and root_partition will actually get filled in
  527          * after the config is done.
  528          */
  529         raidPtr->autoconfigure = 0;
  530         raidPtr->root_partition = 0;
  531         raidPtr->last_unit = raidPtr->raidid;
  532         raidPtr->config_order = 0;
  533 
  534         if (rf_keepAccTotals) {
  535                 raidPtr->keep_acc_totals = 1;
  536         }
  537         rf_StartUserStats(raidPtr);
  538 
  539         raidPtr->valid = 1;
  540         return (0);
  541 }
  542 
  543 int
  544 rf_init_rad(RF_RaidAccessDesc_t *desc)
  545 {
  546         int rc;
  547 
  548         rc = rf_mutex_init(&desc->mutex);
  549         if (rc) {
  550                 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n", __FILE__,
  551                     __LINE__, rc);
  552                 return (rc);
  553         }
  554         rc = rf_cond_init(&desc->cond);
  555         if (rc) {
  556                 RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d.\n", __FILE__,
  557                     __LINE__, rc);
  558                 rf_mutex_destroy(&desc->mutex);
  559                 return (rc);
  560         }
  561         return (0);
  562 }
  563 
  564 void
  565 rf_clean_rad(RF_RaidAccessDesc_t *desc)
  566 {
  567         rf_mutex_destroy(&desc->mutex);
  568         rf_cond_destroy(&desc->cond);
  569 }
  570 
  571 void
  572 rf_ShutdownRDFreeList(void *ignored)
  573 {
  574         RF_FREELIST_DESTROY_CLEAN(rf_rad_freelist, next,
  575             (RF_RaidAccessDesc_t *), rf_clean_rad);
  576 }
  577 
  578 int
  579 rf_ConfigureRDFreeList(RF_ShutdownList_t **listp)
  580 {
  581         int rc;
  582 
  583         RF_FREELIST_CREATE(rf_rad_freelist, RF_MAX_FREE_RAD,
  584             RF_RAD_INC, sizeof(RF_RaidAccessDesc_t));
  585         if (rf_rad_freelist == NULL) {
  586                 return (ENOMEM);
  587         }
  588         rc = rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL);
  589         if (rc) {
  590                 RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d.\n", __FILE__,
  591                     __LINE__, rc);
  592                 rf_ShutdownRDFreeList(NULL);
  593                 return (rc);
  594         }
  595         RF_FREELIST_PRIME_INIT(rf_rad_freelist, RF_RAD_INITIAL, next,
  596             (RF_RaidAccessDesc_t *), rf_init_rad);
  597         return (0);
  598 }
  599 
  600 RF_RaidAccessDesc_t *
  601 rf_AllocRaidAccDesc(
  602     RF_Raid_t                    *raidPtr,
  603     RF_IoType_t                   type,
  604     RF_RaidAddr_t                 raidAddress,
  605     RF_SectorCount_t              numBlocks,
  606     caddr_t                       bufPtr,
  607     void                         *bp,
  608     RF_DagHeader_t              **paramDAG,
  609     RF_AccessStripeMapHeader_t  **paramASM,
  610     RF_RaidAccessFlags_t          flags,
  611     void                        (*cbF) (struct buf *),
  612     void                         *cbA,
  613     RF_AccessState_t             *states
  614 )
  615 {
  616         RF_RaidAccessDesc_t *desc;
  617 
  618         RF_FREELIST_GET_INIT_NOUNLOCK(rf_rad_freelist, desc, next,
  619             (RF_RaidAccessDesc_t *), rf_init_rad);
  620         if (raidPtr->waitShutdown) {
  621                 /*
  622                  * Actually, we're shutting the array down. Free the desc
  623                  * and return NULL.
  624                  */
  625                 RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
  626                 RF_FREELIST_FREE_CLEAN(rf_rad_freelist, desc, next,
  627                     rf_clean_rad);
  628                 return (NULL);
  629         }
  630         raidPtr->nAccOutstanding++;
  631         RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
  632 
  633         desc->raidPtr = (void *) raidPtr;
  634         desc->type = type;
  635         desc->raidAddress = raidAddress;
  636         desc->numBlocks = numBlocks;
  637         desc->bufPtr = bufPtr;
  638         desc->bp = bp;
  639         desc->paramDAG = paramDAG;
  640         desc->paramASM = paramASM;
  641         desc->flags = flags;
  642         desc->states = states;
  643         desc->state = 0;
  644 
  645         desc->status = 0;
  646         bzero((char *) &desc->tracerec, sizeof(RF_AccTraceEntry_t));
  647         desc->callbackFunc = (void (*) (RF_CBParam_t)) cbF;     /* XXX */
  648         desc->callbackArg = cbA;
  649         desc->next = NULL;
  650         desc->head = desc;
  651         desc->numPending = 0;
  652         desc->cleanupList = NULL;
  653         rf_MakeAllocList(desc->cleanupList);
  654         return (desc);
  655 }
  656 
  657 void
  658 rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc)
  659 {
  660         RF_Raid_t *raidPtr = desc->raidPtr;
  661 
  662         RF_ASSERT(desc);
  663 
  664         rf_FreeAllocList(desc->cleanupList);
  665         RF_FREELIST_FREE_CLEAN_NOUNLOCK(rf_rad_freelist, desc, next, rf_clean_rad);
  666         raidPtr->nAccOutstanding--;
  667         if (raidPtr->waitShutdown) {
  668                 RF_SIGNAL_COND(raidPtr->outstandingCond);
  669         }
  670         RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
  671 }
  672 
  673 
  674 /********************************************************************
  675  * Main routine for performing an access.
  676  * Accesses are retried until a DAG can not be selected. This occurs
  677  * when either the DAG library is incomplete or there are too many
  678  * failures in a parity group.
  679  ********************************************************************/
  680 int
  681 rf_DoAccess(
  682     RF_Raid_t                    *raidPtr,
  683     RF_IoType_t                   type,         /* Should be read or write. */
  684     int                           async_flag,   /*
  685                                                  * Should be RF_TRUE
  686                                                  * or RF_FALSE.
  687                                                  */
  688     RF_RaidAddr_t                 raidAddress,
  689     RF_SectorCount_t              numBlocks,
  690     caddr_t                       bufPtr,
  691     void                         *bp_in,        /*
  692                                                  * It's a buf pointer.
  693                                                  * void * to facilitate
  694                                                  * ignoring it outside
  695                                                  * the kernel.
  696                                                  */
  697     RF_DagHeader_t              **paramDAG,
  698     RF_AccessStripeMapHeader_t  **paramASM,
  699     RF_RaidAccessFlags_t          flags,
  700     RF_RaidAccessDesc_t         **paramDesc,
  701     void                        (*cbF) (struct buf *),
  702     void                         *cbA
  703 )
  704 {
  705         RF_RaidAccessDesc_t *desc;
  706         caddr_t lbufPtr = bufPtr;
  707         struct buf *bp = (struct buf *) bp_in;
  708 
  709         raidAddress += rf_raidSectorOffset;
  710 
  711         if (!raidPtr->valid) {
  712                 RF_ERRORMSG("RAIDframe driver not successfully configured."
  713                     " Rejecting access.\n");
  714                 IO_BUF_ERR(bp, EINVAL);
  715                 return (EINVAL);
  716         }
  717 
  718         if (rf_accessDebug) {
  719 
  720                 printf("logBytes is: %d %d %d.\n", raidPtr->raidid,
  721                     raidPtr->logBytesPerSector,
  722                     (int) rf_RaidAddressToByte(raidPtr, numBlocks));
  723                 printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx.\n", raidPtr->raidid,
  724                     (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress,
  725                     (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress),
  726                     (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1),
  727                     (int) numBlocks,
  728                     (int) rf_RaidAddressToByte(raidPtr, numBlocks),
  729                     (long) bufPtr);
  730         }
  731         if (raidAddress + numBlocks > raidPtr->totalSectors) {
  732 
  733                 printf("DoAccess: raid addr %lu too large to access %lu sectors. Max legal addr is %lu.\n",
  734                     (u_long) raidAddress, (u_long) numBlocks, (u_long) raidPtr->totalSectors);
  735 
  736                         IO_BUF_ERR(bp, ENOSPC);
  737                         return (ENOSPC);
  738         }
  739         desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress,
  740             numBlocks, lbufPtr, bp, paramDAG, paramASM,
  741             flags, cbF, cbA, raidPtr->Layout.map->states);
  742 
  743         if (desc == NULL) {
  744                 return (ENOMEM);
  745         }
  746         RF_ETIMER_START(desc->tracerec.tot_timer);
  747 
  748         desc->async_flag = async_flag;
  749 
  750         rf_ContinueRaidAccess(desc);
  751 
  752         return (0);
  753 }
  754 
  755 
  756 /* Force the array into reconfigured mode without doing reconstruction. */
  757 int
  758 rf_SetReconfiguredMode(RF_Raid_t *raidPtr, int row, int col)
  759 {
  760         if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
  761                 printf("Can't set reconfigured mode in dedicated-spare"
  762                     " array.\n");
  763                 RF_PANIC();
  764         }
  765         RF_LOCK_MUTEX(raidPtr->mutex);
  766         raidPtr->numFailures++;
  767         raidPtr->Disks[row][col].status = rf_ds_dist_spared;
  768         raidPtr->status[row] = rf_rs_reconfigured;
  769         rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
  770         /*
  771          * Install spare table only if declustering + distributed sparing
  772          * architecture.
  773          */
  774         if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED)
  775                 rf_InstallSpareTable(raidPtr, row, col);
  776         RF_UNLOCK_MUTEX(raidPtr->mutex);
  777         return (0);
  778 }
  779 
  780 extern int fail_row, fail_col, fail_time;
  781 extern int delayed_recon;
  782 
  783 int
  784 rf_FailDisk(RF_Raid_t *raidPtr, int frow, int fcol, int initRecon)
  785 {
  786         printf("raid%d: Failing disk r%d c%d.\n", raidPtr->raidid, frow, fcol);
  787         RF_LOCK_MUTEX(raidPtr->mutex);
  788         raidPtr->numFailures++;
  789         raidPtr->Disks[frow][fcol].status = rf_ds_failed;
  790         raidPtr->status[frow] = rf_rs_degraded;
  791         rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
  792         RF_UNLOCK_MUTEX(raidPtr->mutex);
  793         if (initRecon)
  794                 rf_ReconstructFailedDisk(raidPtr, frow, fcol);
  795         return (0);
  796 }
  797 
  798 
  799 /*
  800  * Releases a thread that is waiting for the array to become quiesced.
  801  * access_suspend_mutex should be locked upon calling this.
  802  */
  803 void
  804 rf_SignalQuiescenceLock(RF_Raid_t *raidPtr, RF_RaidReconDesc_t *reconDesc)
  805 {
  806         if (rf_quiesceDebug) {
  807                 printf("raid%d: Signalling quiescence lock.\n",
  808                        raidPtr->raidid);
  809         }
  810         raidPtr->access_suspend_release = 1;
  811 
  812         if (raidPtr->waiting_for_quiescence) {
  813                 SIGNAL_QUIESCENT_COND(raidPtr);
  814         }
  815 }
  816 
  817 
  818 /*
  819  * Suspends all new requests to the array. No effect on accesses that are
  820  * in flight.
  821  */
  822 int
  823 rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr)
  824 {
  825         if (rf_quiesceDebug)
  826                 printf("Suspending new reqs.\n");
  827 
  828         RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
  829         raidPtr->accesses_suspended++;
  830         raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1;
  831 
  832         if (raidPtr->waiting_for_quiescence) {
  833                 raidPtr->access_suspend_release = 0;
  834                 while (!raidPtr->access_suspend_release) {
  835                         printf("Suspending: Waiting for Quiescence.\n");
  836                         WAIT_FOR_QUIESCENCE(raidPtr);
  837                         raidPtr->waiting_for_quiescence = 0;
  838                 }
  839         }
  840         printf("Quiescence reached...\n");
  841 
  842         RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
  843         return (raidPtr->waiting_for_quiescence);
  844 }
  845 
  846 
  847 /* Wake up everyone waiting for quiescence to be released. */
  848 void
  849 rf_ResumeNewRequests(RF_Raid_t *raidPtr)
  850 {
  851         RF_CallbackDesc_t *t, *cb;
  852 
  853         if (rf_quiesceDebug)
  854                 printf("Resuming new reqs.\n");
  855 
  856         RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
  857         raidPtr->accesses_suspended--;
  858         if (raidPtr->accesses_suspended == 0)
  859                 cb = raidPtr->quiesce_wait_list;
  860         else
  861                 cb = NULL;
  862         raidPtr->quiesce_wait_list = NULL;
  863         RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
  864 
  865         while (cb) {
  866                 t = cb;
  867                 cb = cb->next;
  868                 (t->callbackFunc) (t->callbackArg);
  869                 rf_FreeCallbackDesc(t);
  870         }
  871 }
  872 
  873 
  874 /*****************************************************************************
  875  *
  876  * Debug routines.
  877  *
  878  *****************************************************************************/
  879 
  880 void
  881 rf_set_debug_option(char *name, long val)
  882 {
  883         RF_DebugName_t *p;
  884 
  885         for (p = rf_debugNames; p->name; p++) {
  886                 if (!strcmp(p->name, name)) {
  887                         *(p->ptr) = val;
  888                         printf("[Set debug variable %s to %ld]\n", name, val);
  889                         return;
  890                 }
  891         }
  892         RF_ERRORMSG1("Unknown debug string \"%s\"\n", name);
  893 }
  894 
  895 
  896 /* Would like to use sscanf here, but apparently not available in kernel. */
  897 /*ARGSUSED*/
  898 void
  899 rf_ConfigureDebug(RF_Config_t *cfgPtr)
  900 {
  901         char *val_p, *name_p, *white_p;
  902         long val;
  903         int i;
  904 
  905         rf_ResetDebugOptions();
  906         for (i = 0; cfgPtr->debugVars[i][0] && i < RF_MAXDBGV; i++) {
  907                 name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]);
  908                 white_p = rf_find_white(name_p);        /*
  909                                                          * Skip to start of 2nd
  910                                                          * word.
  911                                                          */
  912                 val_p = rf_find_non_white(white_p);
  913                 if (*val_p == '0' && *(val_p + 1) == 'x')
  914                         val = rf_htoi(val_p + 2);
  915                 else
  916                         val = rf_atoi(val_p);
  917                 *white_p = '\0';
  918                 rf_set_debug_option(name_p, val);
  919         }
  920 }
  921 
  922 
  923 /* Performance monitoring stuff. */
  924 
  925 #if     !defined(_KERNEL) && !defined(SIMULATE)
  926 
  927 /*
  928  * Throughput stats currently only used in user-level RAIDframe.
  929  */
  930 
  931 int
  932 rf_InitThroughputStats(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
  933     RF_Config_t *cfgPtr)
  934 {
  935         int rc;
  936 
  937         /* These used by user-level RAIDframe only. */
  938         rc = rf_create_managed_mutex(listp, &raidPtr->throughputstats.mutex);
  939         if (rc) {
  940                 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n",
  941                     __FILE__, __LINE__, rc);
  942                 return (rc);
  943         }
  944         raidPtr->throughputstats.sum_io_us = 0;
  945         raidPtr->throughputstats.num_ios = 0;
  946         raidPtr->throughputstats.num_out_ios = 0;
  947         return (0);
  948 }
  949 
  950 void
  951 rf_StartThroughputStats(RF_Raid_t *raidPtr)
  952 {
  953         RF_LOCK_MUTEX(raidPtr->throughputstats.mutex);
  954         raidPtr->throughputstats.num_ios++;
  955         raidPtr->throughputstats.num_out_ios++;
  956         if (raidPtr->throughputstats.num_out_ios == 1)
  957                 RF_GETTIME(raidPtr->throughputstats.start);
  958         RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex);
  959 }
  960 
  961 void
  962 rf_StopThroughputStats(RF_Raid_t *raidPtr)
  963 {
  964         struct timeval diff;
  965 
  966         RF_LOCK_MUTEX(raidPtr->throughputstats.mutex);
  967         raidPtr->throughputstats.num_out_ios--;
  968         if (raidPtr->throughputstats.num_out_ios == 0) {
  969                 RF_GETTIME(raidPtr->throughputstats.stop);
  970                 RF_TIMEVAL_DIFF(&raidPtr->throughputstats.start,
  971                     &raidPtr->throughputstats.stop, &diff);
  972                 raidPtr->throughputstats.sum_io_us += RF_TIMEVAL_TO_US(diff);
  973         }
  974         RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex);
  975 }
  976 
  977 void
  978 rf_PrintThroughputStats(RF_Raid_t *raidPtr)
  979 {
  980         RF_ASSERT(raidPtr->throughputstats.num_out_ios == 0);
  981         if (raidPtr->throughputstats.sum_io_us != 0) {
  982                 printf("[Througphut: %8.2f IOs/second]\n",
  983                     raidPtr->throughputstats.num_ios /
  984                     (raidPtr->throughputstats.sum_io_us / 1000000.0));
  985         }
  986 }
  987 
  988 #endif  /* !_KERNEL && !SIMULATE */
  989 
  990 void
  991 rf_StartUserStats(RF_Raid_t *raidPtr)
  992 {
  993         RF_GETTIME(raidPtr->userstats.start);
  994         raidPtr->userstats.sum_io_us = 0;
  995         raidPtr->userstats.num_ios = 0;
  996         raidPtr->userstats.num_sect_moved = 0;
  997 }
  998 
  999 void
 1000 rf_StopUserStats(RF_Raid_t *raidPtr)
 1001 {
 1002         RF_GETTIME(raidPtr->userstats.stop);
 1003 }
 1004 
 1005 void
 1006 rf_UpdateUserStats(
 1007     RF_Raid_t   *raidPtr,
 1008     int          rt,            /* Response time in us. */
 1009     int          numsect        /* Number of sectors for this access. */
 1010 )
 1011 {
 1012         raidPtr->userstats.sum_io_us += rt;
 1013         raidPtr->userstats.num_ios++;
 1014         raidPtr->userstats.num_sect_moved += numsect;
 1015 }
 1016 
 1017 void
 1018 rf_PrintUserStats(RF_Raid_t *raidPtr)
 1019 {
 1020         long    elapsed_us, mbs, mbs_frac;
 1021         struct timeval diff;
 1022 
 1023         RF_TIMEVAL_DIFF(&raidPtr->userstats.start, &raidPtr->userstats.stop,
 1024             &diff);
 1025         elapsed_us = RF_TIMEVAL_TO_US(diff);
 1026 
 1027         /* 2000 sectors per megabyte, 10000000 microseconds per second. */
 1028         if (elapsed_us)
 1029                 mbs = (raidPtr->userstats.num_sect_moved / 2000) /
 1030                     (elapsed_us / 1000000);
 1031         else
 1032                 mbs = 0;
 1033 
 1034         /* This computes only the first digit of the fractional mb/s moved. */
 1035         if (elapsed_us) {
 1036                 mbs_frac = ((raidPtr->userstats.num_sect_moved / 200) /
 1037                     (elapsed_us / 1000000)) - (mbs * 10);
 1038         } else {
 1039                 mbs_frac = 0;
 1040         }
 1041 
 1042         printf("Number of I/Os:             %ld\n",
 1043             raidPtr->userstats.num_ios);
 1044         printf("Elapsed time (us):          %ld\n",
 1045             elapsed_us);
 1046         printf("User I/Os per second:       %ld\n",
 1047             RF_DB0_CHECK(raidPtr->userstats.num_ios, (elapsed_us / 1000000)));
 1048         printf("Average user response time: %ld us\n",
 1049             RF_DB0_CHECK(raidPtr->userstats.sum_io_us,
 1050              raidPtr->userstats.num_ios));
 1051         printf("Total sectors moved:        %ld\n",
 1052             raidPtr->userstats.num_sect_moved);
 1053         printf("Average access size (sect): %ld\n",
 1054             RF_DB0_CHECK(raidPtr->userstats.num_sect_moved,
 1055             raidPtr->userstats.num_ios));
 1056         printf("Achieved data rate:         %ld.%ld MB/sec\n",
 1057             mbs, mbs_frac);
 1058 }

/* [<][>][^][v][top][bottom][index][help] */