root/dev/raidframe/rf_raid5.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. RF_Raid5ConfigInfo_t
  2. rf_ConfigureRAID5
  3. rf_GetDefaultNumFloatingReconBuffersRAID5
  4. rf_GetDefaultHeadSepLimitRAID5
  5. rf_ShutdownRAID5
  6. rf_MapSectorRAID5
  7. rf_MapParityRAID5
  8. rf_IdentifyStripeRAID5
  9. rf_MapSIDToPSIDRAID5
  10. rf_RaidFiveDagSelect

    1 /*      $OpenBSD: rf_raid5.c,v 1.4 2002/12/16 07:01:04 tdeval Exp $     */
    2 /*      $NetBSD: rf_raid5.c,v 1.4 2000/01/08 22:57:30 oster Exp $       */
    3 
    4 /*
    5  * Copyright (c) 1995 Carnegie-Mellon University.
    6  * All rights reserved.
    7  *
    8  * Author: Mark Holland
    9  *
   10  * Permission to use, copy, modify and distribute this software and
   11  * its documentation is hereby granted, provided that both the copyright
   12  * notice and this permission notice appear in all copies of the
   13  * software, derivative works or modified versions, and any portions
   14  * thereof, and that both notices appear in supporting documentation.
   15  *
   16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   19  *
   20  * Carnegie Mellon requests users of this software to return to
   21  *
   22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   23  *  School of Computer Science
   24  *  Carnegie Mellon University
   25  *  Pittsburgh PA 15213-3890
   26  *
   27  * any improvements or extensions that they make and grant Carnegie the
   28  * rights to redistribute these changes.
   29  */
   30 
   31 /*****************************************************************************
   32  *
   33  * rf_raid5.c -- Implements RAID Level 5.
   34  *
   35  *****************************************************************************/
   36 
   37 #include "rf_types.h"
   38 #include "rf_raid.h"
   39 #include "rf_raid5.h"
   40 #include "rf_dag.h"
   41 #include "rf_dagffrd.h"
   42 #include "rf_dagffwr.h"
   43 #include "rf_dagdegrd.h"
   44 #include "rf_dagdegwr.h"
   45 #include "rf_dagutils.h"
   46 #include "rf_general.h"
   47 #include "rf_map.h"
   48 #include "rf_utils.h"
   49 
   50 typedef struct RF_Raid5ConfigInfo_s {
   51         RF_RowCol_t **stripeIdentifier; /*
   52                                          * Filled in at config time and used
   53                                          * by IdentifyStripe.
   54                                          */
   55 } RF_Raid5ConfigInfo_t;
   56 
   57 
   58 int
   59 rf_ConfigureRAID5(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
   60     RF_Config_t *cfgPtr)
   61 {
   62         RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
   63         RF_Raid5ConfigInfo_t *info;
   64         RF_RowCol_t i, j, startdisk;
   65 
   66         /* Create a RAID level 5 configuration structure. */
   67         RF_MallocAndAdd(info, sizeof(RF_Raid5ConfigInfo_t),
   68             (RF_Raid5ConfigInfo_t *), raidPtr->cleanupList);
   69         if (info == NULL)
   70                 return (ENOMEM);
   71         layoutPtr->layoutSpecificInfo = (void *) info;
   72 
   73         RF_ASSERT(raidPtr->numRow == 1);
   74 
   75         /*
   76          * The stripe identifier must identify the disks in each stripe, IN
   77          * THE ORDER THAT THEY APPEAR IN THE STRIPE.
   78          */
   79         info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol,
   80             raidPtr->numCol, raidPtr->cleanupList);
   81         if (info->stripeIdentifier == NULL)
   82                 return (ENOMEM);
   83         startdisk = 0;
   84         for (i = 0; i < raidPtr->numCol; i++) {
   85                 for (j = 0; j < raidPtr->numCol; j++) {
   86                         info->stripeIdentifier[i][j] = (startdisk + j) %
   87                             raidPtr->numCol;
   88                 }
   89                 if ((--startdisk) < 0)
   90                         startdisk = raidPtr->numCol - 1;
   91         }
   92 
   93         /* Fill in the remaining layout parameters. */
   94         layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
   95         layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit <<
   96             raidPtr->logBytesPerSector;
   97         layoutPtr->numDataCol = raidPtr->numCol - 1;
   98         layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol *
   99             layoutPtr->sectorsPerStripeUnit;
  100         layoutPtr->numParityCol = 1;
  101         layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
  102 
  103         raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk *
  104             layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
  105 
  106         return (0);
  107 }
  108 
  109 int
  110 rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t *raidPtr)
  111 {
  112         return (20);
  113 }
  114 
  115 RF_HeadSepLimit_t
  116 rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t *raidPtr)
  117 {
  118         return (10);
  119 }
  120 
  121 #if !defined(__NetBSD__) && !defined(__OpenBSD__) && !defined(_KERNEL)
  122 /* Not currently used. */
  123 int
  124 rf_ShutdownRAID5(RF_Raid_t *raidPtr)
  125 {
  126         return (0);
  127 }
  128 #endif
  129 
  130 void
  131 rf_MapSectorRAID5(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
  132     RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
  133 {
  134         RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
  135         *row = 0;
  136         *col = (SUID % raidPtr->numCol);
  137         *diskSector = (SUID / (raidPtr->Layout.numDataCol)) *
  138             raidPtr->Layout.sectorsPerStripeUnit +
  139             (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
  140 }
  141 
  142 void
  143 rf_MapParityRAID5(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
  144     RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
  145 {
  146         RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
  147 
  148         *row = 0;
  149         *col = raidPtr->Layout.numDataCol -
  150             (SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol;
  151         *diskSector = (SUID / (raidPtr->Layout.numDataCol)) *
  152             raidPtr->Layout.sectorsPerStripeUnit +
  153             (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
  154 }
  155 
  156 void
  157 rf_IdentifyStripeRAID5(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
  158     RF_RowCol_t **diskids, RF_RowCol_t *outRow)
  159 {
  160         RF_StripeNum_t stripeID =
  161             rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
  162         RF_Raid5ConfigInfo_t *info =
  163             (RF_Raid5ConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  164 
  165         *outRow = 0;
  166         *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol];
  167 }
  168 
  169 void
  170 rf_MapSIDToPSIDRAID5(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID,
  171     RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru)
  172 {
  173         *which_ru = 0;
  174         *psID = stripeID;
  175 }
  176 
  177 
  178 /*
  179  * Select an algorithm for performing an access.  Returns two pointers,
  180  * one to a function that will return information about the DAG, and
  181  * another to a function that will create the dag.
  182  */
  183 void
  184 rf_RaidFiveDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
  185     RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc)
  186 {
  187         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  188         RF_PhysDiskAddr_t *failedPDA = NULL;
  189         RF_RowCol_t frow, fcol;
  190         RF_RowStatus_t rstat;
  191         int prior_recon;
  192 
  193         RF_ASSERT(RF_IO_IS_R_OR_W(type));
  194 
  195         if (asmap->numDataFailed + asmap->numParityFailed > 1) {
  196                 RF_ERRORMSG("Multiple disks failed in a single group !"
  197                             "  Aborting I/O operation.\n");
  198                  /* *infoFunc = */ *createFunc = NULL;
  199                 return;
  200         } else
  201                 if (asmap->numDataFailed + asmap->numParityFailed == 1) {
  202 
  203                         /*
  204                          * If under recon & already reconstructed, redirect
  205                          * the access to the spare drive and eliminate the
  206                          * failure indication.
  207                          */
  208                         failedPDA = asmap->failedPDAs[0];
  209                         frow = failedPDA->row;
  210                         fcol = failedPDA->col;
  211                         rstat = raidPtr->status[failedPDA->row];
  212                         prior_recon = (rstat == rf_rs_reconfigured) || (
  213                             (rstat == rf_rs_reconstructing) ?
  214                             rf_CheckRUReconstructed(raidPtr
  215                              ->reconControl[frow]->reconMap,
  216                              failedPDA->startSector) : 0);
  217                         if (prior_recon) {
  218                                 RF_RowCol_t or = failedPDA->row;
  219                                 RF_RowCol_t oc = failedPDA->col;
  220                                 RF_SectorNum_t oo = failedPDA->startSector;
  221 
  222                                 if (layoutPtr->map->flags &
  223                                     RF_DISTRIBUTE_SPARE) {
  224                                         /* Redirect to dist spare space. */
  225 
  226                                         if (failedPDA == asmap->parityInfo) {
  227 
  228                                                 /* Parity has failed. */
  229                                                 (layoutPtr->map->MapParity)
  230                                                     (raidPtr,
  231                                                      failedPDA->raidAddress,
  232                                                      &failedPDA->row,
  233                                                      &failedPDA->col,
  234                                                      &failedPDA->startSector,
  235                                                      RF_REMAP);
  236 
  237                                                 if (asmap->parityInfo->next) {
  238                                                         /*
  239                                                          * Redir 2nd component,
  240                                                          * if any.
  241                                                          */
  242                                                         RF_PhysDiskAddr_t *p =
  243                                                             asmap
  244                                                              ->parityInfo->next;
  245                                                         RF_SectorNum_t SUoffs =
  246                                                             p->startSector %
  247                                                 layoutPtr->sectorsPerStripeUnit;
  248                                                         p->row = failedPDA->row;
  249                                                         p->col = failedPDA->col;
  250                                                         /*
  251                                                          * Cheating:
  252                                                          * startSector is not
  253                                                          * really a RAID
  254                                                          * address.
  255                                                          */
  256                                                         p->startSector =
  257                                         rf_RaidAddressOfPrevStripeUnitBoundary(
  258                                             layoutPtr, failedPDA->startSector) +
  259                                                             SUoffs;
  260                                                 }
  261                                         } else
  262                                                 if (asmap->parityInfo->next &&
  263                                                     failedPDA ==
  264                                                     asmap->parityInfo->next) {
  265                                                         /*
  266                                                          * Should never happen.
  267                                                          */
  268                                                         RF_ASSERT(0);
  269                                                 } else {
  270                                                         /* Data has failed. */
  271                                                         (layoutPtr->map
  272                                                          ->MapSector) (raidPtr,
  273                                                          failedPDA->raidAddress,
  274                                                             &failedPDA->row,
  275                                                             &failedPDA->col,
  276                                                         &failedPDA->startSector,
  277                                                             RF_REMAP);
  278                                                 }
  279 
  280                                 } else {
  281                                         /* Redirect to dedicated spare space. */
  282 
  283                                         failedPDA->row =
  284                                             raidPtr->Disks[frow][fcol].spareRow;
  285                                         failedPDA->col =
  286                                             raidPtr->Disks[frow][fcol].spareCol;
  287 
  288                                         /*
  289                                          * The parity may have two distinct
  290                                          * components, both of which may need
  291                                          * to be redirected.
  292                                          */
  293                                         if (asmap->parityInfo->next) {
  294                                                 if (failedPDA ==
  295                                                     asmap->parityInfo) {
  296                                                         failedPDA->next->row =
  297                                                             failedPDA->row;
  298                                                         failedPDA->next->col =
  299                                                             failedPDA->col;
  300                                                 } else {
  301                                                         if (failedPDA ==
  302                                                             asmap->parityInfo
  303                                                              ->next) {
  304                                                                 /*
  305                                                                  * Paranoid:
  306                                                                  * Should never
  307                                                                  * occur.
  308                                                                  */
  309                                                                 asmap
  310                                                                  ->parityInfo
  311                                                                  ->row =
  312                                                                  failedPDA->row;
  313                                                                 asmap
  314                                                                  ->parityInfo
  315                                                                  ->col =
  316                                                                  failedPDA->col;
  317                                                         }
  318                                                 }
  319                                         }
  320                                 }
  321 
  322                                 RF_ASSERT(failedPDA->col != -1);
  323 
  324                                 if (rf_dagDebug || rf_mapDebug) {
  325                                         printf("raid%d: Redirected type '%c'"
  326                                                " r %d c %d o %ld -> r %d c %d"
  327                                                " o %ld\n", raidPtr->raidid,
  328                                                type, or, oc, (long) oo,
  329                                                failedPDA->row, failedPDA->col,
  330                                                (long) failedPDA->startSector);
  331                                 }
  332                                 asmap->numDataFailed = asmap->numParityFailed
  333                                                      = 0;
  334                         }
  335                 }
  336         /*
  337          * All DAGs begin/end with block/unblock node. Therefore, hdrSucc &
  338          * termAnt counts should always be 1. Also, these counts should not be
  339          * visible outside DAG creation routines - manipulating the counts
  340          * here should be removed.
  341          */
  342         if (type == RF_IO_TYPE_READ) {
  343                 if (asmap->numDataFailed == 0)
  344                         *createFunc = (RF_VoidFuncPtr)
  345                             rf_CreateFaultFreeReadDAG;
  346                 else
  347                         *createFunc = (RF_VoidFuncPtr)
  348                             rf_CreateRaidFiveDegradedReadDAG;
  349         } else {
  350                 /*
  351                  * If mirroring, always use large writes. If the access
  352                  * requires two distinct parity updates, always do a small
  353                  * write. If the stripe contains a failure but the access
  354                  * does not, do a small write. The first conditional
  355                  * (numStripeUnitsAccessed <= numDataCol/2) uses a
  356                  * less-than-or-equal rather than just a less-than because
  357                  * when G is 3 or 4, numDataCol/2 is 1, and I want
  358                  * single-stripe-unit updates to use just one disk.
  359                  */
  360                 if ((asmap->numDataFailed + asmap->numParityFailed) == 0) {
  361                         if (rf_suppressLocksAndLargeWrites ||
  362                             (((asmap->numStripeUnitsAccessed <=
  363                                (layoutPtr->numDataCol / 2)) &&
  364                               (layoutPtr->numDataCol != 1)) ||
  365                              (asmap->parityInfo->next != NULL) ||
  366                              rf_CheckStripeForFailures(raidPtr, asmap))) {
  367                                 *createFunc = (RF_VoidFuncPtr)
  368                                     rf_CreateSmallWriteDAG;
  369                         } else
  370                                 *createFunc = (RF_VoidFuncPtr)
  371                                     rf_CreateLargeWriteDAG;
  372                 } else {
  373                         if (asmap->numParityFailed == 1)
  374                                 *createFunc = (RF_VoidFuncPtr)
  375                                     rf_CreateNonRedundantWriteDAG;
  376                         else
  377                                 if (asmap->numStripeUnitsAccessed != 1 &&
  378                                     failedPDA->numSector !=
  379                                     layoutPtr->sectorsPerStripeUnit)
  380                                         *createFunc = NULL;
  381                                 else
  382                                         *createFunc = (RF_VoidFuncPtr)
  383                                             rf_CreateDegradedWriteDAG;
  384                 }
  385         }
  386 }

/* [<][>][^][v][top][bottom][index][help] */