root/dev/raidframe/rf_map.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rf_MapAccess
  2. rf_MarkFailuresInASMList
  3. rf_DuplicateASM
  4. rf_DuplicatePDA
  5. rf_ShutdownMapModule
  6. rf_ConfigureMapModule
  7. rf_AllocAccessStripeMapHeader
  8. rf_FreeAccessStripeMapHeader
  9. rf_AllocPhysDiskAddr
  10. rf_AllocPDAList
  11. rf_FreePhysDiskAddr
  12. rf_FreePDAList
  13. rf_AllocAccessStripeMapComponent
  14. rf_AllocASMList
  15. rf_FreeAccessStripeMapComponent
  16. rf_FreeASMList
  17. rf_FreeAccessStripeMap
  18. rf_CheckStripeForFailures
  19. rf_NumFailedDataUnitsInStripe
  20. rf_PrintAccessStripeMap
  21. rf_PrintFullAccessStripeMap
  22. rf_PrintRaidAddressInfo
  23. rf_ASMParityAdjust
  24. rf_ASMCheckStatus

    1 /*      $OpenBSD: rf_map.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $       */
    2 /*      $NetBSD: rf_map.c,v 1.5 2000/06/29 00:22:27 oster Exp $ */
    3 
    4 /*
    5  * Copyright (c) 1995 Carnegie-Mellon University.
    6  * All rights reserved.
    7  *
    8  * Author: Mark Holland
    9  *
   10  * Permission to use, copy, modify and distribute this software and
   11  * its documentation is hereby granted, provided that both the copyright
   12  * notice and this permission notice appear in all copies of the
   13  * software, derivative works or modified versions, and any portions
   14  * thereof, and that both notices appear in supporting documentation.
   15  *
   16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   19  *
   20  * Carnegie Mellon requests users of this software to return to
   21  *
   22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   23  *  School of Computer Science
   24  *  Carnegie Mellon University
   25  *  Pittsburgh PA 15213-3890
   26  *
   27  * any improvements or extensions that they make and grant Carnegie the
   28  * rights to redistribute these changes.
   29  */
   30 
   31 /*****************************************************************************
   32  *
   33  * map.c -- Main code for mapping RAID addresses to physical disk addresses.
   34  *
   35  *****************************************************************************/
   36 
   37 #include "rf_types.h"
   38 #include "rf_threadstuff.h"
   39 #include "rf_raid.h"
   40 #include "rf_general.h"
   41 #include "rf_map.h"
   42 #include "rf_freelist.h"
   43 #include "rf_shutdown.h"
   44 
   45 void rf_FreePDAList(RF_PhysDiskAddr_t *, RF_PhysDiskAddr_t *, int);
   46 void rf_FreeASMList(RF_AccessStripeMap_t *, RF_AccessStripeMap_t *, int);
   47 
   48 /*****************************************************************************
   49  *
   50  * MapAccess -- Main 1st order mapping routine.
   51  *
   52  * Maps an access in the RAID address space to the corresponding set of
   53  * physical disk addresses. The result is returned as a list of
   54  * AccessStripeMap structures, one per stripe accessed. Each ASM structure
   55  * contains a pointer to a list of PhysDiskAddr structures, which describe
   56  * the physical locations touched by the user access. Note that this routine
   57  * returns only static mapping information, i.e. the list of physical
   58  * addresses returned does not necessarily identify the set of physical
   59  * locations that will actually be read or written.
   60  *
   61  * The routine also maps the parity. The physical disk location returned
   62  * always indicates the entire parity unit, even when only a subset of it
   63  * is being accessed. This is because an access that is not stripe unit
   64  * aligned but that spans a stripe unit boundary may require access two
   65  * distinct portions of the parity unit, and we can't yet tell which
   66  * portion(s) we'll actually need. We leave it up to the algorithm
   67  * selection code to decide what subset of the parity unit to access.
   68  *
   69  * Note that addresses in the RAID address space must always be maintained
   70  * as longs, instead of ints.
   71  *
   72  * This routine returns NULL if numBlocks is 0.
   73  *
   74  *****************************************************************************/
   75 
   76 RF_AccessStripeMapHeader_t *
   77 rf_MapAccess(
   78         RF_Raid_t       *raidPtr,
   79         RF_RaidAddr_t    raidAddress,   /*
   80                                          * Starting address in RAID address
   81                                          * space.
   82                                          */
   83         RF_SectorCount_t numBlocks,     /*
   84                                          * Number of blocks in RAID address
   85                                          * space to access.
   86                                          */
   87         caddr_t          buffer,        /* Buffer to supply/receive data. */
   88         int              remap          /*
   89                                          * 1 => remap addresses to spare space.
   90                                          */
   91 )
   92 {
   93         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
   94         RF_AccessStripeMapHeader_t *asm_hdr = NULL;
   95         RF_AccessStripeMap_t *asm_list = NULL, *asm_p = NULL;
   96         int faultsTolerated = layoutPtr->map->faultsTolerated;
   97         /* We'll change raidAddress along the way. */
   98         RF_RaidAddr_t startAddress = raidAddress;
   99         RF_RaidAddr_t endAddress = raidAddress + numBlocks;
  100         RF_RaidDisk_t **disks = raidPtr->Disks;
  101 
  102         RF_PhysDiskAddr_t *pda_p, *pda_q;
  103         RF_StripeCount_t numStripes = 0;
  104         RF_RaidAddr_t stripeRealEndAddress, stripeEndAddress;
  105         RF_RaidAddr_t nextStripeUnitAddress;
  106         RF_RaidAddr_t startAddrWithinStripe, lastRaidAddr;
  107         RF_StripeCount_t totStripes;
  108         RF_StripeNum_t stripeID, lastSID, SUID, lastSUID;
  109         RF_AccessStripeMap_t *asmList, *t_asm;
  110         RF_PhysDiskAddr_t *pdaList, *t_pda;
  111 
  112         /* Allocate all the ASMs and PDAs up front. */
  113         lastRaidAddr = raidAddress + numBlocks - 1;
  114         stripeID = rf_RaidAddressToStripeID(layoutPtr, raidAddress);
  115         lastSID = rf_RaidAddressToStripeID(layoutPtr, lastRaidAddr);
  116         totStripes = lastSID - stripeID + 1;
  117         SUID = rf_RaidAddressToStripeUnitID(layoutPtr, raidAddress);
  118         lastSUID = rf_RaidAddressToStripeUnitID(layoutPtr, lastRaidAddr);
  119 
  120         asmList = rf_AllocASMList(totStripes);
  121         pdaList = rf_AllocPDAList(lastSUID - SUID + 1 +
  122             faultsTolerated * totStripes);      /*
  123                                                  * May also need pda(s)
  124                                                  * per stripe for parity.
  125                                                  */
  126 
  127         if (raidAddress + numBlocks > raidPtr->totalSectors) {
  128                 RF_ERRORMSG1("Unable to map access because offset (%d)"
  129                     " was invalid\n", (int) raidAddress);
  130                 return (NULL);
  131         }
  132         if (rf_mapDebug)
  133                 rf_PrintRaidAddressInfo(raidPtr, raidAddress, numBlocks);
  134         for (; raidAddress < endAddress;) {
  135                 /* Make the next stripe structure. */
  136                 RF_ASSERT(asmList);
  137                 t_asm = asmList;
  138                 asmList = asmList->next;
  139                 bzero((char *) t_asm, sizeof(RF_AccessStripeMap_t));
  140                 if (!asm_p)
  141                         asm_list = asm_p = t_asm;
  142                 else {
  143                         asm_p->next = t_asm;
  144                         asm_p = asm_p->next;
  145                 }
  146                 numStripes++;
  147 
  148                 /* Map SUs from current location to the end of the stripe. */
  149                 asm_p->stripeID =
  150                 /* rf_RaidAddressToStripeID(layoutPtr, raidAddress) */
  151                     stripeID++;
  152                 stripeRealEndAddress =
  153                     rf_RaidAddressOfNextStripeBoundary(layoutPtr, raidAddress);
  154                 stripeEndAddress = RF_MIN(endAddress, stripeRealEndAddress);
  155                 asm_p->raidAddress = raidAddress;
  156                 asm_p->endRaidAddress = stripeEndAddress;
  157 
  158                 /* Map each stripe unit in the stripe. */
  159                 pda_p = NULL;
  160                 /*
  161                  * Raid addr of start of portion of access that is within this
  162                  * stripe.
  163                  */
  164                 startAddrWithinStripe = raidAddress;
  165 
  166                 for (; raidAddress < stripeEndAddress;) {
  167                         RF_ASSERT(pdaList);
  168                         t_pda = pdaList;
  169                         pdaList = pdaList->next;
  170                         bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
  171                         if (!pda_p)
  172                                 asm_p->physInfo = pda_p = t_pda;
  173                         else {
  174                                 pda_p->next = t_pda;
  175                                 pda_p = pda_p->next;
  176                         }
  177 
  178                         pda_p->type = RF_PDA_TYPE_DATA;
  179                         (layoutPtr->map->MapSector) (raidPtr, raidAddress,
  180                             &(pda_p->row), &(pda_p->col),
  181                             &(pda_p->startSector), remap);
  182 
  183                         /*
  184                          * Mark any failures we find.
  185                          * failedPDA is don't-care if there is more than
  186                          * one failure.
  187                          */
  188                         /*
  189                          * The RAID address corresponding to this physical
  190                          * disk address.
  191                          */
  192                         pda_p->raidAddress = raidAddress;
  193                         nextStripeUnitAddress =
  194                             rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr,
  195                              raidAddress);
  196                         pda_p->numSector = RF_MIN(endAddress,
  197                             nextStripeUnitAddress) - raidAddress;
  198                         RF_ASSERT(pda_p->numSector != 0);
  199                         rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 0);
  200                         pda_p->bufPtr = buffer + rf_RaidAddressToByte(raidPtr,
  201                             (raidAddress - startAddress));
  202                         asm_p->totalSectorsAccessed += pda_p->numSector;
  203                         asm_p->numStripeUnitsAccessed++;
  204                         asm_p->origRow = pda_p->row;    /*
  205                                                          * Redundant but
  206                                                          * harmless to do this
  207                                                          * in every loop
  208                                                          * iteration.
  209                                                          */
  210 
  211                         raidAddress = RF_MIN(endAddress, nextStripeUnitAddress);
  212                 }
  213 
  214                 /*
  215                  * Map the parity. At this stage, the startSector and
  216                  * numSector fields for the parity unit are always set to
  217                  * indicate the entire parity unit. We may modify this after
  218                  * mapping the data portion.
  219                  */
  220                 switch (faultsTolerated) {
  221                 case 0:
  222                         break;
  223                 case 1: /* Single fault tolerant. */
  224                         RF_ASSERT(pdaList);
  225                         t_pda = pdaList;
  226                         pdaList = pdaList->next;
  227                         bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
  228                         pda_p = asm_p->parityInfo = t_pda;
  229                         pda_p->type = RF_PDA_TYPE_PARITY;
  230                         (layoutPtr->map->MapParity) (raidPtr,
  231                             rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
  232                              startAddrWithinStripe), &(pda_p->row),
  233                             &(pda_p->col), &(pda_p->startSector), remap);
  234                         pda_p->numSector = layoutPtr->sectorsPerStripeUnit;
  235                         /*
  236                          * raidAddr may be needed to find unit to redirect to.
  237                          */
  238                         pda_p->raidAddress =
  239                             rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
  240                              startAddrWithinStripe);
  241                         rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1);
  242                         rf_ASMParityAdjust(asm_p->parityInfo,
  243                             startAddrWithinStripe, endAddress,
  244                             layoutPtr, asm_p);
  245 
  246                         break;
  247                 case 2: /* Two fault tolerant. */
  248                         RF_ASSERT(pdaList && pdaList->next);
  249                         t_pda = pdaList;
  250                         pdaList = pdaList->next;
  251                         bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
  252                         pda_p = asm_p->parityInfo = t_pda;
  253                         pda_p->type = RF_PDA_TYPE_PARITY;
  254                         t_pda = pdaList;
  255                         pdaList = pdaList->next;
  256                         bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
  257                         pda_q = asm_p->qInfo = t_pda;
  258                         pda_q->type = RF_PDA_TYPE_Q;
  259                         (layoutPtr->map->MapParity) (raidPtr,
  260                             rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
  261                              startAddrWithinStripe), &(pda_p->row),
  262                             &(pda_p->col), &(pda_p->startSector), remap);
  263                         (layoutPtr->map->MapQ) (raidPtr,
  264                             rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
  265                              startAddrWithinStripe), &(pda_q->row),
  266                             &(pda_q->col), &(pda_q->startSector), remap);
  267                         pda_q->numSector = pda_p->numSector =
  268                             layoutPtr->sectorsPerStripeUnit;
  269                         /*
  270                          * raidAddr may be needed to find unit to redirect to.
  271                          */
  272                         pda_p->raidAddress =
  273                             rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
  274                              startAddrWithinStripe);
  275                         pda_q->raidAddress =
  276                             rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
  277                              startAddrWithinStripe);
  278                         /* Failure mode stuff. */
  279                         rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1);
  280                         rf_ASMCheckStatus(raidPtr, pda_q, asm_p, disks, 1);
  281                         rf_ASMParityAdjust(asm_p->parityInfo,
  282                             startAddrWithinStripe, endAddress,
  283                             layoutPtr, asm_p);
  284                         rf_ASMParityAdjust(asm_p->qInfo, startAddrWithinStripe,
  285                             endAddress, layoutPtr, asm_p);
  286                         break;
  287                 }
  288         }
  289         RF_ASSERT(asmList == NULL && pdaList == NULL);
  290         /* Make the header structure. */
  291         asm_hdr = rf_AllocAccessStripeMapHeader();
  292         RF_ASSERT(numStripes == totStripes);
  293         asm_hdr->numStripes = numStripes;
  294         asm_hdr->stripeMap = asm_list;
  295 
  296         if (rf_mapDebug)
  297                 rf_PrintAccessStripeMap(asm_hdr);
  298         return (asm_hdr);
  299 }
  300 
  301 /*****************************************************************************
  302  * This routine walks through an ASM list and marks the PDAs that have failed.
  303  * It's called only when a disk failure causes an in-flight DAG to fail.
  304  * The parity may consist of two components, but we want to use only one
  305  * failedPDA pointer. Thus we set failedPDA to point to the first parity
  306  * component, and rely on the rest of the code to do the right thing with this.
  307  *****************************************************************************/
  308 void
  309 rf_MarkFailuresInASMList(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *asm_h)
  310 {
  311         RF_RaidDisk_t **disks = raidPtr->Disks;
  312         RF_AccessStripeMap_t *asmap;
  313         RF_PhysDiskAddr_t *pda;
  314 
  315         for (asmap = asm_h->stripeMap; asmap; asmap = asmap->next) {
  316                 asmap->numDataFailed = asmap->numParityFailed =
  317                     asmap->numQFailed = 0;
  318                 asmap->numFailedPDAs = 0;
  319                 bzero((char *) asmap->failedPDAs,
  320                     RF_MAX_FAILED_PDA * sizeof(RF_PhysDiskAddr_t *));
  321                 for (pda = asmap->physInfo; pda; pda = pda->next) {
  322                         if (RF_DEAD_DISK(disks[pda->row][pda->col].status)) {
  323                                 asmap->numDataFailed++;
  324                                 asmap->failedPDAs[asmap->numFailedPDAs] = pda;
  325                                 asmap->numFailedPDAs++;
  326                         }
  327                 }
  328                 pda = asmap->parityInfo;
  329                 if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) {
  330                         asmap->numParityFailed++;
  331                         asmap->failedPDAs[asmap->numFailedPDAs] = pda;
  332                         asmap->numFailedPDAs++;
  333                 }
  334                 pda = asmap->qInfo;
  335                 if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) {
  336                         asmap->numQFailed++;
  337                         asmap->failedPDAs[asmap->numFailedPDAs] = pda;
  338                         asmap->numFailedPDAs++;
  339                 }
  340         }
  341 }
  342 
  343 /*****************************************************************************
  344  *
  345  * DuplicateASM -- Duplicates an ASM and returns the new one.
  346  *
  347  *****************************************************************************/
  348 RF_AccessStripeMap_t *
  349 rf_DuplicateASM(RF_AccessStripeMap_t *asmap)
  350 {
  351         RF_AccessStripeMap_t *new_asm;
  352         RF_PhysDiskAddr_t *pda, *new_pda, *t_pda;
  353 
  354         new_pda = NULL;
  355         new_asm = rf_AllocAccessStripeMapComponent();
  356         bcopy((char *) asmap, (char *) new_asm, sizeof(RF_AccessStripeMap_t));
  357         new_asm->numFailedPDAs = 0;     /* ??? */
  358         new_asm->failedPDAs[0] = NULL;
  359         new_asm->physInfo = NULL;
  360         new_asm->parityInfo = NULL;
  361         new_asm->next = NULL;
  362 
  363         for (pda = asmap->physInfo; pda; pda = pda->next) {
  364                 /* Copy the physInfo list. */
  365                 t_pda = rf_AllocPhysDiskAddr();
  366                 bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t));
  367                 t_pda->next = NULL;
  368                 if (!new_asm->physInfo) {
  369                         new_asm->physInfo = t_pda;
  370                         new_pda = t_pda;
  371                 } else {
  372                         new_pda->next = t_pda;
  373                         new_pda = new_pda->next;
  374                 }
  375                 if (pda == asmap->failedPDAs[0])
  376                         new_asm->failedPDAs[0] = t_pda;
  377         }
  378         for (pda = asmap->parityInfo; pda; pda = pda->next) {
  379                 /* Copy the parityInfo list. */
  380                 t_pda = rf_AllocPhysDiskAddr();
  381                 bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t));
  382                 t_pda->next = NULL;
  383                 if (!new_asm->parityInfo) {
  384                         new_asm->parityInfo = t_pda;
  385                         new_pda = t_pda;
  386                 } else {
  387                         new_pda->next = t_pda;
  388                         new_pda = new_pda->next;
  389                 }
  390                 if (pda == asmap->failedPDAs[0])
  391                         new_asm->failedPDAs[0] = t_pda;
  392         }
  393         return (new_asm);
  394 }
  395 
  396 /*****************************************************************************
  397  *
  398  * DuplicatePDA -- Duplicates a PDA and returns the new one.
  399  *
  400  *****************************************************************************/
  401 RF_PhysDiskAddr_t *
  402 rf_DuplicatePDA(RF_PhysDiskAddr_t *pda)
  403 {
  404         RF_PhysDiskAddr_t *new;
  405 
  406         new = rf_AllocPhysDiskAddr();
  407         bcopy((char *) pda, (char *) new, sizeof(RF_PhysDiskAddr_t));
  408         return (new);
  409 }
  410 
  411 /*****************************************************************************
  412  *
  413  * Routines to allocate and free list elements. All allocation routines zero
  414  * the structure before returning it.
  415  *
  416  * FreePhysDiskAddr is static. It should never be called directly, because
  417  * FreeAccessStripeMap takes care of freeing the PhysDiskAddr list.
  418  *
  419  *****************************************************************************/
  420 
  421 static RF_FreeList_t *rf_asmhdr_freelist;
  422 #define RF_MAX_FREE_ASMHDR              128
  423 #define RF_ASMHDR_INC                    16
  424 #define RF_ASMHDR_INITIAL                32
  425 
  426 static RF_FreeList_t *rf_asm_freelist;
  427 #define RF_MAX_FREE_ASM                 192
  428 #define RF_ASM_INC                       24
  429 #define RF_ASM_INITIAL                   64
  430 
  431 static RF_FreeList_t *rf_pda_freelist;
  432 #define RF_MAX_FREE_PDA                 192
  433 #define RF_PDA_INC                       24
  434 #define RF_PDA_INITIAL                   64
  435 
  436 /*
  437  * Called at shutdown time. So far, all that is necessary is to release
  438  * all the free lists.
  439  */
  440 void rf_ShutdownMapModule(void *);
  441 void
  442 rf_ShutdownMapModule(void *ignored)
  443 {
  444         RF_FREELIST_DESTROY(rf_asmhdr_freelist, next,
  445             (RF_AccessStripeMapHeader_t *));
  446         RF_FREELIST_DESTROY(rf_pda_freelist, next, (RF_PhysDiskAddr_t *));
  447         RF_FREELIST_DESTROY(rf_asm_freelist, next, (RF_AccessStripeMap_t *));
  448 }
  449 
  450 int
  451 rf_ConfigureMapModule(RF_ShutdownList_t **listp)
  452 {
  453         int rc;
  454 
  455         RF_FREELIST_CREATE(rf_asmhdr_freelist, RF_MAX_FREE_ASMHDR,
  456             RF_ASMHDR_INC, sizeof(RF_AccessStripeMapHeader_t));
  457         if (rf_asmhdr_freelist == NULL) {
  458                 return (ENOMEM);
  459         }
  460         RF_FREELIST_CREATE(rf_asm_freelist, RF_MAX_FREE_ASM,
  461             RF_ASM_INC, sizeof(RF_AccessStripeMap_t));
  462         if (rf_asm_freelist == NULL) {
  463                 RF_FREELIST_DESTROY(rf_asmhdr_freelist, next,
  464                     (RF_AccessStripeMapHeader_t *));
  465                 return (ENOMEM);
  466         }
  467         RF_FREELIST_CREATE(rf_pda_freelist, RF_MAX_FREE_PDA, RF_PDA_INC,
  468             sizeof(RF_PhysDiskAddr_t));
  469         if (rf_pda_freelist == NULL) {
  470                 RF_FREELIST_DESTROY(rf_asmhdr_freelist, next,
  471                     (RF_AccessStripeMapHeader_t *));
  472                 RF_FREELIST_DESTROY(rf_pda_freelist, next,
  473                     (RF_PhysDiskAddr_t *));
  474                 return (ENOMEM);
  475         }
  476         rc = rf_ShutdownCreate(listp, rf_ShutdownMapModule, NULL);
  477         if (rc) {
  478                 RF_ERRORMSG3("Unable to add to shutdown list file %s line %d"
  479                     " rc=%d\n", __FILE__, __LINE__, rc);
  480                 rf_ShutdownMapModule(NULL);
  481                 return (rc);
  482         }
  483         RF_FREELIST_PRIME(rf_asmhdr_freelist, RF_ASMHDR_INITIAL, next,
  484             (RF_AccessStripeMapHeader_t *));
  485         RF_FREELIST_PRIME(rf_asm_freelist, RF_ASM_INITIAL, next,
  486             (RF_AccessStripeMap_t *));
  487         RF_FREELIST_PRIME(rf_pda_freelist, RF_PDA_INITIAL, next,
  488             (RF_PhysDiskAddr_t *));
  489 
  490         return (0);
  491 }
  492 
  493 RF_AccessStripeMapHeader_t *
  494 rf_AllocAccessStripeMapHeader(void)
  495 {
  496         RF_AccessStripeMapHeader_t *p;
  497 
  498         RF_FREELIST_GET(rf_asmhdr_freelist, p, next,
  499             (RF_AccessStripeMapHeader_t *));
  500         bzero((char *) p, sizeof(RF_AccessStripeMapHeader_t));
  501 
  502         return (p);
  503 }
  504 
  505 void
  506 rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t *p)
  507 {
  508         RF_FREELIST_FREE(rf_asmhdr_freelist, p, next);
  509 }
  510 
  511 RF_PhysDiskAddr_t *
  512 rf_AllocPhysDiskAddr(void)
  513 {
  514         RF_PhysDiskAddr_t *p;
  515 
  516         RF_FREELIST_GET(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *));
  517         bzero((char *) p, sizeof(RF_PhysDiskAddr_t));
  518 
  519         return (p);
  520 }
  521 
  522 /*
  523  * Allocates a list of PDAs, locking the free list only once.
  524  * When we have to call calloc, we do it one component at a time to simplify
  525  * the process of freeing the list at program shutdown. This should not be
  526  * much of a performance hit, because it should be very infrequently executed.
  527  */
  528 RF_PhysDiskAddr_t *
  529 rf_AllocPDAList(int count)
  530 {
  531         RF_PhysDiskAddr_t *p = NULL;
  532 
  533         RF_FREELIST_GET_N(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *),
  534             count);
  535         return (p);
  536 }
  537 
  538 void
  539 rf_FreePhysDiskAddr(RF_PhysDiskAddr_t *p)
  540 {
  541         RF_FREELIST_FREE(rf_pda_freelist, p, next);
  542 }
  543 
  544 void
  545 rf_FreePDAList(
  546         /* Pointers to start and end of list. */
  547         RF_PhysDiskAddr_t       *l_start,
  548         RF_PhysDiskAddr_t       *l_end,
  549         int                      count  /* Number of elements in list. */
  550 )
  551 {
  552         RF_FREELIST_FREE_N(rf_pda_freelist, l_start, next,
  553             (RF_PhysDiskAddr_t *), count);
  554 }
  555 
  556 RF_AccessStripeMap_t *
  557 rf_AllocAccessStripeMapComponent(void)
  558 {
  559         RF_AccessStripeMap_t *p;
  560 
  561         RF_FREELIST_GET(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *));
  562         bzero((char *) p, sizeof(RF_AccessStripeMap_t));
  563 
  564         return (p);
  565 }
  566 
  567 /*
  568  * This is essentially identical to AllocPDAList. I should combine the two.
  569  * When we have to call calloc, we do it one component at a time to simplify
  570  * the process of freeing the list at program shutdown. This should not be
  571  * much of a performance hit, because it should be very infrequently executed.
  572  */
  573 RF_AccessStripeMap_t *
  574 rf_AllocASMList(int count)
  575 {
  576         RF_AccessStripeMap_t *p = NULL;
  577 
  578         RF_FREELIST_GET_N(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *),
  579             count);
  580         return (p);
  581 }
  582 
  583 void
  584 rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t *p)
  585 {
  586         RF_FREELIST_FREE(rf_asm_freelist, p, next);
  587 }
  588 
  589 void
  590 rf_FreeASMList(RF_AccessStripeMap_t *l_start, RF_AccessStripeMap_t *l_end,
  591     int count)
  592 {
  593         RF_FREELIST_FREE_N(rf_asm_freelist, l_start, next,
  594             (RF_AccessStripeMap_t *), count);
  595 }
  596 
  597 void
  598 rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t *hdr)
  599 {
  600         RF_AccessStripeMap_t *p, *pt = NULL;
  601         RF_PhysDiskAddr_t *pdp, *trailer, *pdaList = NULL, *pdaEnd = NULL;
  602         int count = 0, t, asm_count = 0;
  603 
  604         for (p = hdr->stripeMap; p; p = p->next) {
  605 
  606                 /* Link the 3 pda lists into the accumulating pda list. */
  607 
  608                 if (!pdaList)
  609                         pdaList = p->qInfo;
  610                 else
  611                         pdaEnd->next = p->qInfo;
  612                 for (trailer = NULL, pdp = p->qInfo; pdp;) {
  613                         trailer = pdp;
  614                         pdp = pdp->next;
  615                         count++;
  616                 }
  617                 if (trailer)
  618                         pdaEnd = trailer;
  619 
  620                 if (!pdaList)
  621                         pdaList = p->parityInfo;
  622                 else
  623                         pdaEnd->next = p->parityInfo;
  624                 for (trailer = NULL, pdp = p->parityInfo; pdp;) {
  625                         trailer = pdp;
  626                         pdp = pdp->next;
  627                         count++;
  628                 }
  629                 if (trailer)
  630                         pdaEnd = trailer;
  631 
  632                 if (!pdaList)
  633                         pdaList = p->physInfo;
  634                 else
  635                         pdaEnd->next = p->physInfo;
  636                 for (trailer = NULL, pdp = p->physInfo; pdp;) {
  637                         trailer = pdp;
  638                         pdp = pdp->next;
  639                         count++;
  640                 }
  641                 if (trailer)
  642                         pdaEnd = trailer;
  643 
  644                 pt = p;
  645                 asm_count++;
  646         }
  647 
  648         /* Debug only. */
  649         for (t = 0, pdp = pdaList; pdp; pdp = pdp->next)
  650                 t++;
  651         RF_ASSERT(t == count);
  652 
  653         if (pdaList)
  654                 rf_FreePDAList(pdaList, pdaEnd, count);
  655         rf_FreeASMList(hdr->stripeMap, pt, asm_count);
  656         rf_FreeAccessStripeMapHeader(hdr);
  657 }
  658 
  659 /*
  660  * We can't use the large write optimization if there are any failures in the
  661  * stripe.
  662  * In the declustered layout, there is no way to immediately determine what
  663  * disks constitute a stripe, so we actually have to hunt through the stripe
  664  * looking for failures.
  665  * The reason we map the parity instead of just using asm->parityInfo->col is
  666  * because the latter may have been already redirected to a spare drive, which
  667  * would mess up the computation of the stripe offset.
  668  *
  669  * ASSUMES AT MOST ONE FAILURE IN THE STRIPE.
  670  */
  671 int
  672 rf_CheckStripeForFailures(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
  673 {
  674         RF_RowCol_t trow, tcol, prow, pcol, *diskids, row, i;
  675         RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
  676         RF_StripeCount_t stripeOffset;
  677         int numFailures;
  678         RF_RaidAddr_t sosAddr;
  679         RF_SectorNum_t diskOffset, poffset;
  680         RF_RowCol_t testrow;
  681 
  682         /* Quick out in the fault-free case. */
  683         RF_LOCK_MUTEX(raidPtr->mutex);
  684         numFailures = raidPtr->numFailures;
  685         RF_UNLOCK_MUTEX(raidPtr->mutex);
  686         if (numFailures == 0)
  687                 return (0);
  688 
  689         sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
  690             asmap->raidAddress);
  691         row = asmap->physInfo->row;
  692         (layoutPtr->map->IdentifyStripe) (raidPtr, asmap->raidAddress,
  693             &diskids, &testrow);
  694         (layoutPtr->map->MapParity) (raidPtr, asmap->raidAddress,
  695             &prow, &pcol, &poffset, 0); /* get pcol */
  696 
  697         /*
  698          * This needs not be true if we've redirected the access to a spare in
  699          * another row.
  700          * RF_ASSERT(row == testrow);
  701          */
  702         stripeOffset = 0;
  703         for (i = 0; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++) {
  704                 if (diskids[i] != pcol) {
  705                         if (RF_DEAD_DISK(raidPtr
  706                             ->Disks[testrow][diskids[i]].status)) {
  707                                 if (raidPtr->status[testrow] !=
  708                                     rf_rs_reconstructing)
  709                                         return (1);
  710                                 RF_ASSERT(
  711                                     raidPtr->reconControl[testrow]->fcol ==
  712                                     diskids[i]);
  713                                 layoutPtr->map->MapSector(raidPtr,
  714                                     sosAddr + stripeOffset *
  715                                     layoutPtr->sectorsPerStripeUnit,
  716                                     &trow, &tcol, &diskOffset, 0);
  717                                 RF_ASSERT((trow == testrow) &&
  718                                     (tcol == diskids[i]));
  719                                 if (!rf_CheckRUReconstructed(raidPtr
  720                                      ->reconControl[testrow]->reconMap,
  721                                      diskOffset))
  722                                         return (1);
  723                                 asmap->flags |= RF_ASM_REDIR_LARGE_WRITE;
  724                                 return (0);
  725                         }
  726                         stripeOffset++;
  727                 }
  728         }
  729         return (0);
  730 }
  731 
  732 /*
  733  * Return the number of failed data units in the stripe.
  734  */
  735 int
  736 rf_NumFailedDataUnitsInStripe(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
  737 {
  738         RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
  739         RF_RowCol_t trow, tcol, row, i;
  740         RF_SectorNum_t diskOffset;
  741         RF_RaidAddr_t sosAddr;
  742         int numFailures;
  743 
  744         /* Quick out in the fault-free case. */
  745         RF_LOCK_MUTEX(raidPtr->mutex);
  746         numFailures = raidPtr->numFailures;
  747         RF_UNLOCK_MUTEX(raidPtr->mutex);
  748         if (numFailures == 0)
  749                 return (0);
  750         numFailures = 0;
  751 
  752         sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
  753             asmap->raidAddress);
  754         row = asmap->physInfo->row;
  755         for (i = 0; i < layoutPtr->numDataCol; i++) {
  756                 (layoutPtr->map->MapSector) (raidPtr, sosAddr + i *
  757                     layoutPtr->sectorsPerStripeUnit,
  758                     &trow, &tcol, &diskOffset, 0);
  759                 if (RF_DEAD_DISK(raidPtr->Disks[trow][tcol].status))
  760                         numFailures++;
  761         }
  762 
  763         return numFailures;
  764 }
  765 
  766 
  767 /*****************************************************************************
  768  *
  769  * Debug routines.
  770  *
  771  *****************************************************************************/
  772 
  773 void
  774 rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h)
  775 {
  776         rf_PrintFullAccessStripeMap(asm_h, 0);
  777 }
  778 
  779 void
  780 rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h,
  781     int prbuf   /* Flag to print buffer pointers. */)
  782 {
  783         int i;
  784         RF_AccessStripeMap_t *asmap = asm_h->stripeMap;
  785         RF_PhysDiskAddr_t *p;
  786         printf("%d stripes total\n", (int) asm_h->numStripes);
  787         for (; asmap; asmap = asmap->next) {
  788                 /* printf("Num failures: %d\n", asmap->numDataFailed); */
  789                 /* printf("Num sectors: %d\n",
  790                  * (int)asmap->totalSectorsAccessed); */
  791                 printf("Stripe %d (%d sectors), failures: %d data, %d parity: ",
  792                     (int) asmap->stripeID,
  793                     (int) asmap->totalSectorsAccessed,
  794                     (int) asmap->numDataFailed,
  795                     (int) asmap->numParityFailed);
  796                 if (asmap->parityInfo) {
  797                         printf("Parity [r%d c%d s%d-%d", asmap->parityInfo->row,
  798                             asmap->parityInfo->col,
  799                             (int) asmap->parityInfo->startSector,
  800                             (int) (asmap->parityInfo->startSector +
  801                             asmap->parityInfo->numSector - 1));
  802                         if (prbuf)
  803                                 printf(" b0x%lx",
  804                                     (unsigned long) asmap->parityInfo->bufPtr);
  805                         if (asmap->parityInfo->next) {
  806                                 printf(", r%d c%d s%d-%d",
  807                                     asmap->parityInfo->next->row,
  808                                     asmap->parityInfo->next->col,
  809                                     (int) asmap->parityInfo->next->startSector,
  810                                     (int) (asmap->parityInfo->next->startSector
  811                                     + asmap->parityInfo->next->numSector - 1));
  812                                 if (prbuf)
  813                                         printf(" b0x%lx", (unsigned long)
  814                                             asmap->parityInfo->next->bufPtr);
  815                                 RF_ASSERT(asmap->parityInfo->next->next
  816                                     == NULL);
  817                         }
  818                         printf("]\n\t");
  819                 }
  820                 for (i = 0, p = asmap->physInfo; p; p = p->next, i++) {
  821                         printf("SU r%d c%d s%d-%d ", p->row, p->col,
  822                             (int) p->startSector,
  823                             (int) (p->startSector + p->numSector - 1));
  824                         if (prbuf)
  825                                 printf("b0x%lx ", (unsigned long) p->bufPtr);
  826                         if (i && !(i & 1))
  827                                 printf("\n\t");
  828                 }
  829                 printf("\n");
  830                 p = asm_h->stripeMap->failedPDAs[0];
  831                 if (asm_h->stripeMap->numDataFailed +
  832                     asm_h->stripeMap->numParityFailed > 1)
  833                         printf("[multiple failures]\n");
  834                 else
  835                         if (asm_h->stripeMap->numDataFailed +
  836                             asm_h->stripeMap->numParityFailed > 0)
  837                                 printf("\t[Failed PDA: r%d c%d s%d-%d]\n",
  838                                     p->row, p->col, (int) p->startSector,
  839                                     (int) (p->startSector + p->numSector - 1));
  840         }
  841 }
  842 
  843 void
  844 rf_PrintRaidAddressInfo(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
  845     RF_SectorCount_t numBlocks)
  846 {
  847         RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
  848         RF_RaidAddr_t ra, sosAddr =
  849             rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
  850 
  851         printf("Raid addrs of SU boundaries from start of stripe to end"
  852             " of access:\n\t");
  853         for (ra = sosAddr; ra <= raidAddr + numBlocks;
  854              ra += layoutPtr->sectorsPerStripeUnit) {
  855                 printf("%d (0x%x), ", (int) ra, (int) ra);
  856         }
  857         printf("\n");
  858         printf("Offset into stripe unit: %d (0x%x)\n",
  859             (int) (raidAddr % layoutPtr->sectorsPerStripeUnit),
  860             (int) (raidAddr % layoutPtr->sectorsPerStripeUnit));
  861 }
  862 
  863 /*
  864  * Given a parity descriptor and the starting address within a stripe,
  865  * range restrict the parity descriptor to touch only the correct stuff.
  866  */
  867 void
  868 rf_ASMParityAdjust(
  869     RF_PhysDiskAddr_t   *toAdjust,
  870     RF_StripeNum_t       startAddrWithinStripe,
  871     RF_SectorNum_t       endAddress,
  872     RF_RaidLayout_t     *layoutPtr,
  873     RF_AccessStripeMap_t *asm_p
  874 )
  875 {
  876         RF_PhysDiskAddr_t *new_pda;
  877 
  878         /*
  879          * When we're accessing only a portion of one stripe unit, we want the
  880          * parity descriptor to identify only the chunk of parity associated
  881          * with the data. When the access spans exactly one stripe unit
  882          * boundary and is less than a stripe unit in size, it uses two
  883          * disjoint regions of the parity unit. When an access spans more
  884          * than one stripe unit boundary, it uses all of the parity unit.
  885          *
  886          * To better handle the case where stripe units are small, we may
  887          * eventually want to change the 2nd case so that if the SU size is
  888          * below some threshold, we just read/write the whole thing instead of
  889          * breaking it up into two accesses.
  890          */
  891         if (asm_p->numStripeUnitsAccessed == 1) {
  892                 int x = (startAddrWithinStripe %
  893                     layoutPtr->sectorsPerStripeUnit);
  894                 toAdjust->startSector += x;
  895                 toAdjust->raidAddress += x;
  896                 toAdjust->numSector = asm_p->physInfo->numSector;
  897                 RF_ASSERT(toAdjust->numSector != 0);
  898         } else
  899                 if (asm_p->numStripeUnitsAccessed == 2 &&
  900                     asm_p->totalSectorsAccessed <
  901                     layoutPtr->sectorsPerStripeUnit) {
  902                         int x = (startAddrWithinStripe %
  903                             layoutPtr->sectorsPerStripeUnit);
  904 
  905                         /*
  906                          * Create a second pda and copy the parity map info
  907                          * into it.
  908                          */
  909                         RF_ASSERT(toAdjust->next == NULL);
  910                         new_pda = toAdjust->next = rf_AllocPhysDiskAddr();
  911                         *new_pda = *toAdjust;   /* Structure assignment. */
  912                         new_pda->next = NULL;
  913 
  914                         /*
  915                          * Adjust the start sector & number of blocks for the
  916                          * first parity pda.
  917                          */
  918                         toAdjust->startSector += x;
  919                         toAdjust->raidAddress += x;
  920                         toAdjust->numSector =
  921                             rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr,
  922                              startAddrWithinStripe) - startAddrWithinStripe;
  923                         RF_ASSERT(toAdjust->numSector != 0);
  924 
  925                         /* Adjust the second pda. */
  926                         new_pda->numSector = endAddress -
  927                             rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
  928                              endAddress);
  929                         /* new_pda->raidAddress =
  930                          *     rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr,
  931                          *      toAdjust->raidAddress); */
  932                         RF_ASSERT(new_pda->numSector != 0);
  933                 }
  934 }
  935 
  936 /*
  937  * Check if a disk has been spared or failed. If spared, redirect the I/O.
  938  * If it has been failed, record it in the asm pointer.
  939  * Fourth arg is whether data or parity.
  940  */
  941 void
  942 rf_ASMCheckStatus(
  943     RF_Raid_t            *raidPtr,
  944     RF_PhysDiskAddr_t    *pda_p,
  945     RF_AccessStripeMap_t *asm_p,
  946     RF_RaidDisk_t       **disks,
  947     int                   parity
  948 )
  949 {
  950         RF_DiskStatus_t dstatus;
  951         RF_RowCol_t frow, fcol;
  952 
  953         dstatus = disks[pda_p->row][pda_p->col].status;
  954 
  955         if (dstatus == rf_ds_spared) {
  956                 /* If the disk has been spared, redirect access to the spare. */
  957                 frow = pda_p->row;
  958                 fcol = pda_p->col;
  959                 pda_p->row = disks[frow][fcol].spareRow;
  960                 pda_p->col = disks[frow][fcol].spareCol;
  961         } else
  962                 if (dstatus == rf_ds_dist_spared) {
  963                         /* Ditto if disk has been spared to dist spare space. */
  964                         RF_RowCol_t or = pda_p->row, oc = pda_p->col;
  965                         RF_SectorNum_t oo = pda_p->startSector;
  966 
  967                         if (pda_p->type == RF_PDA_TYPE_DATA)
  968                                 raidPtr->Layout.map->MapSector(raidPtr,
  969                                     pda_p->raidAddress, &pda_p->row,
  970                                     &pda_p->col, &pda_p->startSector, RF_REMAP);
  971                         else
  972                                 raidPtr->Layout.map->MapParity(raidPtr,
  973                                     pda_p->raidAddress, &pda_p->row,
  974                                     &pda_p->col, &pda_p->startSector, RF_REMAP);
  975 
  976                         if (rf_mapDebug) {
  977                                 printf("Redirected r %d c %d o %d -> r%d c %d"
  978                                     " o %d\n", or, oc, (int) oo, pda_p->row,
  979                                     pda_p->col, (int) pda_p->startSector);
  980                         }
  981                 } else
  982                         if (RF_DEAD_DISK(dstatus)) {
  983                                 /*
  984                                  * If the disk is inaccessible, mark the
  985                                  * failure.
  986                                  */
  987                                 if (parity)
  988                                         asm_p->numParityFailed++;
  989                                 else {
  990                                         asm_p->numDataFailed++;
  991 #if 0
  992                                         /*
  993                                          * XXX Do we really want this spewing
  994                                          * out on the console ? GO
  995                                          */
  996                                         printf("DATA_FAILED !\n");
  997 #endif
  998                                 }
  999                                 asm_p->failedPDAs[asm_p->numFailedPDAs] = pda_p;
 1000                                 asm_p->numFailedPDAs++;
 1001 #if 0
 1002                                 switch (asm_p->numParityFailed +
 1003                                     asm_p->numDataFailed) {
 1004                                 case 1:
 1005                                         asm_p->failedPDAs[0] = pda_p;
 1006                                         break;
 1007                                 case 2:
 1008                                         asm_p->failedPDAs[1] = pda_p;
 1009                                 default:
 1010                                         break;
 1011                                 }
 1012 #endif
 1013                         }
 1014         /* The redirected access should never span a stripe unit boundary. */
 1015         RF_ASSERT(rf_RaidAddressToStripeUnitID(&raidPtr->Layout,
 1016              pda_p->raidAddress) ==
 1017             rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress +
 1018              pda_p->numSector - 1));
 1019         RF_ASSERT(pda_p->col != -1);
 1020 }

/* [<][>][^][v][top][bottom][index][help] */