root/dev/raidframe/rf_dagdegrd.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rf_CreateRaidFiveDegradedReadDAG
  2. rf_CreateRaidOneDegradedReadDAG
  3. rf_CreateDegradedReadDAG
  4. rf_CreateRaidCDegradedReadDAG
  5. rf_DD_GenerateFailedAccessASMs
  6. rf_DoubleDegRead

    1 /*      $OpenBSD: rf_dagdegrd.c,v 1.6 2006/07/09 22:10:05 mk Exp $      */
    2 /*      $NetBSD: rf_dagdegrd.c,v 1.5 2000/01/07 03:40:57 oster Exp $    */
    3 
    4 /*
    5  * Copyright (c) 1995 Carnegie-Mellon University.
    6  * All rights reserved.
    7  *
    8  * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
    9  *
   10  * Permission to use, copy, modify and distribute this software and
   11  * its documentation is hereby granted, provided that both the copyright
   12  * notice and this permission notice appear in all copies of the
   13  * software, derivative works or modified versions, and any portions
   14  * thereof, and that both notices appear in supporting documentation.
   15  *
   16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   19  *
   20  * Carnegie Mellon requests users of this software to return to
   21  *
   22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   23  *  School of Computer Science
   24  *  Carnegie Mellon University
   25  *  Pittsburgh PA 15213-3890
   26  *
   27  * any improvements or extensions that they make and grant Carnegie the
   28  * rights to redistribute these changes.
   29  */
   30 
   31 /*
   32  * rf_dagdegrd.c
   33  *
   34  * Code for creating degraded read DAGs.
   35  */
   36 
   37 #include "rf_types.h"
   38 #include "rf_raid.h"
   39 #include "rf_dag.h"
   40 #include "rf_dagutils.h"
   41 #include "rf_dagfuncs.h"
   42 #include "rf_debugMem.h"
   43 #include "rf_memchunk.h"
   44 #include "rf_general.h"
   45 #include "rf_dagdegrd.h"
   46 
   47 
   48 /*****************************************************************************
   49  *
   50  * General comments on DAG creation:
   51  *
   52  * All DAGs in this file use roll-away error recovery. Each DAG has a single
   53  * commit node, usually called "Cmt". If an error occurs before the Cmt node
   54  * is reached, the execution engine will halt forward execution and work
   55  * backward through the graph, executing the undo functions. Assuming that
   56  * each node in the graph prior to the Cmt node is undoable and atomic - or -
   57  * does not make changes to permanent state, the graph will fail atomically.
   58  * If an error occurs after the Cmt node executes, the engine will roll-forward
   59  * through the graph, blindly executing nodes until it reaches the end.
   60  * If a graph reaches the end, it is assumed to have completed successfully.
   61  *
   62  * A graph has only 1 Cmt node.
   63  *
   64  *****************************************************************************/
   65 
   66 
   67 /*****************************************************************************
   68  *
   69  * The following wrappers map the standard DAG creation interface to the
   70  * DAG creation routines. Additionally, these wrappers enable experimentation
   71  * with new DAG structures by providing an extra level of indirection, allowing
   72  * the DAG creation routines to be replaced at this single point.
   73  *
   74  *****************************************************************************/
   75 
   76 void
   77 rf_CreateRaidFiveDegradedReadDAG(
   78     RF_Raid_t                   *raidPtr,
   79     RF_AccessStripeMap_t        *asmap,
   80     RF_DagHeader_t              *dag_h,
   81     void                        *bp,
   82     RF_RaidAccessFlags_t         flags,
   83     RF_AllocListElem_t          *allocList)
   84 {
   85         rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
   86             &rf_xorRecoveryFuncs);
   87 }
   88 
   89 
   90 /*****************************************************************************
   91  *
   92  * DAG creation code begins here.
   93  *
   94  *****************************************************************************/
   95 
   96 
   97 /*****************************************************************************
   98  * Create a degraded read DAG for RAID level 1.
   99  *
  100  * Hdr -> Nil -> R(p/s)d -> Commit -> Trm
  101  *
  102  * The "Rd" node reads data from the surviving disk in the mirror pair.
  103  *   Rpd - read of primary copy
  104  *   Rsd - read of secondary copy
  105  *
  106  * Parameters:  raidPtr   - description of the physical array
  107  *              asmap     - logical & physical addresses for this access
  108  *              bp        - buffer ptr (for holding write data)
  109  *              flags     - general flags (e.g. disk locking)
  110  *              allocList - list of memory allocated in DAG creation
  111  *****************************************************************************/
  112 
  113 void
  114 rf_CreateRaidOneDegradedReadDAG(
  115     RF_Raid_t                   *raidPtr,
  116     RF_AccessStripeMap_t        *asmap,
  117     RF_DagHeader_t              *dag_h,
  118     void                        *bp,
  119     RF_RaidAccessFlags_t         flags,
  120     RF_AllocListElem_t          *allocList)
  121 {
  122         RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode;
  123         RF_StripeNum_t parityStripeID;
  124         RF_ReconUnitNum_t which_ru;
  125         RF_PhysDiskAddr_t *pda;
  126         int useMirror, i;
  127 
  128         useMirror = 0;
  129         parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
  130             asmap->raidAddress, &which_ru);
  131         if (rf_dagDebug) {
  132                 printf("[Creating RAID level 1 degraded read DAG]\n");
  133         }
  134         dag_h->creator = "RaidOneDegradedReadDAG";
  135         /* Alloc the Wnd nodes and the Wmir node. */
  136         if (asmap->numDataFailed == 0)
  137                 useMirror = RF_FALSE;
  138         else
  139                 useMirror = RF_TRUE;
  140 
  141         /* Total number of nodes = 1 + (block + commit + terminator). */
  142         RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *),
  143             allocList);
  144         i = 0;
  145         rdNode = &nodes[i];
  146         i++;
  147         blockNode = &nodes[i];
  148         i++;
  149         commitNode = &nodes[i];
  150         i++;
  151         termNode = &nodes[i];
  152         i++;
  153 
  154         /*
  155          * This dag can not commit until the commit node is reached. Errors
  156          * prior to the commit point imply the dag has failed and must be
  157          * retried.
  158          */
  159         dag_h->numCommitNodes = 1;
  160         dag_h->numCommits = 0;
  161         dag_h->numSuccedents = 1;
  162 
  163         /* Initialize the block, commit, and terminator nodes. */
  164         rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
  165             rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
  166         rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
  167             rf_NullNodeUndoFunc, NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
  168         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
  169             rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
  170 
  171         pda = asmap->physInfo;
  172         RF_ASSERT(pda != NULL);
  173         /* parityInfo must describe entire parity unit. */
  174         RF_ASSERT(asmap->parityInfo->next == NULL);
  175 
  176         /* Initialize the data node. */
  177         if (!useMirror) {
  178                 /* Read primary copy of data. */
  179                 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
  180                     rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
  181                     dag_h, "Rpd", allocList);
  182                 rdNode->params[0].p = pda;
  183                 rdNode->params[1].p = pda->bufPtr;
  184                 rdNode->params[2].v = parityStripeID;
  185                 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
  186                     0, 0, which_ru);
  187         } else {
  188                 /* Read secondary copy of data. */
  189                 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
  190                     rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
  191                     dag_h, "Rsd", allocList);
  192                 rdNode->params[0].p = asmap->parityInfo;
  193                 rdNode->params[1].p = pda->bufPtr;
  194                 rdNode->params[2].v = parityStripeID;
  195                 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
  196                     0, 0, which_ru);
  197         }
  198 
  199         /* Connect header to block node. */
  200         RF_ASSERT(dag_h->numSuccedents == 1);
  201         RF_ASSERT(blockNode->numAntecedents == 0);
  202         dag_h->succedents[0] = blockNode;
  203 
  204         /* Connect block node to rdnode. */
  205         RF_ASSERT(blockNode->numSuccedents == 1);
  206         RF_ASSERT(rdNode->numAntecedents == 1);
  207         blockNode->succedents[0] = rdNode;
  208         rdNode->antecedents[0] = blockNode;
  209         rdNode->antType[0] = rf_control;
  210 
  211         /* Connect rdnode to commit node. */
  212         RF_ASSERT(rdNode->numSuccedents == 1);
  213         RF_ASSERT(commitNode->numAntecedents == 1);
  214         rdNode->succedents[0] = commitNode;
  215         commitNode->antecedents[0] = rdNode;
  216         commitNode->antType[0] = rf_control;
  217 
  218         /* Connect commit node to terminator. */
  219         RF_ASSERT(commitNode->numSuccedents == 1);
  220         RF_ASSERT(termNode->numAntecedents == 1);
  221         RF_ASSERT(termNode->numSuccedents == 0);
  222         commitNode->succedents[0] = termNode;
  223         termNode->antecedents[0] = commitNode;
  224         termNode->antType[0] = rf_control;
  225 }
  226 
  227 
  228 /*****************************************************************************
  229  *
  230  * Create a DAG to perform a degraded-mode read of data within one stripe.
  231  * This DAG is as follows:
  232  *
  233  * Hdr -> Block -> Rud -> Xor -> Cmt -> T
  234  *              -> Rrd ->
  235  *              -> Rp -->
  236  *
  237  * Each R node is a successor of the L node.
  238  * One successor arc from each R node goes to C, and the other to X.
  239  * There is one Rud for each chunk of surviving user data requested by the
  240  * user, and one Rrd for each chunk of surviving user data _not_ being read by
  241  * the user.
  242  * R = read, ud = user data, rd = recovery (surviving) data, p = parity
  243  * X = XOR, C = Commit, T = terminate
  244  *
  245  * The block node guarantees a single source node.
  246  *
  247  * Note:  The target buffer for the XOR node is set to the actual user buffer
  248  * where the failed data is supposed to end up. This buffer is zero'd by the
  249  * code here. Thus, if you create a degraded read dag, use it, and then
  250  * re-use, you have to be sure to zero the target buffer prior to the re-use.
  251  *
  252  * The recfunc argument at the end specifies the name and function used for
  253  * the redundancy recovery function.
  254  *
  255  *****************************************************************************/
  256 
  257 void
  258 rf_CreateDegradedReadDAG(
  259     RF_Raid_t                   *raidPtr,
  260     RF_AccessStripeMap_t        *asmap,
  261     RF_DagHeader_t              *dag_h,
  262     void                        *bp,
  263     RF_RaidAccessFlags_t         flags,
  264     RF_AllocListElem_t          *allocList,
  265     RF_RedFuncs_t               *recFunc)
  266 {
  267         RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *xorNode, *blockNode;
  268         RF_DagNode_t *commitNode, *rpNode, *termNode;
  269         int nNodes, nRrdNodes, nRudNodes, nXorBufs, i;
  270         int j, paramNum;
  271         RF_SectorCount_t sectorsPerSU;
  272         RF_ReconUnitNum_t which_ru;
  273         char *overlappingPDAs;          /* A temporary array of flags. */
  274         RF_AccessStripeMapHeader_t *new_asm_h[2];
  275         RF_PhysDiskAddr_t *pda, *parityPDA;
  276         RF_StripeNum_t parityStripeID;
  277         RF_PhysDiskAddr_t *failedPDA;
  278         RF_RaidLayout_t *layoutPtr;
  279         char *rpBuf;
  280 
  281         layoutPtr = &(raidPtr->Layout);
  282         /*
  283          * failedPDA points to the pda within the asm that targets
  284          * the failed disk.
  285          */
  286         failedPDA = asmap->failedPDAs[0];
  287         parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
  288             asmap->raidAddress, &which_ru);
  289         sectorsPerSU = layoutPtr->sectorsPerStripeUnit;
  290 
  291         if (rf_dagDebug) {
  292                 printf("[Creating degraded read DAG]\n");
  293         }
  294         RF_ASSERT(asmap->numDataFailed == 1);
  295         dag_h->creator = "DegradedReadDAG";
  296 
  297         /*
  298          * Generate two ASMs identifying the surviving data we need
  299          * in order to recover the lost data.
  300          */
  301 
  302         /* overlappingPDAs array must be zero'd. */
  303         RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed,
  304             sizeof(char), (char *));
  305         rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h,
  306             new_asm_h, &nXorBufs, &rpBuf, overlappingPDAs, allocList);
  307 
  308         /*
  309          * Create all the nodes at once.
  310          *
  311          * -1 because no access is generated for the failed pda.
  312          */
  313         nRudNodes = asmap->numStripeUnitsAccessed - 1;
  314         nRrdNodes = ((new_asm_h[0]) ?
  315             new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) +
  316             ((new_asm_h[1]) ?
  317             new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0);
  318         nNodes = 5 + nRudNodes + nRrdNodes;     /*
  319                                                  * lock, unlock, xor, Rp,
  320                                                  * Rud, Rrd
  321                                                  */
  322         RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *),
  323             allocList);
  324         i = 0;
  325         blockNode = &nodes[i];
  326         i++;
  327         commitNode = &nodes[i];
  328         i++;
  329         xorNode = &nodes[i];
  330         i++;
  331         rpNode = &nodes[i];
  332         i++;
  333         termNode = &nodes[i];
  334         i++;
  335         rudNodes = &nodes[i];
  336         i += nRudNodes;
  337         rrdNodes = &nodes[i];
  338         i += nRrdNodes;
  339         RF_ASSERT(i == nNodes);
  340 
  341         /* Initialize nodes. */
  342         dag_h->numCommitNodes = 1;
  343         dag_h->numCommits = 0;
  344         /*
  345          * This dag can not commit until the commit node is reached.
  346          * Errors prior to the commit point imply the dag has failed.
  347          */
  348         dag_h->numSuccedents = 1;
  349 
  350         rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
  351             rf_NullNodeUndoFunc, NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0,
  352             dag_h, "Nil", allocList);
  353         rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
  354             rf_NullNodeUndoFunc, NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
  355         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
  356             rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
  357         rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple,
  358             rf_NullNodeUndoFunc, NULL, 1, nRudNodes + nRrdNodes + 1,
  359             2 * nXorBufs + 2, 1, dag_h, recFunc->SimpleName, allocList);
  360 
  361         /* Fill in the Rud nodes. */
  362         for (pda = asmap->physInfo, i = 0; i < nRudNodes;
  363              i++, pda = pda->next) {
  364                 if (pda == failedPDA) {
  365                         i--;
  366                         continue;
  367                 }
  368                 rf_InitNode(&rudNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc,
  369                     rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
  370                     dag_h, "Rud", allocList);
  371                 RF_ASSERT(pda);
  372                 rudNodes[i].params[0].p = pda;
  373                 rudNodes[i].params[1].p = pda->bufPtr;
  374                 rudNodes[i].params[2].v = parityStripeID;
  375                 rudNodes[i].params[3].v =
  376                     RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
  377         }
  378 
  379         /* Fill in the Rrd nodes. */
  380         i = 0;
  381         if (new_asm_h[0]) {
  382                 for (pda = new_asm_h[0]->stripeMap->physInfo;
  383                      i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
  384                      i++, pda = pda->next) {
  385                         rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE,
  386                             rf_DiskReadFunc, rf_DiskReadUndoFunc,
  387                             rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
  388                             "Rrd", allocList);
  389                         RF_ASSERT(pda);
  390                         rrdNodes[i].params[0].p = pda;
  391                         rrdNodes[i].params[1].p = pda->bufPtr;
  392                         rrdNodes[i].params[2].v = parityStripeID;
  393                         rrdNodes[i].params[3].v =
  394                             RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0,
  395                             which_ru);
  396                 }
  397         }
  398         if (new_asm_h[1]) {
  399                 for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo;
  400                     j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
  401                     j++, pda = pda->next) {
  402                         rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE,
  403                             rf_DiskReadFunc, rf_DiskReadUndoFunc,
  404                             rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
  405                             "Rrd", allocList);
  406                         RF_ASSERT(pda);
  407                         rrdNodes[i + j].params[0].p = pda;
  408                         rrdNodes[i + j].params[1].p = pda->bufPtr;
  409                         rrdNodes[i + j].params[2].v = parityStripeID;
  410                         rrdNodes[i + j].params[3].v =
  411                             RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0,
  412                             which_ru);
  413                 }
  414         }
  415         /* Make a PDA for the parity unit. */
  416         RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t),
  417             (RF_PhysDiskAddr_t *), allocList);
  418         parityPDA->row = asmap->parityInfo->row;
  419         parityPDA->col = asmap->parityInfo->col;
  420         parityPDA->startSector = ((asmap->parityInfo->startSector /
  421             sectorsPerSU) * sectorsPerSU) +
  422             (failedPDA->startSector % sectorsPerSU);
  423         parityPDA->numSector = failedPDA->numSector;
  424 
  425         /* Initialize the Rp node. */
  426         rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
  427             rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
  428             "Rp ", allocList);
  429         rpNode->params[0].p = parityPDA;
  430         rpNode->params[1].p = rpBuf;
  431         rpNode->params[2].v = parityStripeID;
  432         rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0,
  433             which_ru);
  434 
  435         /*
  436          * The last and nastiest step is to assign all
  437          * the parameters of the Xor node.
  438          */
  439         paramNum = 0;
  440         for (i = 0; i < nRrdNodes; i++) {
  441                 /* All the Rrd nodes need to be xored together. */
  442                 xorNode->params[paramNum++] = rrdNodes[i].params[0];
  443                 xorNode->params[paramNum++] = rrdNodes[i].params[1];
  444         }
  445         for (i = 0; i < nRudNodes; i++) {
  446                 /* Any Rud nodes that overlap the failed access need to be
  447                  * xored in. */
  448                 if (overlappingPDAs[i]) {
  449                         RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t),
  450                             (RF_PhysDiskAddr_t *), allocList);
  451                         bcopy((char *) rudNodes[i].params[0].p, (char *) pda,
  452                             sizeof(RF_PhysDiskAddr_t));
  453                         rf_RangeRestrictPDA(raidPtr, failedPDA, pda,
  454                             RF_RESTRICT_DOBUFFER, 0);
  455                         xorNode->params[paramNum++].p = pda;
  456                         xorNode->params[paramNum++].p = pda->bufPtr;
  457                 }
  458         }
  459         RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char));
  460 
  461         /* Install parity pda as last set of params to be xor'd. */
  462         xorNode->params[paramNum++].p = parityPDA;
  463         xorNode->params[paramNum++].p = rpBuf;
  464 
  465         /*
  466          * The last 2 params to the recovery xor node are
  467          * the failed PDA and the raidPtr.
  468          */
  469         xorNode->params[paramNum++].p = failedPDA;
  470         xorNode->params[paramNum++].p = raidPtr;
  471         RF_ASSERT(paramNum == 2 * nXorBufs + 2);
  472 
  473         /*
  474          * The xor node uses results[0] as the target buffer.
  475          * Set pointer and zero the buffer. In the kernel, this
  476          * may be a user buffer in which case we have to remap it.
  477          */
  478         xorNode->results[0] = failedPDA->bufPtr;
  479         RF_BZERO(bp, failedPDA->bufPtr, rf_RaidAddressToByte(raidPtr,
  480             failedPDA->numSector));
  481 
  482         /* Connect nodes to form graph. */
  483         /* Connect the header to the block node. */
  484         RF_ASSERT(dag_h->numSuccedents == 1);
  485         RF_ASSERT(blockNode->numAntecedents == 0);
  486         dag_h->succedents[0] = blockNode;
  487 
  488         /* Connect the block node to the read nodes. */
  489         RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes));
  490         RF_ASSERT(rpNode->numAntecedents == 1);
  491         blockNode->succedents[0] = rpNode;
  492         rpNode->antecedents[0] = blockNode;
  493         rpNode->antType[0] = rf_control;
  494         for (i = 0; i < nRrdNodes; i++) {
  495                 RF_ASSERT(rrdNodes[i].numSuccedents == 1);
  496                 blockNode->succedents[1 + i] = &rrdNodes[i];
  497                 rrdNodes[i].antecedents[0] = blockNode;
  498                 rrdNodes[i].antType[0] = rf_control;
  499         }
  500         for (i = 0; i < nRudNodes; i++) {
  501                 RF_ASSERT(rudNodes[i].numSuccedents == 1);
  502                 blockNode->succedents[1 + nRrdNodes + i] = &rudNodes[i];
  503                 rudNodes[i].antecedents[0] = blockNode;
  504                 rudNodes[i].antType[0] = rf_control;
  505         }
  506 
  507         /* Connect the read nodes to the xor node. */
  508         RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes));
  509         RF_ASSERT(rpNode->numSuccedents == 1);
  510         rpNode->succedents[0] = xorNode;
  511         xorNode->antecedents[0] = rpNode;
  512         xorNode->antType[0] = rf_trueData;
  513         for (i = 0; i < nRrdNodes; i++) {
  514                 RF_ASSERT(rrdNodes[i].numSuccedents == 1);
  515                 rrdNodes[i].succedents[0] = xorNode;
  516                 xorNode->antecedents[1 + i] = &rrdNodes[i];
  517                 xorNode->antType[1 + i] = rf_trueData;
  518         }
  519         for (i = 0; i < nRudNodes; i++) {
  520                 RF_ASSERT(rudNodes[i].numSuccedents == 1);
  521                 rudNodes[i].succedents[0] = xorNode;
  522                 xorNode->antecedents[1 + nRrdNodes + i] = &rudNodes[i];
  523                 xorNode->antType[1 + nRrdNodes + i] = rf_trueData;
  524         }
  525 
  526         /* Connect the xor node to the commit node. */
  527         RF_ASSERT(xorNode->numSuccedents == 1);
  528         RF_ASSERT(commitNode->numAntecedents == 1);
  529         xorNode->succedents[0] = commitNode;
  530         commitNode->antecedents[0] = xorNode;
  531         commitNode->antType[0] = rf_control;
  532 
  533         /* Connect the termNode to the commit node. */
  534         RF_ASSERT(commitNode->numSuccedents == 1);
  535         RF_ASSERT(termNode->numAntecedents == 1);
  536         RF_ASSERT(termNode->numSuccedents == 0);
  537         commitNode->succedents[0] = termNode;
  538         termNode->antType[0] = rf_control;
  539         termNode->antecedents[0] = commitNode;
  540 }
  541 
  542 
  543 /*****************************************************************************
  544  * Create a degraded read DAG for Chained Declustering.
  545  *
  546  * Hdr -> Nil -> R(p/s)d -> Cmt -> Trm
  547  *
  548  * The "Rd" node reads data from the surviving disk in the mirror pair
  549  *   Rpd - read of primary copy
  550  *   Rsd - read of secondary copy
  551  *
  552  * Parameters:  raidPtr   - description of the physical array
  553  *              asmap     - logical & physical addresses for this access
  554  *              bp        - buffer ptr (for holding write data)
  555  *              flags     - general flags (e.g. disk locking)
  556  *              allocList - list of memory allocated in DAG creation
  557  *****************************************************************************/
  558 
  559 void
  560 rf_CreateRaidCDegradedReadDAG(
  561     RF_Raid_t                   *raidPtr,
  562     RF_AccessStripeMap_t        *asmap,
  563     RF_DagHeader_t              *dag_h,
  564     void                        *bp,
  565     RF_RaidAccessFlags_t         flags,
  566     RF_AllocListElem_t          *allocList
  567 )
  568 {
  569         RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode;
  570         RF_StripeNum_t parityStripeID;
  571         int useMirror, i, shiftable;
  572         RF_ReconUnitNum_t which_ru;
  573         RF_PhysDiskAddr_t *pda;
  574 
  575         if ((asmap->numDataFailed + asmap->numParityFailed) == 0) {
  576                 shiftable = RF_TRUE;
  577         } else {
  578                 shiftable = RF_FALSE;
  579         }
  580         useMirror = 0;
  581         parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
  582             asmap->raidAddress, &which_ru);
  583 
  584         if (rf_dagDebug) {
  585                 printf("[Creating RAID C degraded read DAG]\n");
  586         }
  587         dag_h->creator = "RaidCDegradedReadDAG";
  588         /* Alloc the Wnd nodes and the Wmir node. */
  589         if (asmap->numDataFailed == 0)
  590                 useMirror = RF_FALSE;
  591         else
  592                 useMirror = RF_TRUE;
  593 
  594         /* total number of nodes = 1 + (block + commit + terminator) */
  595         RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *),
  596             allocList);
  597         i = 0;
  598         rdNode = &nodes[i];
  599         i++;
  600         blockNode = &nodes[i];
  601         i++;
  602         commitNode = &nodes[i];
  603         i++;
  604         termNode = &nodes[i];
  605         i++;
  606 
  607         /*
  608          * This dag can not commit until the commit node is reached.
  609          * Errors prior to the commit point imply the dag has failed
  610          * and must be retried.
  611          */
  612         dag_h->numCommitNodes = 1;
  613         dag_h->numCommits = 0;
  614         dag_h->numSuccedents = 1;
  615 
  616         /* initialize the block, commit, and terminator nodes */
  617         rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
  618             rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
  619         rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
  620             rf_NullNodeUndoFunc, NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
  621         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
  622             rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
  623 
  624         pda = asmap->physInfo;
  625         RF_ASSERT(pda != NULL);
  626         /* ParityInfo must describe entire parity unit. */
  627         RF_ASSERT(asmap->parityInfo->next == NULL);
  628 
  629         /* Initialize the data node. */
  630         if (!useMirror) {
  631                 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
  632                     rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
  633                     dag_h, "Rpd", allocList);
  634                 if (shiftable && rf_compute_workload_shift(raidPtr, pda)) {
  635                         /* Shift this read to the next disk in line. */
  636                         rdNode->params[0].p = asmap->parityInfo;
  637                         rdNode->params[1].p = pda->bufPtr;
  638                         rdNode->params[2].v = parityStripeID;
  639                         rdNode->params[3].v = RF_CREATE_PARAM3(
  640                             RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
  641                 } else {
  642                         /* Read primary copy. */
  643                         rdNode->params[0].p = pda;
  644                         rdNode->params[1].p = pda->bufPtr;
  645                         rdNode->params[2].v = parityStripeID;
  646                         rdNode->params[3].v = RF_CREATE_PARAM3(
  647                             RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
  648                 }
  649         } else {
  650                 /* Read secondary copy of data. */
  651                 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
  652                     rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
  653                     dag_h, "Rsd", allocList);
  654                 rdNode->params[0].p = asmap->parityInfo;
  655                 rdNode->params[1].p = pda->bufPtr;
  656                 rdNode->params[2].v = parityStripeID;
  657                 rdNode->params[3].v =
  658                     RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
  659         }
  660 
  661         /* Connect header to block node. */
  662         RF_ASSERT(dag_h->numSuccedents == 1);
  663         RF_ASSERT(blockNode->numAntecedents == 0);
  664         dag_h->succedents[0] = blockNode;
  665 
  666         /* Connect block node to rdnode. */
  667         RF_ASSERT(blockNode->numSuccedents == 1);
  668         RF_ASSERT(rdNode->numAntecedents == 1);
  669         blockNode->succedents[0] = rdNode;
  670         rdNode->antecedents[0] = blockNode;
  671         rdNode->antType[0] = rf_control;
  672 
  673         /* Connect rdnode to commit node. */
  674         RF_ASSERT(rdNode->numSuccedents == 1);
  675         RF_ASSERT(commitNode->numAntecedents == 1);
  676         rdNode->succedents[0] = commitNode;
  677         commitNode->antecedents[0] = rdNode;
  678         commitNode->antType[0] = rf_control;
  679 
  680         /* Connect commit node to terminator. */
  681         RF_ASSERT(commitNode->numSuccedents == 1);
  682         RF_ASSERT(termNode->numAntecedents == 1);
  683         RF_ASSERT(termNode->numSuccedents == 0);
  684         commitNode->succedents[0] = termNode;
  685         termNode->antecedents[0] = commitNode;
  686         termNode->antType[0] = rf_control;
  687 }
  688 
  689 /*
  690  * XXX move this elsewhere ?
  691  */
  692 void
  693 rf_DD_GenerateFailedAccessASMs(
  694     RF_Raid_t                    *raidPtr,
  695     RF_AccessStripeMap_t         *asmap,
  696     RF_PhysDiskAddr_t           **pdap,
  697     int                          *nNodep,
  698     RF_PhysDiskAddr_t           **pqpdap,
  699     int                          *nPQNodep,
  700     RF_AllocListElem_t           *allocList
  701 )
  702 {
  703         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  704         int PDAPerDisk, i;
  705         RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
  706         int numDataCol = layoutPtr->numDataCol;
  707         int state;
  708         RF_SectorNum_t suoff, suend;
  709         unsigned firstDataCol, napdas, count;
  710         RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0;
  711         RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0];
  712         RF_PhysDiskAddr_t *ftwo = asmap->failedPDAs[1];
  713         RF_PhysDiskAddr_t *pda_p;
  714         RF_PhysDiskAddr_t *phys_p;
  715         RF_RaidAddr_t sosAddr;
  716 
  717         /*
  718          * Determine how many pda's we will have to generate per unaccessed
  719          * stripe. If there is only one failed data unit, it is one; if two,
  720          * possibly two, depending whether they overlap.
  721          */
  722 
  723         fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector);
  724         fone_end = fone_start + fone->numSector;
  725 
  726 #define CONS_PDA(if,start,num)          do {                            \
  727         pda_p->row = asmap->if->row;                                    \
  728         pda_p->col = asmap->if->col;                                    \
  729         pda_p->startSector = ((asmap->if->startSector / secPerSU) *     \
  730             secPerSU) + start;                                          \
  731         pda_p->numSector = num;                                         \
  732         pda_p->next = NULL;                                             \
  733         RF_MallocAndAdd(pda_p->bufPtr,                                  \
  734             rf_RaidAddressToByte(raidPtr,num),(char *), allocList);     \
  735 } while (0)
  736 
  737         if (asmap->numDataFailed == 1) {
  738                 PDAPerDisk = 1;
  739                 state = 1;
  740                 RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t),
  741                     (RF_PhysDiskAddr_t *), allocList);
  742                 pda_p = *pqpdap;
  743                 /* Build p. */
  744                 CONS_PDA(parityInfo, fone_start, fone->numSector);
  745                 pda_p->type = RF_PDA_TYPE_PARITY;
  746                 pda_p++;
  747                 /* Build q. */
  748                 CONS_PDA(qInfo, fone_start, fone->numSector);
  749                 pda_p->type = RF_PDA_TYPE_Q;
  750         } else {
  751                 ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector);
  752                 ftwo_end = ftwo_start + ftwo->numSector;
  753                 if (fone->numSector + ftwo->numSector > secPerSU) {
  754                         PDAPerDisk = 1;
  755                         state = 2;
  756                         RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t),
  757                             (RF_PhysDiskAddr_t *), allocList);
  758                         pda_p = *pqpdap;
  759                         CONS_PDA(parityInfo, 0, secPerSU);
  760                         pda_p->type = RF_PDA_TYPE_PARITY;
  761                         pda_p++;
  762                         CONS_PDA(qInfo, 0, secPerSU);
  763                         pda_p->type = RF_PDA_TYPE_Q;
  764                 } else {
  765                         PDAPerDisk = 2;
  766                         state = 3;
  767                         /* Four of them, fone, then ftwo. */
  768                         RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t),
  769                             (RF_PhysDiskAddr_t *), allocList);
  770                         pda_p = *pqpdap;
  771                         CONS_PDA(parityInfo, fone_start, fone->numSector);
  772                         pda_p->type = RF_PDA_TYPE_PARITY;
  773                         pda_p++;
  774                         CONS_PDA(qInfo, fone_start, fone->numSector);
  775                         pda_p->type = RF_PDA_TYPE_Q;
  776                         pda_p++;
  777                         CONS_PDA(parityInfo, ftwo_start, ftwo->numSector);
  778                         pda_p->type = RF_PDA_TYPE_PARITY;
  779                         pda_p++;
  780                         CONS_PDA(qInfo, ftwo_start, ftwo->numSector);
  781                         pda_p->type = RF_PDA_TYPE_Q;
  782                 }
  783         }
  784         /* Figure out number of nonaccessed pda. */
  785         napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed -
  786             (ftwo == NULL ? 1 : 0));
  787         *nPQNodep = PDAPerDisk;
  788 
  789         /*
  790          * Sweep over the over accessed pda's, figuring out the number of
  791          * additional pda's to generate. Of course, skip the failed ones.
  792          */
  793 
  794         count = 0;
  795         for (pda_p = asmap->physInfo; pda_p; pda_p = pda_p->next) {
  796                 if ((pda_p == fone) || (pda_p == ftwo))
  797                         continue;
  798                 suoff = rf_StripeUnitOffset(layoutPtr, pda_p->startSector);
  799                 suend = suoff + pda_p->numSector;
  800                 switch (state) {
  801                 case 1: /* One failed PDA to overlap. */
  802                         /*
  803                          * If a PDA doesn't contain the failed unit, it can
  804                          * only miss the start or end, not both.
  805                          */
  806                         if ((suoff > fone_start) || (suend < fone_end))
  807                                 count++;
  808                         break;
  809                 case 2: /* Whole stripe. */
  810                         if (suoff)                      /* Leak at begining. */
  811                                 count++;
  812                         if (suend < numDataCol)         /* Leak at end. */
  813                                 count++;
  814                         break;
  815                 case 3: /* Two disjoint units. */
  816                         if ((suoff > fone_start) || (suend < fone_end))
  817                                 count++;
  818                         if ((suoff > ftwo_start) || (suend < ftwo_end))
  819                                 count++;
  820                         break;
  821                 default:
  822                         RF_PANIC();
  823                 }
  824         }
  825 
  826         napdas += count;
  827         *nNodep = napdas;
  828         if (napdas == 0)
  829                 return;         /* short circuit */
  830 
  831         /* Allocate up our list of pda's. */
  832 
  833         RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t),
  834             (RF_PhysDiskAddr_t *), allocList);
  835         *pdap = pda_p;
  836 
  837         /* Link them together. */
  838         for (i = 0; i < (napdas - 1); i++)
  839                 pda_p[i].next = pda_p + (i + 1);
  840 
  841         /* March through the one's up to the first accessed disk. */
  842         firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
  843             asmap->physInfo->raidAddress) % numDataCol;
  844         sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
  845             asmap->raidAddress);
  846         for (i = 0; i < firstDataCol; i++) {
  847                 if ((pda_p - (*pdap)) == napdas)
  848                         continue;
  849                 pda_p->type = RF_PDA_TYPE_DATA;
  850                 pda_p->raidAddress = sosAddr + (i * secPerSU);
  851                 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress,
  852                     &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
  853                 /* Skip over dead disks. */
  854                 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status))
  855                         continue;
  856                 switch (state) {
  857                 case 1: /* Fone. */
  858                         pda_p->numSector = fone->numSector;
  859                         pda_p->raidAddress += fone_start;
  860                         pda_p->startSector += fone_start;
  861                         RF_MallocAndAdd(pda_p->bufPtr,
  862                             rf_RaidAddressToByte(raidPtr, pda_p->numSector),
  863                             (char *), allocList);
  864                         break;
  865                 case 2: /* Full stripe. */
  866                         pda_p->numSector = secPerSU;
  867                         RF_MallocAndAdd(pda_p->bufPtr,
  868                             rf_RaidAddressToByte(raidPtr, secPerSU),
  869                             (char *), allocList);
  870                         break;
  871                 case 3: /* Two slabs. */
  872                         pda_p->numSector = fone->numSector;
  873                         pda_p->raidAddress += fone_start;
  874                         pda_p->startSector += fone_start;
  875                         RF_MallocAndAdd(pda_p->bufPtr,
  876                             rf_RaidAddressToByte(raidPtr, pda_p->numSector),
  877                             (char *), allocList);
  878                         pda_p++;
  879                         pda_p->type = RF_PDA_TYPE_DATA;
  880                         pda_p->raidAddress = sosAddr + (i * secPerSU);
  881                         (raidPtr->Layout.map->MapSector) (raidPtr,
  882                             pda_p->raidAddress, &(pda_p->row), &(pda_p->col),
  883                             &(pda_p->startSector), 0);
  884                         pda_p->numSector = ftwo->numSector;
  885                         pda_p->raidAddress += ftwo_start;
  886                         pda_p->startSector += ftwo_start;
  887                         RF_MallocAndAdd(pda_p->bufPtr,
  888                             rf_RaidAddressToByte(raidPtr, pda_p->numSector),
  889                             (char *), allocList);
  890                         break;
  891                 default:
  892                         RF_PANIC();
  893                 }
  894                 pda_p++;
  895         }
  896 
  897         /* March through the touched stripe units. */
  898         for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) {
  899                 if ((phys_p == asmap->failedPDAs[0]) ||
  900                     (phys_p == asmap->failedPDAs[1]))
  901                         continue;
  902                 suoff = rf_StripeUnitOffset(layoutPtr, phys_p->startSector);
  903                 suend = suoff + phys_p->numSector;
  904                 switch (state) {
  905                 case 1: /* Single buffer. */
  906                         if (suoff > fone_start) {
  907                                 RF_ASSERT(suend >= fone_end);
  908                                 /*
  909                                  * The data read starts after the mapped
  910                                  * access, snip off the begining.
  911                                  */
  912                                 pda_p->numSector = suoff - fone_start;
  913                                 pda_p->raidAddress = sosAddr + (i * secPerSU)
  914                                     + fone_start;
  915                                 (raidPtr->Layout.map->MapSector) (raidPtr,
  916                                     pda_p->raidAddress, &(pda_p->row),
  917                                     &(pda_p->col), &(pda_p->startSector), 0);
  918                                 RF_MallocAndAdd(pda_p->bufPtr,
  919                                     rf_RaidAddressToByte(raidPtr,
  920                                     pda_p->numSector), (char *), allocList);
  921                                 pda_p++;
  922                         }
  923                         if (suend < fone_end) {
  924                                 RF_ASSERT(suoff <= fone_start);
  925                                 /*
  926                                  * The data read stops before the end of the
  927                                  * failed access, extend.
  928                                  */
  929                                 pda_p->numSector = fone_end - suend;
  930                                 pda_p->raidAddress = sosAddr + (i * secPerSU)
  931                                     + suend;    /* off by one? */
  932                                 (raidPtr->Layout.map->MapSector) (raidPtr,
  933                                     pda_p->raidAddress, &(pda_p->row),
  934                                     &(pda_p->col), &(pda_p->startSector), 0);
  935                                 RF_MallocAndAdd(pda_p->bufPtr,
  936                                     rf_RaidAddressToByte(raidPtr,
  937                                     pda_p->numSector), (char *), allocList);
  938                                 pda_p++;
  939                         }
  940                         break;
  941                 case 2: /* Whole stripe unit. */
  942                         RF_ASSERT((suoff == 0) || (suend == secPerSU));
  943                         if (suend < secPerSU) {
  944                                 /* Short read, snip from end on. */
  945                                 pda_p->numSector = secPerSU - suend;
  946                                 pda_p->raidAddress = sosAddr + (i * secPerSU)
  947                                     + suend;    /* off by one? */
  948                                 (raidPtr->Layout.map->MapSector) (raidPtr,
  949                                     pda_p->raidAddress, &(pda_p->row),
  950                                     &(pda_p->col), &(pda_p->startSector), 0);
  951                                 RF_MallocAndAdd(pda_p->bufPtr,
  952                                     rf_RaidAddressToByte(raidPtr,
  953                                     pda_p->numSector), (char *), allocList);
  954                                 pda_p++;
  955                         } else
  956                                 if (suoff > 0) {
  957                                         /* Short at front. */
  958                                         pda_p->numSector = suoff;
  959                                         pda_p->raidAddress = sosAddr +
  960                                             (i * secPerSU);
  961                                         (raidPtr->Layout.map->MapSector)
  962                                             (raidPtr, pda_p->raidAddress,
  963                                             &(pda_p->row), &(pda_p->col),
  964                                             &(pda_p->startSector), 0);
  965                                         RF_MallocAndAdd(pda_p->bufPtr,
  966                                             rf_RaidAddressToByte(raidPtr,
  967                                             pda_p->numSector), (char *),
  968                                             allocList);
  969                                         pda_p++;
  970                                 }
  971                         break;
  972                 case 3: /* Two nonoverlapping failures. */
  973                         if ((suoff > fone_start) || (suend < fone_end)) {
  974                                 if (suoff > fone_start) {
  975                                         RF_ASSERT(suend >= fone_end);
  976                                         /*
  977                                          * The data read starts after the
  978                                          * mapped access, snip off the
  979                                          * begining.
  980                                          */
  981                                         pda_p->numSector = suoff - fone_start;
  982                                         pda_p->raidAddress = sosAddr +
  983                                             (i * secPerSU) + fone_start;
  984                                         (raidPtr->Layout.map->MapSector)
  985                                             (raidPtr, pda_p->raidAddress,
  986                                             &(pda_p->row), &(pda_p->col),
  987                                             &(pda_p->startSector), 0);
  988                                         RF_MallocAndAdd(pda_p->bufPtr,
  989                                             rf_RaidAddressToByte(raidPtr,
  990                                             pda_p->numSector), (char *),
  991                                             allocList);
  992                                         pda_p++;
  993                                 }
  994                                 if (suend < fone_end) {
  995                                         RF_ASSERT(suoff <= fone_start);
  996                                         /*
  997                                          * The data read stops before the end
  998                                          * of the failed access, extend.
  999                                          */
 1000                                         pda_p->numSector = fone_end - suend;
 1001                                         pda_p->raidAddress = sosAddr +
 1002                                             (i * secPerSU) +
 1003                                             suend;      /* Off by one ? */
 1004                                         (raidPtr->Layout.map->MapSector)
 1005                                             (raidPtr, pda_p->raidAddress,
 1006                                             &(pda_p->row), &(pda_p->col),
 1007                                             &(pda_p->startSector), 0);
 1008                                         RF_MallocAndAdd(pda_p->bufPtr,
 1009                                             rf_RaidAddressToByte(raidPtr,
 1010                                             pda_p->numSector), (char *),
 1011                                             allocList);
 1012                                         pda_p++;
 1013                                 }
 1014                         }
 1015                         if ((suoff > ftwo_start) || (suend < ftwo_end)) {
 1016                                 if (suoff > ftwo_start) {
 1017                                         RF_ASSERT(suend >= ftwo_end);
 1018                                         /*
 1019                                          * The data read starts after the
 1020                                          * mapped access, snip off the
 1021                                          * begining.
 1022                                          */
 1023                                         pda_p->numSector = suoff - ftwo_start;
 1024                                         pda_p->raidAddress = sosAddr +
 1025                                             (i * secPerSU) + ftwo_start;
 1026                                         (raidPtr->Layout.map->MapSector)
 1027                                             (raidPtr, pda_p->raidAddress,
 1028                                             &(pda_p->row), &(pda_p->col),
 1029                                             &(pda_p->startSector), 0);
 1030                                         RF_MallocAndAdd(pda_p->bufPtr,
 1031                                             rf_RaidAddressToByte(raidPtr,
 1032                                             pda_p->numSector), (char *),
 1033                                             allocList);
 1034                                         pda_p++;
 1035                                 }
 1036                                 if (suend < ftwo_end) {
 1037                                         RF_ASSERT(suoff <= ftwo_start);
 1038                                         /*
 1039                                          * The data read stops before the end
 1040                                          * of the failed access, extend.
 1041                                          */
 1042                                         pda_p->numSector = ftwo_end - suend;
 1043                                         pda_p->raidAddress = sosAddr +
 1044                                             (i * secPerSU) +
 1045                                             suend;      /* Off by one ? */
 1046                                         (raidPtr->Layout.map->MapSector)
 1047                                             (raidPtr, pda_p->raidAddress,
 1048                                             &(pda_p->row), &(pda_p->col),
 1049                                             &(pda_p->startSector), 0);
 1050                                         RF_MallocAndAdd(pda_p->bufPtr,
 1051                                             rf_RaidAddressToByte(raidPtr,
 1052                                             pda_p->numSector), (char *),
 1053                                             allocList);
 1054                                         pda_p++;
 1055                                 }
 1056                         }
 1057                         break;
 1058                 default:
 1059                         RF_PANIC();
 1060                 }
 1061         }
 1062 
 1063         /* After the last accessed disk. */
 1064         for (; i < numDataCol; i++) {
 1065                 if ((pda_p - (*pdap)) == napdas)
 1066                         continue;
 1067                 pda_p->type = RF_PDA_TYPE_DATA;
 1068                 pda_p->raidAddress = sosAddr + (i * secPerSU);
 1069                 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress,
 1070                     &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
 1071                 /* Skip over dead disks. */
 1072                 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status))
 1073                         continue;
 1074                 switch (state) {
 1075                 case 1: /* Fone. */
 1076                         pda_p->numSector = fone->numSector;
 1077                         pda_p->raidAddress += fone_start;
 1078                         pda_p->startSector += fone_start;
 1079                         RF_MallocAndAdd(pda_p->bufPtr,
 1080                             rf_RaidAddressToByte(raidPtr, pda_p->numSector),
 1081                             (char *), allocList);
 1082                         break;
 1083                 case 2: /* Full stripe. */
 1084                         pda_p->numSector = secPerSU;
 1085                         RF_MallocAndAdd(pda_p->bufPtr,
 1086                             rf_RaidAddressToByte(raidPtr, secPerSU),
 1087                             (char *), allocList);
 1088                         break;
 1089                 case 3: /* Two slabs. */
 1090                         pda_p->numSector = fone->numSector;
 1091                         pda_p->raidAddress += fone_start;
 1092                         pda_p->startSector += fone_start;
 1093                         RF_MallocAndAdd(pda_p->bufPtr,
 1094                             rf_RaidAddressToByte(raidPtr, pda_p->numSector),
 1095                             (char *), allocList);
 1096                         pda_p++;
 1097                         pda_p->type = RF_PDA_TYPE_DATA;
 1098                         pda_p->raidAddress = sosAddr + (i * secPerSU);
 1099                         (raidPtr->Layout.map->MapSector) (raidPtr,
 1100                             pda_p->raidAddress, &(pda_p->row), &(pda_p->col),
 1101                             &(pda_p->startSector), 0);
 1102                         pda_p->numSector = ftwo->numSector;
 1103                         pda_p->raidAddress += ftwo_start;
 1104                         pda_p->startSector += ftwo_start;
 1105                         RF_MallocAndAdd(pda_p->bufPtr,
 1106                             rf_RaidAddressToByte(raidPtr, pda_p->numSector),
 1107                             (char *), allocList);
 1108                         break;
 1109                 default:
 1110                         RF_PANIC();
 1111                 }
 1112                 pda_p++;
 1113         }
 1114 
 1115         RF_ASSERT(pda_p - *pdap == napdas);
 1116         return;
 1117 }
 1118 
 1119 #define INIT_DISK_NODE(node,name)       do {                            \
 1120         rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc,           \
 1121             rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0,         \
 1122             dag_h, name, allocList);                                    \
 1123         (node)->succedents[0] = unblockNode;                            \
 1124         (node)->succedents[1] = recoveryNode;                           \
 1125         (node)->antecedents[0] = blockNode;                             \
 1126         (node)->antType[0] = rf_control;                                \
 1127 } while (0)
 1128 
 1129 #define DISK_NODE_PARAMS(_node_,_p_)    do {                            \
 1130         (_node_).params[0].p = _p_ ;                                    \
 1131         (_node_).params[1].p = (_p_)->bufPtr;                           \
 1132         (_node_).params[2].v = parityStripeID;                          \
 1133         (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,  \
 1134             0, 0, which_ru);                                            \
 1135 } while (0)
 1136 
 1137 void
 1138 rf_DoubleDegRead(
 1139     RF_Raid_t                    *raidPtr,
 1140     RF_AccessStripeMap_t         *asmap,
 1141     RF_DagHeader_t               *dag_h,
 1142     void                         *bp,
 1143     RF_RaidAccessFlags_t          flags,
 1144     RF_AllocListElem_t           *allocList,
 1145     char                         *redundantReadNodeName,
 1146     char                         *recoveryNodeName,
 1147     int                         (*recovFunc) (RF_DagNode_t *)
 1148 )
 1149 {
 1150         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
 1151         RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode,
 1152             *unblockNode, *rpNodes, *rqNodes, *termNode;
 1153         RF_PhysDiskAddr_t *pda, *pqPDAs;
 1154         RF_PhysDiskAddr_t *npdas;
 1155         int nNodes, nRrdNodes, nRudNodes, i;
 1156         RF_ReconUnitNum_t which_ru;
 1157         int nReadNodes, nPQNodes;
 1158         RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0];
 1159         RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1];
 1160         RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(
 1161             layoutPtr, asmap->raidAddress, &which_ru);
 1162 
 1163         if (rf_dagDebug)
 1164                 printf("[Creating Double Degraded Read DAG]\n");
 1165         rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes,
 1166             &pqPDAs, &nPQNodes, allocList);
 1167 
 1168         nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed);
 1169         nReadNodes = nRrdNodes + nRudNodes + 2 * nPQNodes;
 1170         nNodes = 4 /* Block, unblock, recovery, term. */ + nReadNodes;
 1171 
 1172         RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *),
 1173             allocList);
 1174         i = 0;
 1175         blockNode = &nodes[i];
 1176         i += 1;
 1177         unblockNode = &nodes[i];
 1178         i += 1;
 1179         recoveryNode = &nodes[i];
 1180         i += 1;
 1181         termNode = &nodes[i];
 1182         i += 1;
 1183         rudNodes = &nodes[i];
 1184         i += nRudNodes;
 1185         rrdNodes = &nodes[i];
 1186         i += nRrdNodes;
 1187         rpNodes = &nodes[i];
 1188         i += nPQNodes;
 1189         rqNodes = &nodes[i];
 1190         i += nPQNodes;
 1191         RF_ASSERT(i == nNodes);
 1192 
 1193         dag_h->numSuccedents = 1;
 1194         dag_h->succedents[0] = blockNode;
 1195         dag_h->creator = "DoubleDegRead";
 1196         dag_h->numCommits = 0;
 1197         dag_h->numCommitNodes = 1;      /* Unblock. */
 1198 
 1199         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
 1200             rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList);
 1201         termNode->antecedents[0] = unblockNode;
 1202         termNode->antType[0] = rf_control;
 1203         termNode->antecedents[1] = recoveryNode;
 1204         termNode->antType[1] = rf_control;
 1205 
 1206         /*
 1207          * Init the block and unblock nodes.
 1208          * The block node has all nodes except itself, unblock and
 1209          * recovery as successors.
 1210          * Similarly for predecessors of the unblock.
 1211          */
 1212         rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
 1213             rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h,
 1214             "Nil", allocList);
 1215         rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
 1216             rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h,
 1217             "Nil", allocList);
 1218 
 1219         for (i = 0; i < nReadNodes; i++) {
 1220                 blockNode->succedents[i] = rudNodes + i;
 1221                 unblockNode->antecedents[i] = rudNodes + i;
 1222                 unblockNode->antType[i] = rf_control;
 1223         }
 1224         unblockNode->succedents[0] = termNode;
 1225 
 1226         /*
 1227          * The recovery node has all the reads as predecessors, and the term
 1228          * node as successors. It gets a pda as a param from each of the read
 1229          * nodes plus the raidPtr. For each failed unit is has a result pda.
 1230          */
 1231         rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc,
 1232             rf_NullNodeUndoFunc, NULL,
 1233             1,                          /* succesors */
 1234             nReadNodes,                 /* preds */
 1235             nReadNodes + 2,             /* params */
 1236             asmap->numDataFailed,       /* results */
 1237             dag_h, recoveryNodeName, allocList);
 1238 
 1239         recoveryNode->succedents[0] = termNode;
 1240         for (i = 0; i < nReadNodes; i++) {
 1241                 recoveryNode->antecedents[i] = rudNodes + i;
 1242                 recoveryNode->antType[i] = rf_trueData;
 1243         }
 1244 
 1245         /*
 1246          * Build the read nodes, then come back and fill in recovery params
 1247          * and results.
 1248          */
 1249         pda = asmap->physInfo;
 1250         for (i = 0; i < nRudNodes; pda = pda->next) {
 1251                 if ((pda == failedPDA) || (pda == failedPDAtwo))
 1252                         continue;
 1253                 INIT_DISK_NODE(rudNodes + i, "Rud");
 1254                 RF_ASSERT(pda);
 1255                 DISK_NODE_PARAMS(rudNodes[i], pda);
 1256                 i++;
 1257         }
 1258 
 1259         pda = npdas;
 1260         for (i = 0; i < nRrdNodes; i++, pda = pda->next) {
 1261                 INIT_DISK_NODE(rrdNodes + i, "Rrd");
 1262                 RF_ASSERT(pda);
 1263                 DISK_NODE_PARAMS(rrdNodes[i], pda);
 1264         }
 1265 
 1266         /* Redundancy pdas. */
 1267         pda = pqPDAs;
 1268         INIT_DISK_NODE(rpNodes, "Rp");
 1269         RF_ASSERT(pda);
 1270         DISK_NODE_PARAMS(rpNodes[0], pda);
 1271         pda++;
 1272         INIT_DISK_NODE(rqNodes, redundantReadNodeName);
 1273         RF_ASSERT(pda);
 1274         DISK_NODE_PARAMS(rqNodes[0], pda);
 1275         if (nPQNodes == 2) {
 1276                 pda++;
 1277                 INIT_DISK_NODE(rpNodes + 1, "Rp");
 1278                 RF_ASSERT(pda);
 1279                 DISK_NODE_PARAMS(rpNodes[1], pda);
 1280                 pda++;
 1281                 INIT_DISK_NODE(rqNodes + 1, redundantReadNodeName);
 1282                 RF_ASSERT(pda);
 1283                 DISK_NODE_PARAMS(rqNodes[1], pda);
 1284         }
 1285         /* Fill in recovery node params. */
 1286         for (i = 0; i < nReadNodes; i++)
 1287                 recoveryNode->params[i] = rudNodes[i].params[0]; /* pda */
 1288         recoveryNode->params[i++].p = (void *) raidPtr;
 1289         recoveryNode->params[i++].p = (void *) asmap;
 1290         recoveryNode->results[0] = failedPDA;
 1291         if (asmap->numDataFailed == 2)
 1292                 recoveryNode->results[1] = failedPDAtwo;
 1293 
 1294         /* Zero fill the target data buffers ? */
 1295 }

/* [<][>][^][v][top][bottom][index][help] */