root/dev/raidframe/rf_parityloggingdags.c


DEFINITIONS

This source file includes the following definitions:
  1. rf_CommonCreateParityLoggingLargeWriteDAG
  2. rf_CommonCreateParityLoggingSmallWriteDAG
  3. rf_CreateParityLoggingSmallWriteDAG
  4. rf_CreateParityLoggingLargeWriteDAG

    1 /*      $OpenBSD: rf_parityloggingdags.c,v 1.4 2002/12/16 07:01:04 tdeval Exp $ */
    2 /*      $NetBSD: rf_parityloggingdags.c,v 1.4 2000/01/07 03:41:04 oster Exp $   */
    3 
    4 /*
    5  * Copyright (c) 1995 Carnegie-Mellon University.
    6  * All rights reserved.
    7  *
    8  * Author: William V. Courtright II
    9  *
   10  * Permission to use, copy, modify and distribute this software and
   11  * its documentation is hereby granted, provided that both the copyright
   12  * notice and this permission notice appear in all copies of the
   13  * software, derivative works or modified versions, and any portions
   14  * thereof, and that both notices appear in supporting documentation.
   15  *
   16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   19  *
   20  * Carnegie Mellon requests users of this software to return to
   21  *
   22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   23  *  School of Computer Science
   24  *  Carnegie Mellon University
   25  *  Pittsburgh PA 15213-3890
   26  *
   27  * any improvements or extensions that they make and grant Carnegie the
   28  * rights to redistribute these changes.
   29  */
   30 
   31 #include "rf_archs.h"
   32 
   33 #if     RF_INCLUDE_PARITYLOGGING > 0
   34 
   35 /*
   36  * DAGs specific to parity logging are created here.
   37  */
   38 
   39 #include "rf_types.h"
   40 #include "rf_raid.h"
   41 #include "rf_dag.h"
   42 #include "rf_dagutils.h"
   43 #include "rf_dagfuncs.h"
   44 #include "rf_debugMem.h"
   45 #include "rf_paritylog.h"
   46 #include "rf_memchunk.h"
   47 #include "rf_general.h"
   48 
   49 #include "rf_parityloggingdags.h"
   50 
   51 /*****************************************************************************
   52  *
   53  * Creates a DAG to perform a large-write operation:
   54  *
   55  *         / Rod \     / Wnd \
   56  * H -- NIL- Rod - NIL - Wnd ------ NIL - T
   57  *         \ Rod /     \ Xor - Lpo /
   58  *
   59  * The writes are not done until the reads complete because if they were done
   60  * in parallel, a failure on one of the reads could leave the parity in an
   61  * inconsistent state, so that the retry with a new DAG would produce
   62  * erroneous parity.
   63  *
   64  * Note:  This DAG has the nasty property that none of the buffers allocated
   65  *        for reading old data can be freed until the XOR node fires.
   66  *        Need to fix this.
   67  *
    68  * The last two arguments are the number of faults tolerated and the
    69  * function for the redundancy calculation. The undo for the redundancy
    70  * calculation is assumed to be null.
   71  *
   72  *****************************************************************************/
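       /*
        * Illustration (hypothetical access, not from the original comment):
        * suppose a write covers 2 of the 4 stripe units in a stripe and each
        * unaccessed unit maps to one Rod node.  The DAG built below then has
        * 2 Rod and 2 Wnd nodes plus one Xor and one Lpo node, together with
        * the block (Nil), sync (Nil), unblock (Nil), and Trm nodes.  The Xor
        * result is not written to the parity disk; the Lpo node records it in
        * the parity log as an overwrite of the stripe's parity.
        */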
   73 
   74 void
   75 rf_CommonCreateParityLoggingLargeWriteDAG(RF_Raid_t * raidPtr,
   76     RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
   77     RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, int nfaults,
   78     int (*redFunc) (RF_DagNode_t *))
   79 {
   80         RF_DagNode_t *nodes, *wndNodes, *rodNodes = NULL, *syncNode, *xorNode;
   81         RF_DagNode_t *lpoNode, *blockNode, *unblockNode, *termNode;
   82         int nWndNodes, nRodNodes, i;
   83         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
   84         RF_AccessStripeMapHeader_t *new_asm_h[2];
   85         int nodeNum, asmNum;
   86         RF_ReconUnitNum_t which_ru;
   87         char *sosBuffer, *eosBuffer;
   88         RF_PhysDiskAddr_t *pda;
   89         RF_StripeNum_t parityStripeID =
   90             rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
   91              asmap->raidAddress, &which_ru);
   92 
   93         if (rf_dagDebug)
   94                 printf("[Creating parity-logging large-write DAG]\n");
    95         RF_ASSERT(nfaults == 1); /* This arch is only single-fault tolerant. */
   96         dag_h->creator = "ParityLoggingLargeWriteDAG";
   97 
    98         /* Alloc the Wnd nodes, the Xor and Lpo nodes, and the 4 control nodes. */
   99         nWndNodes = asmap->numStripeUnitsAccessed;
  100         RF_CallocAndAdd(nodes, nWndNodes + 6, sizeof(RF_DagNode_t),
  101             (RF_DagNode_t *), allocList);
  102         i = 0;
  103         wndNodes = &nodes[i];
  104         i += nWndNodes;
  105         xorNode = &nodes[i];
  106         i += 1;
  107         lpoNode = &nodes[i];
  108         i += 1;
  109         blockNode = &nodes[i];
  110         i += 1;
  111         syncNode = &nodes[i];
  112         i += 1;
  113         unblockNode = &nodes[i];
  114         i += 1;
  115         termNode = &nodes[i];
  116         i += 1;
  117 
  118         dag_h->numCommitNodes = nWndNodes + 1;
  119         dag_h->numCommits = 0;
  120         dag_h->numSuccedents = 1;
  121 
  122         rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
  123             new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList);
  124         if (nRodNodes > 0)
  125                 RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t),
  126                     (RF_DagNode_t *), allocList);
  127 
  128         /* Begin node initialization. */
  129         rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
  130             rf_NullNodeUndoFunc, NULL, nRodNodes + 1, 0, 0, 0, dag_h,
  131             "Nil", allocList);
  132         rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
  133             rf_NullNodeUndoFunc, NULL, 1, nWndNodes + 1, 0, 0, dag_h,
  134             "Nil", allocList);
  135         rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
  136             rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes + 1,
  137             0, 0, dag_h, "Nil", allocList);
  138         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
  139             rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
  140 
  141         /* Initialize the Rod nodes. */
  142         for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
  143                 if (new_asm_h[asmNum]) {
  144                         pda = new_asm_h[asmNum]->stripeMap->physInfo;
  145                         while (pda) {
  146                                 rf_InitNode(&rodNodes[nodeNum], rf_wait,
  147                                     RF_FALSE, rf_DiskReadFunc,
  148                                     rf_DiskReadUndoFunc, rf_GenericWakeupFunc,
  149                                     1, 1, 4, 0, dag_h, "Rod", allocList);
  150                                 rodNodes[nodeNum].params[0].p = pda;
  151                                 rodNodes[nodeNum].params[1].p = pda->bufPtr;
  152                                 rodNodes[nodeNum].params[2].v = parityStripeID;
  153                                 rodNodes[nodeNum].params[3].v =
  154                                     RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
  155                                      0, 0, which_ru);
  156                                 nodeNum++;
  157                                 pda = pda->next;
  158                         }
  159                 }
  160         }
  161         RF_ASSERT(nodeNum == nRodNodes);
  162 
  163         /* Initialize the wnd nodes. */
  164         pda = asmap->physInfo;
  165         for (i = 0; i < nWndNodes; i++) {
  166                 rf_InitNode(&wndNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc,
  167                     rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
  168                     dag_h, "Wnd", allocList);
  169                 RF_ASSERT(pda != NULL);
  170                 wndNodes[i].params[0].p = pda;
  171                 wndNodes[i].params[1].p = pda->bufPtr;
  172                 wndNodes[i].params[2].v = parityStripeID;
  173                 wndNodes[i].params[3].v =
  174                     RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
  175                 pda = pda->next;
  176         }
  177 
  178         /* Initialize the redundancy node. */
  179         rf_InitNode(xorNode, rf_wait, RF_TRUE, redFunc, rf_NullNodeUndoFunc,
  180             NULL, 1, 1, 2 * (nWndNodes + nRodNodes) + 1, 1, dag_h,
  181             "Xr ", allocList);
  182         xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
  183         for (i = 0; i < nWndNodes; i++) {
  184                 /* pda */
  185                 xorNode->params[2 * i + 0] = wndNodes[i].params[0];
  186                 /* buf ptr */
  187                 xorNode->params[2 * i + 1] = wndNodes[i].params[1];
  188         }
  189         for (i = 0; i < nRodNodes; i++) {
  190                 xorNode->params[2 * (nWndNodes + i) + 0] =
  191                     rodNodes[i].params[0];      /* pda */
  192                 xorNode->params[2 * (nWndNodes + i) + 1] =
  193                     rodNodes[i].params[1];      /* buf ptr */
  194         }
  195         /* Xor node needs to get at RAID information. */
  196         xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
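               /*
                * For illustration (hypothetical sizes): with nWndNodes == 2 and
                * nRodNodes == 1 the Xor node's parameter list built above is
                *   params[0,1] = Wnd[0] {pda, bufPtr}
                *   params[2,3] = Wnd[1] {pda, bufPtr}
                *   params[4,5] = Rod[0] {pda, bufPtr}
                *   params[6]   = raidPtr
                * i.e. 2 * (nWndNodes + nRodNodes) + 1 = 7 entries, matching the
                * parameter count passed to rf_InitNode() for the Xor node.
                */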
  197 
  198         /*
  199          * Look for an Rod node that reads a complete SU. If none, alloc a
  200          * buffer to receive the parity info. Note that we can't use a new
  201          * data buffer because it will not have gotten written when the xor
  202          * occurs.
  203          */
  204         for (i = 0; i < nRodNodes; i++)
  205                 if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)
  206                     ->numSector == raidPtr->Layout.sectorsPerStripeUnit)
  207                         break;
  208         if (i == nRodNodes) {
  209                 RF_CallocAndAdd(xorNode->results[0], 1,
  210                     rf_RaidAddressToByte(raidPtr,
  211                      raidPtr->Layout.sectorsPerStripeUnit), (void *),
  212                     allocList);
  213         } else {
  214                 xorNode->results[0] = rodNodes[i].params[1].p;
  215         }
  216 
  217         /* Initialize the Lpo node. */
  218         rf_InitNode(lpoNode, rf_wait, RF_FALSE, rf_ParityLogOverwriteFunc,
  219             rf_ParityLogOverwriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0,
  220             dag_h, "Lpo", allocList);
  221 
  222         lpoNode->params[0].p = asmap->parityInfo;
  223         lpoNode->params[1].p = xorNode->results[0];
  224         /* parityInfo must describe entire parity unit. */
  225         RF_ASSERT(asmap->parityInfo->next == NULL);
  226 
  227         /* Connect nodes to form graph. */
  228 
  229         /* Connect dag header to block node. */
  230         RF_ASSERT(dag_h->numSuccedents == 1);
  231         RF_ASSERT(blockNode->numAntecedents == 0);
  232         dag_h->succedents[0] = blockNode;
  233 
  234         /* Connect the block node to the Rod nodes. */
  235         RF_ASSERT(blockNode->numSuccedents == nRodNodes + 1);
  236         for (i = 0; i < nRodNodes; i++) {
  237                 RF_ASSERT(rodNodes[i].numAntecedents == 1);
  238                 blockNode->succedents[i] = &rodNodes[i];
  239                 rodNodes[i].antecedents[0] = blockNode;
  240                 rodNodes[i].antType[0] = rf_control;
  241         }
  242 
  243         /* Connect the block node to the sync node. */
  244         /* necessary if nRodNodes == 0 */
  245         RF_ASSERT(syncNode->numAntecedents == nRodNodes + 1);
  246         blockNode->succedents[nRodNodes] = syncNode;
  247         syncNode->antecedents[0] = blockNode;
  248         syncNode->antType[0] = rf_control;
  249 
  250         /* Connect the Rod nodes to the syncNode. */
  251         for (i = 0; i < nRodNodes; i++) {
  252                 rodNodes[i].succedents[0] = syncNode;
  253                 syncNode->antecedents[1 + i] = &rodNodes[i];
  254                 syncNode->antType[1 + i] = rf_control;
  255         }
  256 
  257         /* Connect the sync node to the xor node. */
  258         RF_ASSERT(syncNode->numSuccedents == nWndNodes + 1);
  259         RF_ASSERT(xorNode->numAntecedents == 1);
  260         syncNode->succedents[0] = xorNode;
  261         xorNode->antecedents[0] = syncNode;
  262         xorNode->antType[0] = rf_trueData;      /* Carry forward from sync. */
  263 
  264         /* Connect the sync node to the Wnd nodes. */
  265         for (i = 0; i < nWndNodes; i++) {
   266                 RF_ASSERT(wndNodes[i].numAntecedents == 1);
  267                 syncNode->succedents[1 + i] = &wndNodes[i];
  268                 wndNodes[i].antecedents[0] = syncNode;
  269                 wndNodes[i].antType[0] = rf_control;
  270         }
  271 
  272         /* Connect the xor node to the Lpo node. */
  273         RF_ASSERT(xorNode->numSuccedents == 1);
  274         RF_ASSERT(lpoNode->numAntecedents == 1);
  275         xorNode->succedents[0] = lpoNode;
  276         lpoNode->antecedents[0] = xorNode;
  277         lpoNode->antType[0] = rf_trueData;
  278 
  279         /* Connect the Wnd nodes to the unblock node. */
  280         RF_ASSERT(unblockNode->numAntecedents == nWndNodes + 1);
  281         for (i = 0; i < nWndNodes; i++) {
   282                 RF_ASSERT(wndNodes[i].numSuccedents == 1);
  283                 wndNodes[i].succedents[0] = unblockNode;
  284                 unblockNode->antecedents[i] = &wndNodes[i];
  285                 unblockNode->antType[i] = rf_control;
  286         }
  287 
  288         /* Connect the Lpo node to the unblock node. */
  289         RF_ASSERT(lpoNode->numSuccedents == 1);
  290         lpoNode->succedents[0] = unblockNode;
  291         unblockNode->antecedents[nWndNodes] = lpoNode;
  292         unblockNode->antType[nWndNodes] = rf_control;
  293 
  294         /* Connect unblock node to terminator. */
  295         RF_ASSERT(unblockNode->numSuccedents == 1);
  296         RF_ASSERT(termNode->numAntecedents == 1);
  297         RF_ASSERT(termNode->numSuccedents == 0);
  298         unblockNode->succedents[0] = termNode;
  299         termNode->antecedents[0] = unblockNode;
  300         termNode->antType[0] = rf_control;
  301 }
  302 
  303 
  304 /*****************************************************************************
  305  *
  306  * Creates a DAG to perform a small-write operation (either raid 5 or pq),
  307  * which is as follows:
  308  *
  309  *                                     Header
  310  *                                        |
  311  *                                      Block
  312  *                                  / |  ... \   \
  313  *                                 /  |       \   \
  314  *                              Rod  Rod      Rod  Rop
  315  *                               | \ /| \    / |  \/ |
  316  *                               |    |        |  /\ |
  317  *                              Wnd  Wnd      Wnd   X
  318  *                               |    \       /     |
  319  *                               |     \     /      |
   320  *                                \     \   /      Lpu
  321  *                                 \     \ /       /
  322  *                                  +-> Unblock <-+
  323  *                                        |
  324  *                                        T
  325  *
  326  *
  327  * R = Read, W = Write, X = Xor, o = old, n = new, d = data, p = parity.
  328  * When the access spans a stripe unit boundary and is less than one SU in
   329  * size, there will be two Rop -- X -- Lpu branches. I call this the
  330  * "double-XOR" case.
  331  * The second output from each Rod node goes to the X node. In the double-XOR
  332  * case, there are exactly 2 Rod nodes, and each sends one output to one X
  333  * node.
  334  * There is one Rod -- Wnd -- T branch for each stripe unit being updated.
  335  *
  336  * The block and unblock nodes are unused. See comment above
  337  * CreateFaultFreeReadDAG.
  338  *
  339  * Note:  This DAG ignores all the optimizations related to making the RMWs
  340  *        atomic.
  341  *        It also has the nasty property that none of the buffers allocated
  342  *        for reading old data & parity can be freed until the XOR node fires.
  343  *        Need to fix this.
  344  *
   345  * A null qfuncs indicates single-fault tolerance.
  346  *****************************************************************************/
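       /*
        * Illustration of the double-XOR case (hypothetical access): a write
        * that starts in one stripe unit and ends in the next, but is smaller
        * than one SU, gives numStripeUnitsAccessed == 2 and a parityInfo list
        * with two entries, so numDataNodes == 2 and numParityNodes == 2 below.
        * The DAG then contains 2 Rod, 2 Rop, 2 Wnd, 2 Xor, and 2 Lpu nodes,
        * with Xor[i] combining the old data, old parity, and new data of
        * range i and feeding its result to Lpu[i].
        */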
  347 
  348 void
  349 rf_CommonCreateParityLoggingSmallWriteDAG(RF_Raid_t *raidPtr,
  350     RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
  351     RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList,
  352     RF_RedFuncs_t *pfuncs, RF_RedFuncs_t *qfuncs)
  353 {
  354         RF_DagNode_t *xorNodes, *blockNode, *unblockNode, *nodes;
  355         RF_DagNode_t *readDataNodes, *readParityNodes;
  356         RF_DagNode_t *writeDataNodes, *lpuNodes;
  357         RF_DagNode_t *unlockDataNodes = NULL, *termNode;
  358         RF_PhysDiskAddr_t *pda = asmap->physInfo;
  359         int numDataNodes = asmap->numStripeUnitsAccessed;
  360         int numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
  361         int i, j, nNodes, totalNumNodes;
  362         RF_ReconUnitNum_t which_ru;
  363         int (*func) (RF_DagNode_t * node), (*undoFunc) (RF_DagNode_t * node);
  364         int (*qfunc) (RF_DagNode_t * node);
   365         char *name, *qname;
  366         RF_StripeNum_t parityStripeID =
  367             rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
  368              asmap->raidAddress, &which_ru);
  369         long nfaults = qfuncs ? 2 : 1;
  370         int lu_flag = (rf_enableAtomicRMW) ? 1 : 0;     /* Lock/unlock flag. */
  371 
  372         if (rf_dagDebug)
  373                 printf("[Creating parity-logging small-write DAG]\n");
  374         RF_ASSERT(numDataNodes > 0);
  375         RF_ASSERT(nfaults == 1);
  376         dag_h->creator = "ParityLoggingSmallWriteDAG";
  377 
  378         /*
   379          * DAG creation occurs in four steps:
  380          * 1. Count the number of nodes in the DAG.
  381          * 2. Create the nodes.
  382          * 3. Initialize the nodes.
  383          * 4. Connect the nodes.
  384          */
  385 
  386         /* Step 1. Compute number of nodes in the graph. */
  387 
  388         /*
   389          * Number of nodes: a read and write for each data unit, a redundancy
   390          * computation node for each parity unit, a read and Lpu for each
   391          * parity unit, a block and unblock node (2), a terminator node, and,
   392          * if atomic RMW, an unlock node for each data unit.
  393          */
  394         totalNumNodes = (2 * numDataNodes) + numParityNodes +
  395             (2 * numParityNodes) + 3;
  396         if (lu_flag)
  397                 totalNumNodes += numDataNodes;
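               /*
                * Worked example: with numDataNodes == 2, numParityNodes == 1,
                * and lu_flag == 0, totalNumNodes = (2 * 2) + 1 + (2 * 1) + 3
                * = 10: 2 Rod, 2 Wnd, 1 Xor, 1 Rop, 1 Lpu, plus the block,
                * unblock, and Trm nodes.  With lu_flag set, 2 Und nodes are
                * added, one per data unit.
                */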
  398 
  399         nNodes = numDataNodes + numParityNodes;
  400 
  401         dag_h->numCommitNodes = numDataNodes + numParityNodes;
  402         dag_h->numCommits = 0;
  403         dag_h->numSuccedents = 1;
  404 
  405         /* Step 2. Create the nodes. */
  406         RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t),
  407             (RF_DagNode_t *), allocList);
  408         i = 0;
  409         blockNode = &nodes[i];
  410         i += 1;
  411         unblockNode = &nodes[i];
  412         i += 1;
  413         readDataNodes = &nodes[i];
  414         i += numDataNodes;
  415         readParityNodes = &nodes[i];
  416         i += numParityNodes;
  417         writeDataNodes = &nodes[i];
  418         i += numDataNodes;
  419         lpuNodes = &nodes[i];
  420         i += numParityNodes;
  421         xorNodes = &nodes[i];
  422         i += numParityNodes;
  423         termNode = &nodes[i];
  424         i += 1;
  425         if (lu_flag) {
  426                 unlockDataNodes = &nodes[i];
  427                 i += numDataNodes;
  428         }
  429         RF_ASSERT(i == totalNumNodes);
  430 
  431         /* Step 3. Initialize the nodes. */
  432         /* Initialize block node (Nil). */
  433         rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
  434             rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h,
  435             "Nil", allocList);
  436 
  437         /* Initialize unblock node (Nil). */
  438         rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
  439             rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h,
  440             "Nil", allocList);
  441 
   442         /* Initialize terminator node (Trm). */
  443         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
  444             rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
  445 
  446         /* Initialize nodes which read old data (Rod). */
  447         for (i = 0; i < numDataNodes; i++) {
  448                 rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE,
  449                     rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc,
  450                     nNodes, 1, 4, 0, dag_h, "Rod", allocList);
  451                 RF_ASSERT(pda != NULL);
  452                 /* Physical disk addr desc. */
  453                 readDataNodes[i].params[0].p = pda;
  454                 readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h,
  455                     pda, allocList);    /* Buffer to hold old data. */
  456                 readDataNodes[i].params[2].v = parityStripeID;
  457                 readDataNodes[i].params[3].v =
  458                     RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag,
  459                     0, which_ru);
  460                 pda = pda->next;
  461                 readDataNodes[i].propList[0] = NULL;
  462                 readDataNodes[i].propList[1] = NULL;
  463         }
  464 
  465         /* Initialize nodes which read old parity (Rop). */
  466         pda = asmap->parityInfo;
  467         i = 0;
  468         for (i = 0; i < numParityNodes; i++) {
  469                 RF_ASSERT(pda != NULL);
  470                 rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE,
  471                     rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc,
  472                     nNodes, 1, 4, 0, dag_h, "Rop", allocList);
  473                 readParityNodes[i].params[0].p = pda;
  474                 readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h,
  475                     pda, allocList);    /* Buffer to hold old parity. */
  476                 readParityNodes[i].params[2].v = parityStripeID;
  477                 readParityNodes[i].params[3].v =
  478                     RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
  479                 readParityNodes[i].propList[0] = NULL;
  480                 pda = pda->next;
  481         }
  482 
  483         /* Initialize nodes which write new data (Wnd). */
  484         pda = asmap->physInfo;
  485         for (i = 0; i < numDataNodes; i++) {
  486                 RF_ASSERT(pda != NULL);
  487                 rf_InitNode(&writeDataNodes[i], rf_wait, RF_TRUE,
  488                     rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
  489                     rf_GenericWakeupFunc, 1, nNodes, 4, 0, dag_h,
  490                     "Wnd", allocList);
  491                 /* Physical disk addr desc. */
  492                 writeDataNodes[i].params[0].p = pda;
  493                 /* Buffer holding new data to be written. */
  494                 writeDataNodes[i].params[1].p = pda->bufPtr;
  495                 writeDataNodes[i].params[2].v = parityStripeID;
  496                 writeDataNodes[i].params[3].v =
  497                     RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
  498 
  499                 if (lu_flag) {
  500                         /* Initialize node to unlock the disk queue. */
  501                         rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE,
  502                             rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc,
  503                             rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h,
  504                             "Und", allocList);
  505                         /* Physical disk addr desc. */
  506                         unlockDataNodes[i].params[0].p = pda;
  507                         unlockDataNodes[i].params[1].v =
  508                             RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0,
  509                             lu_flag, which_ru);
  510                 }
  511                 pda = pda->next;
  512         }
  513 
  514 
  515         /* Initialize nodes which compute new parity. */
  516         /*
  517          * We use the simple XOR func in the double-XOR case, and when we're
  518          * accessing only a portion of one stripe unit. The distinction
  519          * between the two is that the regular XOR func assumes that the
  520          * targbuf is a full SU in size, and examines the pda associated with
  521          * the buffer to decide where within the buffer to XOR the data,
  522          * whereas the simple XOR func just XORs the data into the start of
  523          * the buffer.
  524          */
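               /*
                * Concretely: the simple function is used for the double-XOR
                * case (numParityNodes == 2) and for a single-unit write that
                * covers only part of one SU; any other access falls through to
                * the regular function, which uses each parameter's pda to find
                * where within the target buffer its data belongs.
                */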
  525         if ((numParityNodes == 2) || ((numDataNodes == 1) &&
  526             (asmap->totalSectorsAccessed <
  527              raidPtr->Layout.sectorsPerStripeUnit))) {
  528                 func = pfuncs->simple;
  529                 undoFunc = rf_NullNodeUndoFunc;
  530                 name = pfuncs->SimpleName;
  531                 if (qfuncs) {
  532                         qfunc = qfuncs->simple;
  533                         qname = qfuncs->SimpleName;
  534                 }
  535         } else {
  536                 func = pfuncs->regular;
  537                 undoFunc = rf_NullNodeUndoFunc;
  538                 name = pfuncs->RegularName;
  539                 if (qfuncs) {
  540                         qfunc = qfuncs->regular;
  541                         qname = qfuncs->RegularName;
  542                 }
  543         }
  544         /*
  545          * Initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop}
  546          * nodes, and raidPtr.
  547          */
  548         if (numParityNodes == 2) {      /* Double-XOR case. */
  549                 for (i = 0; i < numParityNodes; i++) {
  550                         rf_InitNode(&xorNodes[i], rf_wait, RF_TRUE, func,
  551                             undoFunc, NULL, 1, nNodes, 7, 1, dag_h, name,
  552                             allocList); /* No wakeup func for XOR. */
  553                         xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD;
  554                         xorNodes[i].params[0] = readDataNodes[i].params[0];
  555                         xorNodes[i].params[1] = readDataNodes[i].params[1];
  556                         xorNodes[i].params[2] = readParityNodes[i].params[0];
  557                         xorNodes[i].params[3] = readParityNodes[i].params[1];
  558                         xorNodes[i].params[4] = writeDataNodes[i].params[0];
  559                         xorNodes[i].params[5] = writeDataNodes[i].params[1];
  560                         xorNodes[i].params[6].p = raidPtr;
  561                         /* Use old parity buf as target buf. */
  562                         xorNodes[i].results[0] = readParityNodes[i].params[1].p;
  563                 }
  564         } else {
  565                 /* There is only one xor node in this case. */
  566                 rf_InitNode(&xorNodes[0], rf_wait, RF_TRUE, func, undoFunc,
  567                     NULL, 1, nNodes,
  568                     (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
  569                     dag_h, name, allocList);
  570                 xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD;
  571                 for (i = 0; i < numDataNodes + 1; i++) {
  572                         /* Set up params related to Rod and Rop nodes. */
  573                         xorNodes[0].params[2 * i + 0] =
  574                             readDataNodes[i].params[0]; /* pda */
  575                         xorNodes[0].params[2 * i + 1] =
  576                             readDataNodes[i].params[1]; /* Buffer pointer */
  577                 }
  578                 for (i = 0; i < numDataNodes; i++) {
  579                         /* Set up params related to Wnd and Wnp nodes. */
  580                         xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] =
  581                             writeDataNodes[i].params[0]; /* pda */
  582                         xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] =
  583                             writeDataNodes[i].params[1]; /* Buffer pointer */
  584                 }
  585                 xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p =
  586                     raidPtr;    /* Xor node needs to get at RAID information. */
  587                 xorNodes[0].results[0] = readParityNodes[0].params[1].p;
  588         }
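               /*
                * Note on the branch above: the read loop deliberately runs to
                * numDataNodes + 1.  readParityNodes immediately follows
                * readDataNodes in the nodes[] array, so readDataNodes[numDataNodes]
                * is readParityNodes[0], and the old-parity {pda, buffer} pair is
                * picked up as the last read parameter.  For example, with
                * numDataNodes == 2 the layout is params[0..3] = Rod {pda, buf}
                * pairs, params[4,5] = Rop[0], params[6..9] = Wnd pairs, and
                * params[10] = raidPtr, i.e.
                * 2 * (numDataNodes + numDataNodes + 1) + 1 = 11 entries.
                */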
  589 
  590         /* Initialize the log node(s). */
  591         pda = asmap->parityInfo;
  592         for (i = 0; i < numParityNodes; i++) {
  593                 RF_ASSERT(pda);
  594                 rf_InitNode(&lpuNodes[i], rf_wait, RF_FALSE,
  595                     rf_ParityLogUpdateFunc, rf_ParityLogUpdateUndoFunc,
  596                     rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpu", allocList);
  597                 lpuNodes[i].params[0].p = pda;  /* PhysDiskAddr of parity. */
  598                 /* Buffer pointer to parity. */
  599                 lpuNodes[i].params[1].p = xorNodes[i].results[0];
  600                 pda = pda->next;
  601         }
  602 
  603 
  604         /* Step 4. Connect the nodes. */
  605 
  606         /* Connect header to block node. */
  607         RF_ASSERT(dag_h->numSuccedents == 1);
  608         RF_ASSERT(blockNode->numAntecedents == 0);
  609         dag_h->succedents[0] = blockNode;
  610 
  611         /* Connect block node to read old data nodes. */
  612         RF_ASSERT(blockNode->numSuccedents == (numDataNodes + numParityNodes));
  613         for (i = 0; i < numDataNodes; i++) {
  614                 blockNode->succedents[i] = &readDataNodes[i];
  615                 RF_ASSERT(readDataNodes[i].numAntecedents == 1);
  616                 readDataNodes[i].antecedents[0] = blockNode;
  617                 readDataNodes[i].antType[0] = rf_control;
  618         }
  619 
  620         /* Connect block node to read old parity nodes. */
  621         for (i = 0; i < numParityNodes; i++) {
  622                 blockNode->succedents[numDataNodes + i] = &readParityNodes[i];
  623                 RF_ASSERT(readParityNodes[i].numAntecedents == 1);
  624                 readParityNodes[i].antecedents[0] = blockNode;
  625                 readParityNodes[i].antType[0] = rf_control;
  626         }
  627 
  628         /* Connect read old data nodes to write new data nodes. */
  629         for (i = 0; i < numDataNodes; i++) {
  630                 RF_ASSERT(readDataNodes[i].numSuccedents ==
  631                           numDataNodes + numParityNodes);
  632                 for (j = 0; j < numDataNodes; j++) {
  633                         RF_ASSERT(writeDataNodes[j].numAntecedents ==
  634                                   numDataNodes + numParityNodes);
  635                         readDataNodes[i].succedents[j] = &writeDataNodes[j];
  636                         writeDataNodes[j].antecedents[i] = &readDataNodes[i];
  637                         if (i == j)
  638                                 writeDataNodes[j].antType[i] = rf_antiData;
  639                         else
  640                                 writeDataNodes[j].antType[i] = rf_control;
  641                 }
  642         }
  643 
  644         /* Connect read old data nodes to xor nodes. */
  645         for (i = 0; i < numDataNodes; i++)
  646                 for (j = 0; j < numParityNodes; j++) {
  647                         RF_ASSERT(xorNodes[j].numAntecedents ==
  648                                   numDataNodes + numParityNodes);
  649                         readDataNodes[i].succedents[numDataNodes + j] =
  650                             &xorNodes[j];
  651                         xorNodes[j].antecedents[i] = &readDataNodes[i];
  652                         xorNodes[j].antType[i] = rf_trueData;
  653                 }
  654 
  655         /* Connect read old parity nodes to write new data nodes. */
  656         for (i = 0; i < numParityNodes; i++) {
  657                 RF_ASSERT(readParityNodes[i].numSuccedents ==
  658                           numDataNodes + numParityNodes);
  659                 for (j = 0; j < numDataNodes; j++) {
  660                         readParityNodes[i].succedents[j] = &writeDataNodes[j];
  661                         writeDataNodes[j].antecedents[numDataNodes + i] =
  662                             &readParityNodes[i];
  663                         writeDataNodes[j].antType[numDataNodes + i] =
  664                             rf_control;
  665                 }
  666         }
  667 
  668         /* Connect read old parity nodes to xor nodes. */
  669         for (i = 0; i < numParityNodes; i++)
  670                 for (j = 0; j < numParityNodes; j++) {
  671                         readParityNodes[i].succedents[numDataNodes + j] =
  672                             &xorNodes[j];
  673                         xorNodes[j].antecedents[numDataNodes + i] =
  674                             &readParityNodes[i];
  675                         xorNodes[j].antType[numDataNodes + i] = rf_trueData;
  676                 }
  677 
  678         /* Connect xor nodes to write new parity nodes. */
  679         for (i = 0; i < numParityNodes; i++) {
  680                 RF_ASSERT(xorNodes[i].numSuccedents == 1);
  681                 RF_ASSERT(lpuNodes[i].numAntecedents == 1);
  682                 xorNodes[i].succedents[0] = &lpuNodes[i];
  683                 lpuNodes[i].antecedents[0] = &xorNodes[i];
  684                 lpuNodes[i].antType[0] = rf_trueData;
  685         }
  686 
  687         for (i = 0; i < numDataNodes; i++) {
  688                 if (lu_flag) {
  689                         /* Connect write new data nodes to unlock nodes. */
  690                         RF_ASSERT(writeDataNodes[i].numSuccedents == 1);
  691                         RF_ASSERT(unlockDataNodes[i].numAntecedents == 1);
  692                         writeDataNodes[i].succedents[0] = &unlockDataNodes[i];
  693                         unlockDataNodes[i].antecedents[0] = &writeDataNodes[i];
  694                         unlockDataNodes[i].antType[0] = rf_control;
  695 
  696                         /* Connect unlock nodes to unblock node. */
  697                         RF_ASSERT(unlockDataNodes[i].numSuccedents == 1);
  698                         RF_ASSERT(unblockNode->numAntecedents ==
  699                                   (numDataNodes + (nfaults * numParityNodes)));
  700                         unlockDataNodes[i].succedents[0] = unblockNode;
  701                         unblockNode->antecedents[i] = &unlockDataNodes[i];
  702                         unblockNode->antType[i] = rf_control;
  703                 } else {
  704                         /* Connect write new data nodes to unblock node. */
  705                         RF_ASSERT(writeDataNodes[i].numSuccedents == 1);
  706                         RF_ASSERT(unblockNode->numAntecedents ==
  707                                   (numDataNodes + (nfaults * numParityNodes)));
  708                         writeDataNodes[i].succedents[0] = unblockNode;
  709                         unblockNode->antecedents[i] = &writeDataNodes[i];
  710                         unblockNode->antType[i] = rf_control;
  711                 }
  712         }
  713 
  714         /* Connect write new parity nodes to unblock node. */
  715         for (i = 0; i < numParityNodes; i++) {
  716                 RF_ASSERT(lpuNodes[i].numSuccedents == 1);
  717                 lpuNodes[i].succedents[0] = unblockNode;
  718                 unblockNode->antecedents[numDataNodes + i] = &lpuNodes[i];
  719                 unblockNode->antType[numDataNodes + i] = rf_control;
  720         }
  721 
  722         /* Connect unblock node to terminator. */
  723         RF_ASSERT(unblockNode->numSuccedents == 1);
  724         RF_ASSERT(termNode->numAntecedents == 1);
  725         RF_ASSERT(termNode->numSuccedents == 0);
  726         unblockNode->succedents[0] = termNode;
  727         termNode->antecedents[0] = unblockNode;
  728         termNode->antType[0] = rf_control;
  729 }
  730 
  731 
  732 void
  733 rf_CreateParityLoggingSmallWriteDAG(RF_Raid_t *raidPtr,
  734     RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
  735     RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList,
  736     RF_RedFuncs_t *pfuncs, RF_RedFuncs_t *qfuncs)
  737 {
  738         dag_h->creator = "ParityLoggingSmallWriteDAG";
  739         rf_CommonCreateParityLoggingSmallWriteDAG(raidPtr, asmap, dag_h, bp,
  740             flags, allocList, &rf_xorFuncs, NULL);
  741 }
  742 
  743 
  744 void
  745 rf_CreateParityLoggingLargeWriteDAG(RF_Raid_t *raidPtr,
  746     RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
  747     RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, int nfaults,
  748     int (*redFunc) (RF_DagNode_t *))
  749 {
   750         dag_h->creator = "ParityLoggingLargeWriteDAG";
  751         rf_CommonCreateParityLoggingLargeWriteDAG(raidPtr, asmap, dag_h, bp,
  752             flags, allocList, 1, rf_RegularXorFunc);
  753 }
  754 #endif  /* RF_INCLUDE_PARITYLOGGING > 0 */
