root/dev/raidframe/rf_dagffrd.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. rf_CreateFaultFreeReadDAG
  2. rf_CreateNonredundantDAG
  3. rf_CreateMirrorReadDAG
  4. rf_CreateMirrorIdleReadDAG
  5. rf_CreateMirrorPartitionReadDAG

    1 /*      $OpenBSD: rf_dagffrd.c,v 1.4 2002/12/16 07:01:03 tdeval Exp $   */
    2 /*      $NetBSD: rf_dagffrd.c,v 1.4 2000/01/07 03:40:58 oster Exp $     */
    3 
    4 /*
    5  * Copyright (c) 1995 Carnegie-Mellon University.
    6  * All rights reserved.
    7  *
    8  * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
    9  *
   10  * Permission to use, copy, modify and distribute this software and
   11  * its documentation is hereby granted, provided that both the copyright
   12  * notice and this permission notice appear in all copies of the
   13  * software, derivative works or modified versions, and any portions
   14  * thereof, and that both notices appear in supporting documentation.
   15  *
   16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   19  *
   20  * Carnegie Mellon requests users of this software to return to
   21  *
   22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   23  *  School of Computer Science
   24  *  Carnegie Mellon University
   25  *  Pittsburgh PA 15213-3890
   26  *
   27  * any improvements or extensions that they make and grant Carnegie the
   28  * rights to redistribute these changes.
   29  */
   30 
   31 /*
   32  * rf_dagffrd.c
   33  *
   34  * Code for creating fault-free read DAGs.
   35  *
   36  */
   37 
   38 #include "rf_types.h"
   39 #include "rf_raid.h"
   40 #include "rf_dag.h"
   41 #include "rf_dagutils.h"
   42 #include "rf_dagfuncs.h"
   43 #include "rf_debugMem.h"
   44 #include "rf_memchunk.h"
   45 #include "rf_general.h"
   46 #include "rf_dagffrd.h"
   47 
   48 void rf_CreateMirrorReadDAG( RF_Raid_t *, RF_AccessStripeMap_t *,
   49         RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *,
   50         int (*) (RF_DagNode_t *));
   51 
   52 /*****************************************************************************
   53  *
   54  * General comments on DAG creation:
   55  *
   56  * All DAGs in this file use roll-away error recovery.  Each DAG has a single
   57  * commit node, usually called "Cmt."  If an error occurs before the Cmt node
   58  * is reached, the execution engine will halt forward execution and work
   59  * backward through the graph, executing the undo functions.  Assuming that
   60  * each node in the graph prior to the Cmt node are undoable and atomic - or -
   61  * does not make changes to permanent state, the graph will fail atomically.
   62  * If an error occurs after the Cmt node executes, the engine will roll-forward
   63  * through the graph, blindly executing nodes until it reaches the end.
   64  * If a graph reaches the end, it is assumed to have completed successfully.
   65  *
   66  * A graph has only 1 Cmt node.
   67  *
   68  *****************************************************************************/
   69 
   70 
   71 /*****************************************************************************
   72  *
   73  * The following wrappers map the standard DAG creation interface to the
   74  * DAG creation routines.  Additionally, these wrappers enable experimentation
   75  * with new DAG structures by providing an extra level of indirection, allowing
   76  * the DAG creation routines to be replaced at this single point.
   77  *
   78  *****************************************************************************/
   79 
   80 void
   81 rf_CreateFaultFreeReadDAG(
   82         RF_Raid_t               *raidPtr,
   83         RF_AccessStripeMap_t    *asmap,
   84         RF_DagHeader_t          *dag_h,
   85         void                    *bp,
   86         RF_RaidAccessFlags_t     flags,
   87         RF_AllocListElem_t      *allocList
   88 )
   89 {
   90         rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
   91             RF_IO_TYPE_READ);
   92 }
   93 
   94 
   95 /*****************************************************************************
   96  *
   97  * DAG creation code begins here.
   98  *
   99  *****************************************************************************/
  100 
  101 /*****************************************************************************
  102  *
  103  * Creates a DAG to perform a nonredundant read or write of data within one
  104  * stripe.
  105  * For reads, this DAG is as follows:
  106  *
  107  *                   /---- read ----\
  108  *    Header -- Block ---- read ---- Commit -- Terminate
  109  *                   \---- read ----/
  110  *
  111  * For writes, this DAG is as follows:
  112  *
  113  *                    /---- write ----\
  114  *    Header -- Commit ---- write ---- Block -- Terminate
  115  *                    \---- write ----/
  116  *
  117  * There is one disk node per stripe unit accessed, and all disk nodes are in
  118  * parallel.
  119  *
  120  * Tricky point here:  The first disk node (read or write) is created
  121  * normally.  Subsequent disk nodes are created by copying the first one,
  122  * and modifying a few params.  The "succedents" and "antecedents" fields are
  123  * _not_ re-created in each node, but rather left pointing to the same array
  124  * that was malloc'd when the first node was created.  Thus, it's essential
  125  * that when this DAG is freed, the succedents and antecedents fields be freed
  126  * in ONLY ONE of the read nodes.  This does not apply to the "params" field
  127  * because it is recreated for each READ node.
  128  *
  129  * Note that normal-priority accesses do not need to be tagged with their
  130  * parity stripe ID, because they will never be promoted.  Hence, I've
  131  * commented-out the code to do this, and marked it with UNNEEDED.
  132  *
  133  *****************************************************************************/
  134 
  135 void
  136 rf_CreateNonredundantDAG(
  137         RF_Raid_t               *raidPtr,
  138         RF_AccessStripeMap_t    *asmap,
  139         RF_DagHeader_t          *dag_h,
  140         void                    *bp,
  141         RF_RaidAccessFlags_t     flags,
  142         RF_AllocListElem_t      *allocList,
  143         RF_IoType_t              type
  144 )
  145 {
  146         RF_DagNode_t *nodes, *diskNodes, *blockNode, *commitNode, *termNode;
  147         RF_PhysDiskAddr_t *pda = asmap->physInfo;
  148         int (*doFunc) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
  149         int i, n, totalNumNodes;
  150         char *name;
  151 
  152         n = asmap->numStripeUnitsAccessed;
  153         dag_h->creator = "NonredundantDAG";
  154 
  155         RF_ASSERT(RF_IO_IS_R_OR_W(type));
  156         switch (type) {
  157         case RF_IO_TYPE_READ:
  158                 doFunc = rf_DiskReadFunc;
  159                 undoFunc = rf_DiskReadUndoFunc;
  160                 name = "R  ";
  161                 if (rf_dagDebug)
  162                         printf("[Creating non-redundant read DAG]\n");
  163                 break;
  164         case RF_IO_TYPE_WRITE:
  165                 doFunc = rf_DiskWriteFunc;
  166                 undoFunc = rf_DiskWriteUndoFunc;
  167                 name = "W  ";
  168                 if (rf_dagDebug)
  169                         printf("[Creating non-redundant write DAG]\n");
  170                 break;
  171         default:
  172                 RF_PANIC();
  173         }
  174 
  175         /*
  176          * For reads, the dag can not commit until the block node is reached.
  177          * For writes, the dag commits immediately.
  178          */
  179         dag_h->numCommitNodes = 1;
  180         dag_h->numCommits = 0;
  181         dag_h->numSuccedents = 1;
  182 
  183         /*
  184          * Node count:
  185          * 1 block node
  186          * n data reads (or writes)
  187          * 1 commit node
  188          * 1 terminator node
  189          */
  190         RF_ASSERT(n > 0);
  191         totalNumNodes = n + 3;
  192         RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t),
  193             (RF_DagNode_t *), allocList);
  194         i = 0;
  195         diskNodes = &nodes[i];
  196         i += n;
  197         blockNode = &nodes[i];
  198         i += 1;
  199         commitNode = &nodes[i];
  200         i += 1;
  201         termNode = &nodes[i];
  202         i += 1;
  203         RF_ASSERT(i == totalNumNodes);
  204 
  205         /* Initialize nodes. */
  206         switch (type) {
  207         case RF_IO_TYPE_READ:
  208                 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
  209                     rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil",
  210                     allocList);
  211                 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
  212                     rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt",
  213                     allocList);
  214                 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
  215                     rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm",
  216                     allocList);
  217                 break;
  218         case RF_IO_TYPE_WRITE:
  219                 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
  220                     rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil",
  221                     allocList);
  222                 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
  223                     rf_NullNodeUndoFunc, NULL, n, 1, 0, 0, dag_h, "Cmt",
  224                     allocList);
  225                 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
  226                     rf_TerminateUndoFunc, NULL, 0, n, 0, 0, dag_h, "Trm",
  227                     allocList);
  228                 break;
  229         default:
  230                 RF_PANIC();
  231         }
  232 
  233         for (i = 0; i < n; i++) {
  234                 RF_ASSERT(pda != NULL);
  235                 rf_InitNode(&diskNodes[i], rf_wait, RF_FALSE, doFunc, undoFunc,
  236                     rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList);
  237                 diskNodes[i].params[0].p = pda;
  238                 diskNodes[i].params[1].p = pda->bufPtr;
  239                 /* Parity stripe id is not necessary. */
  240                 diskNodes[i].params[2].v = 0;
  241                 diskNodes[i].params[3].v =
  242                     RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
  243                 pda = pda->next;
  244         }
  245 
  246         /*
  247          * Connect nodes.
  248          */
  249 
  250         /* Connect hdr to block node. */
  251         RF_ASSERT(blockNode->numAntecedents == 0);
  252         dag_h->succedents[0] = blockNode;
  253 
  254         if (type == RF_IO_TYPE_READ) {
  255                 /* Connecting a nonredundant read DAG. */
  256                 RF_ASSERT(blockNode->numSuccedents == n);
  257                 RF_ASSERT(commitNode->numAntecedents == n);
  258                 for (i = 0; i < n; i++) {
  259                         /* Connect block node to each read node. */
  260                         RF_ASSERT(diskNodes[i].numAntecedents == 1);
  261                         blockNode->succedents[i] = &diskNodes[i];
  262                         diskNodes[i].antecedents[0] = blockNode;
  263                         diskNodes[i].antType[0] = rf_control;
  264 
  265                         /* Connect each read node to the commit node. */
  266                         RF_ASSERT(diskNodes[i].numSuccedents == 1);
  267                         diskNodes[i].succedents[0] = commitNode;
  268                         commitNode->antecedents[i] = &diskNodes[i];
  269                         commitNode->antType[i] = rf_control;
  270                 }
  271                 /* Connect the commit node to the term node. */
  272                 RF_ASSERT(commitNode->numSuccedents == 1);
  273                 RF_ASSERT(termNode->numAntecedents == 1);
  274                 RF_ASSERT(termNode->numSuccedents == 0);
  275                 commitNode->succedents[0] = termNode;
  276                 termNode->antecedents[0] = commitNode;
  277                 termNode->antType[0] = rf_control;
  278         } else {
  279                 /* Connecting a nonredundant write DAG. */
  280                 /* Connect the block node to the commit node. */
  281                 RF_ASSERT(blockNode->numSuccedents == 1);
  282                 RF_ASSERT(commitNode->numAntecedents == 1);
  283                 blockNode->succedents[0] = commitNode;
  284                 commitNode->antecedents[0] = blockNode;
  285                 commitNode->antType[0] = rf_control;
  286 
  287                 RF_ASSERT(commitNode->numSuccedents == n);
  288                 RF_ASSERT(termNode->numAntecedents == n);
  289                 RF_ASSERT(termNode->numSuccedents == 0);
  290                 for (i = 0; i < n; i++) {
  291                         /* Connect the commit node to each write node. */
  292                         RF_ASSERT(diskNodes[i].numAntecedents == 1);
  293                         commitNode->succedents[i] = &diskNodes[i];
  294                         diskNodes[i].antecedents[0] = commitNode;
  295                         diskNodes[i].antType[0] = rf_control;
  296 
  297                         /* Connect each write node to the term node. */
  298                         RF_ASSERT(diskNodes[i].numSuccedents == 1);
  299                         diskNodes[i].succedents[0] = termNode;
  300                         termNode->antecedents[i] = &diskNodes[i];
  301                         termNode->antType[i] = rf_control;
  302                 }
  303         }
  304 }
  305 /*****************************************************************************
  306  * Create a fault-free read DAG for RAID level 1.
  307  *
  308  * Hdr -> Nil -> Rmir -> Cmt -> Trm
  309  *
  310  * The "Rmir" node schedules a read from the disk in the mirror pair with the
  311  * shortest disk queue.  The proper queue is selected at Rmir execution.  This
  312  * deferred mapping is unlike other archs in RAIDframe which generally fix
  313  * mapping at DAG creation time.
  314  *
  315  * Parameters:  raidPtr   - description of the physical array
  316  *              asmap     - logical & physical addresses for this access
  317  *              bp        - buffer ptr (for holding read data)
  318  *              flags     - general flags (e.g. disk locking)
  319  *              allocList - list of memory allocated in DAG creation
  320  *****************************************************************************/
  321 
  322 void
  323 rf_CreateMirrorReadDAG(
  324         RF_Raid_t                *raidPtr,
  325         RF_AccessStripeMap_t     *asmap,
  326         RF_DagHeader_t           *dag_h,
  327         void                     *bp,
  328         RF_RaidAccessFlags_t      flags,
  329         RF_AllocListElem_t       *allocList,
  330         int                     (*readfunc) (RF_DagNode_t *)
  331 )
  332 {
  333         RF_DagNode_t *readNodes, *nodes, *blockNode, *commitNode, *termNode;
  334         RF_PhysDiskAddr_t *data_pda = asmap->physInfo;
  335         RF_PhysDiskAddr_t *parity_pda = asmap->parityInfo;
  336         int i, n, totalNumNodes;
  337 
  338         n = asmap->numStripeUnitsAccessed;
  339         dag_h->creator = "RaidOneReadDAG";
  340         if (rf_dagDebug) {
  341                 printf("[Creating RAID level 1 read DAG]\n");
  342         }
  343         /*
  344          * This dag can not commit until the commit node is reached.
  345          * Errors prior to the commit point imply the dag has failed.
  346          */
  347         dag_h->numCommitNodes = 1;
  348         dag_h->numCommits = 0;
  349         dag_h->numSuccedents = 1;
  350 
  351         /*
  352          * Node count:
  353          * n data reads
  354          * 1 block node
  355          * 1 commit node
  356          * 1 terminator node
  357          */
  358         RF_ASSERT(n > 0);
  359         totalNumNodes = n + 3;
  360         RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t),
  361             (RF_DagNode_t *), allocList);
  362         i = 0;
  363         readNodes = &nodes[i];
  364         i += n;
  365         blockNode = &nodes[i];
  366         i += 1;
  367         commitNode = &nodes[i];
  368         i += 1;
  369         termNode = &nodes[i];
  370         i += 1;
  371         RF_ASSERT(i == totalNumNodes);
  372 
  373         /* Initialize nodes. */
  374         rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
  375             rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil", allocList);
  376         rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
  377             rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt", allocList);
  378         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
  379             rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
  380 
  381         for (i = 0; i < n; i++) {
  382                 RF_ASSERT(data_pda != NULL);
  383                 RF_ASSERT(parity_pda != NULL);
  384                 rf_InitNode(&readNodes[i], rf_wait, RF_FALSE, readfunc,
  385                     rf_DiskReadMirrorUndoFunc, rf_GenericWakeupFunc, 1, 1, 5,
  386                     0, dag_h, "Rmir", allocList);
  387                 readNodes[i].params[0].p = data_pda;
  388                 readNodes[i].params[1].p = data_pda->bufPtr;
  389                 /* Parity stripe id is not necessary. */
  390                 readNodes[i].params[2].p = 0;
  391                 readNodes[i].params[3].v =
  392                     RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
  393                 readNodes[i].params[4].p = parity_pda;
  394                 data_pda = data_pda->next;
  395                 parity_pda = parity_pda->next;
  396         }
  397 
  398         /*
  399          * Connect nodes.
  400          */
  401 
  402         /* Connect hdr to block node. */
  403         RF_ASSERT(blockNode->numAntecedents == 0);
  404         dag_h->succedents[0] = blockNode;
  405 
  406         /* Connect block node to read nodes. */
  407         RF_ASSERT(blockNode->numSuccedents == n);
  408         for (i = 0; i < n; i++) {
  409                 RF_ASSERT(readNodes[i].numAntecedents == 1);
  410                 blockNode->succedents[i] = &readNodes[i];
  411                 readNodes[i].antecedents[0] = blockNode;
  412                 readNodes[i].antType[0] = rf_control;
  413         }
  414 
  415         /* Connect read nodes to commit node. */
  416         RF_ASSERT(commitNode->numAntecedents == n);
  417         for (i = 0; i < n; i++) {
  418                 RF_ASSERT(readNodes[i].numSuccedents == 1);
  419                 readNodes[i].succedents[0] = commitNode;
  420                 commitNode->antecedents[i] = &readNodes[i];
  421                 commitNode->antType[i] = rf_control;
  422         }
  423 
  424         /* Connect commit node to term node. */
  425         RF_ASSERT(commitNode->numSuccedents == 1);
  426         RF_ASSERT(termNode->numAntecedents == 1);
  427         RF_ASSERT(termNode->numSuccedents == 0);
  428         commitNode->succedents[0] = termNode;
  429         termNode->antecedents[0] = commitNode;
  430         termNode->antType[0] = rf_control;
  431 }
  432 
  433 void
  434 rf_CreateMirrorIdleReadDAG(
  435         RF_Raid_t               *raidPtr,
  436         RF_AccessStripeMap_t    *asmap,
  437         RF_DagHeader_t          *dag_h,
  438         void                    *bp,
  439         RF_RaidAccessFlags_t     flags,
  440         RF_AllocListElem_t      *allocList
  441 )
  442 {
  443         rf_CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
  444             rf_DiskReadMirrorIdleFunc);
  445 }
  446 
  447 void
  448 rf_CreateMirrorPartitionReadDAG(
  449         RF_Raid_t               *raidPtr,
  450         RF_AccessStripeMap_t    *asmap,
  451         RF_DagHeader_t          *dag_h,
  452         void                    *bp,
  453         RF_RaidAccessFlags_t     flags,
  454         RF_AllocListElem_t      *allocList
  455 )
  456 {
  457         rf_CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
  458             rf_DiskReadMirrorPartitionFunc);
  459 }

/* [<][>][^][v][top][bottom][index][help] */