root/dev/raidframe/rf_evenodd_dagfuncs.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. rf_RegularPEFunc
  2. rf_RegularONEFunc
  3. rf_SimpleONEFunc
  4. rf_RegularESubroutine
  5. rf_RegularEFunc
  6. rf_DegrESubroutine
  7. rf_Degraded_100_EOFunc
  8. rf_e_EncOneSect
  9. rf_e_encToBuf
  10. rf_RecoveryEFunc
  11. rf_EO_DegradedWriteEFunc
  12. rf_doubleEOdecode
  13. rf_EvenOddDoubleRecoveryFunc
  14. rf_EOWriteDoubleRecoveryFunc

    1 /*      $OpenBSD: rf_evenodd_dagfuncs.c,v 1.7 2002/12/16 07:01:04 tdeval Exp $  */
    2 /*      $NetBSD: rf_evenodd_dagfuncs.c,v 1.6 2000/03/30 12:45:40 augustss Exp $ */
    3 
    4 /*
    5  * Copyright (c) 1995 Carnegie-Mellon University.
    6  * All rights reserved.
    7  *
    8  * Author: ChangMing Wu
    9  *
   10  * Permission to use, copy, modify and distribute this software and
   11  * its documentation is hereby granted, provided that both the copyright
   12  * notice and this permission notice appear in all copies of the
   13  * software, derivative works or modified versions, and any portions
   14  * thereof, and that both notices appear in supporting documentation.
   15  *
   16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   19  *
   20  * Carnegie Mellon requests users of this software to return to
   21  *
   22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   23  *  School of Computer Science
   24  *  Carnegie Mellon University
   25  *  Pittsburgh PA 15213-3890
   26  *
   27  * any improvements or extensions that they make and grant Carnegie the
   28  * rights to redistribute these changes.
   29  */
   30 
   31 /*
   32  * Code for RAID-EVENODD architecture.
   33  */
   34 
   35 #include "rf_types.h"
   36 #include "rf_raid.h"
   37 #include "rf_dag.h"
   38 #include "rf_dagffrd.h"
   39 #include "rf_dagffwr.h"
   40 #include "rf_dagdegrd.h"
   41 #include "rf_dagdegwr.h"
   42 #include "rf_dagutils.h"
   43 #include "rf_dagfuncs.h"
   44 #include "rf_etimer.h"
   45 #include "rf_general.h"
   46 #include "rf_configure.h"
   47 #include "rf_parityscan.h"
   48 #include "rf_evenodd.h"
   49 #include "rf_evenodd_dagfuncs.h"
   50 
   51 /* These redundant functions are for small write. */
   52 RF_RedFuncs_t rf_EOSmallWritePFuncs = {
   53         rf_RegularXorFunc, "Regular Old-New P",
   54         rf_SimpleXorFunc, "Simple Old-New P"
   55 };
   56 RF_RedFuncs_t rf_EOSmallWriteEFuncs = {
   57         rf_RegularONEFunc, "Regular Old-New E",
   58         rf_SimpleONEFunc, "Regular Old-New E"
   59 };
   60 /* These redundant functions are for degraded read. */
   61 RF_RedFuncs_t rf_eoPRecoveryFuncs = {
   62         rf_RecoveryXorFunc, "Recovery Xr",
   63         rf_RecoveryXorFunc, "Recovery Xr"
   64 };
   65 RF_RedFuncs_t rf_eoERecoveryFuncs = {
   66         rf_RecoveryEFunc, "Recovery E Func",
   67         rf_RecoveryEFunc, "Recovery E Func"
   68 };
   69 
   70 
   71 /*****************************************************************************
   72  *   The following encoding node function is used in
   73  *   EO_000_CreateLargeWriteDAG.
   74  *****************************************************************************/
   75 int
   76 rf_RegularPEFunc(RF_DagNode_t *node)
   77 {
   78         rf_RegularESubroutine(node, node->results[1]);
   79         rf_RegularXorFunc(node);        /* Do the wakeup here ! */
   80 #if 1
   81         return (0);             /* XXX This was missing... GO */
   82 #endif
   83 }
   84 
   85 
   86 /*****************************************************************************
   87  *  For EO_001_CreateSmallWriteDAG, there are (i) RegularONEFunc() and
   88  *  (ii) SimpleONEFunc() to be used. The former is used when the write
   89  *  accesses at least a full stripe unit's worth of sectors.
   90  *  The latter function is used when the write accesses two stripe units but
   91  *  with total sectors less than sectors per SU. In this case, the access of
   92  *  parity and 'E' are shown as disconnected areas in their stripe unit and
   93  *  parity write and 'E' write are both divided into two distinct writes
   94  *  (totally four). This simple old-new write and regular old-new write happen
   95  *  as in RAID-5.
   96  *****************************************************************************/
   97 
   98 /*
   99  * Algorithm:
  100  *   1. Store the difference of old data and new data in the Rod buffer.
  101  *   2. Then encode this buffer into the buffer that already holds the old 'E'
  102  *      information inside it, the result can be shown to be the new 'E'
  103  *      information.
  104  *   3. Xor the Wnd buffer into the difference buffer to recover the original
  105  *      old data.
  106  * Here we have another alternative: to allocate a temporary buffer for
  107  * storing the difference of old data and new data, then encode temp buf
  108  * into old 'E' buf to form new 'E', but this approach takes the same speed
  109  * as the previous, and needs more memory.
  110  */
  111 int
  112 rf_RegularONEFunc(RF_DagNode_t *node)
  113 {
  114         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
  115         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
  116         int EpdaIndex = (node->numParams - 1) / 2 - 1;  /*
  117                                                          * The parameter of node
  118                                                          * where you can find
  119                                                          * e-pda.
  120                                                          */
  121         int i, k, retcode = 0;
  122         int suoffset, length;
  123         RF_RowCol_t scol;
  124         char *srcbuf, *destbuf;
  125         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  126         RF_Etimer_t timer;
  127         RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *)
  128             node->params[EpdaIndex].p;
  129         /* Generally zero. */
  130         int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);
  131 
  132         RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
  133         RF_ASSERT(ESUOffset == 0);
  134 
  135         RF_ETIMER_START(timer);
  136 
  137         /*
  138          * Xor the Wnd buffer into Rod buffer. The difference of old data and
  139          * new data is stored in Rod buffer.
  140          */
  141         for (k = 0; k < EpdaIndex; k += 2) {
  142                 length = rf_RaidAddressToByte(raidPtr,
  143                     ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
  144                 retcode = rf_bxor(node->params[k + EpdaIndex + 3].p,
  145                     node->params[k + 1].p, length, node->dagHdr->bp);
  146         }
  147         /*
  148          * Start to encode the buffer, storing the difference of old data and
  149          * new data into 'E' buffer.
  150          */
  151         for (i = 0; i < EpdaIndex; i += 2)
  152                 if (node->params[i + 1].p != node->results[0]) {
  153                         /* results[0] is buf ptr of E. */
  154                         pda = (RF_PhysDiskAddr_t *) node->params[i].p;
  155                         srcbuf = (char *) node->params[i + 1].p;
  156                         scol = rf_EUCol(layoutPtr, pda->raidAddress);
  157                         suoffset = rf_StripeUnitOffset(layoutPtr,
  158                             pda->startSector);
  159                         destbuf = ((char *) node->results[0]) +
  160                             rf_RaidAddressToByte(raidPtr, suoffset);
  161                         rf_e_encToBuf(raidPtr, scol, srcbuf,
  162                             RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
  163                 }
  164         /*
  165          * Recover the original old data to be used by parity encoding
  166          * function in XorNode.
  167          */
  168         for (k = 0; k < EpdaIndex; k += 2) {
  169                 length = rf_RaidAddressToByte(raidPtr,
  170                     ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
  171                 retcode = rf_bxor(node->params[k + EpdaIndex + 3].p,
  172                     node->params[k + 1].p, length, node->dagHdr->bp);
  173         }
  174         RF_ETIMER_STOP(timer);
  175         RF_ETIMER_EVAL(timer);
  176         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  177         rf_GenericWakeupFunc(node, 0);
  178 #if 1
  179         return (0);             /* XXX This was missing... GO */
  180 #endif
  181 }
  182 
  183 int
  184 rf_SimpleONEFunc(RF_DagNode_t *node)
  185 {
  186         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
  187         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
  188         RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
  189         int retcode = 0;
  190         char *srcbuf, *destbuf;
  191         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  192         int length;
  193         RF_RowCol_t scol;
  194         RF_Etimer_t timer;
  195 
  196         RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type ==
  197             RF_PDA_TYPE_Q);
  198         if (node->dagHdr->status == rf_enable) {
  199                 RF_ETIMER_START(timer);
  200                 /* This is a pda of writeDataNodes. */
  201                 length = rf_RaidAddressToByte(raidPtr,
  202                     ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);
  203                 /* bxor to buffer of readDataNodes. */
  204                 retcode = rf_bxor(node->params[5].p, node->params[1].p,
  205                     length, node->dagHdr->bp);
  206                 /*
  207                  * Find out the corresponding column in encoding matrix for
  208                  * write column to be encoded into redundant disk 'E'.
  209                  */
  210                 scol = rf_EUCol(layoutPtr, pda->raidAddress);
  211                 srcbuf = node->params[1].p;
  212                 destbuf = node->params[3].p;
  213                 /* Start encoding process. */
  214                 rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2,
  215                     destbuf, pda->numSector);
  216                 rf_bxor(node->params[5].p, node->params[1].p, length,
  217                     node->dagHdr->bp);
  218                 RF_ETIMER_STOP(timer);
  219                 RF_ETIMER_EVAL(timer);
  220                 tracerec->q_us += RF_ETIMER_VAL_US(timer);
  221 
  222         }
  223         return (rf_GenericWakeupFunc(node, retcode));   /*
  224                                                          * Call wake func
  225                                                          * explicitly since no
  226                                                          * I/O in this node.
  227                                                          */
  228 }
  229 
  230 
  231 /*
  232  * Called by rf_RegularPEFunc(node) and rf_RegularEFunc(node)
  233  * in f.f. large write.
  234  */
  235 void
  236 rf_RegularESubroutine(RF_DagNode_t *node, char *ebuf)
  237 {
  238         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
  239         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
  240         RF_PhysDiskAddr_t *pda;
  241         int i, suoffset;
  242         RF_RowCol_t scol;
  243         char *srcbuf, *destbuf;
  244         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  245         RF_Etimer_t timer;
  246 
  247         RF_ETIMER_START(timer);
  248         for (i = 0; i < node->numParams - 2; i += 2) {
  249                 RF_ASSERT(node->params[i + 1].p != ebuf);
  250                 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
  251                 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
  252                 scol = rf_EUCol(layoutPtr, pda->raidAddress);
  253                 srcbuf = (char *) node->params[i + 1].p;
  254                 destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
  255                 rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2,
  256                     destbuf, pda->numSector);
  257         }
  258         RF_ETIMER_STOP(timer);
  259         RF_ETIMER_EVAL(timer);
  260         tracerec->xor_us += RF_ETIMER_VAL_US(timer);
  261 }
  262 
  263 
  264 /*****************************************************************************
  265  *                       Used in  EO_001_CreateLargeWriteDAG.
  266  *****************************************************************************/
  267 int
  268 rf_RegularEFunc(RF_DagNode_t *node)
  269 {
  270         rf_RegularESubroutine(node, node->results[0]);
  271         rf_GenericWakeupFunc(node, 0);
  272 #if 1
  273         return (0);             /* XXX This was missing... GO */
  274 #endif
  275 }
  276 
  277 
  278 /*****************************************************************************
  279  * This degraded function allows only two cases:
  280  *   1. When write accesses the full failed stripe unit, then the access can
  281  *      be more than one stripe unit.
  282  *   2. When write accesses only part of the failed SU, we assume accesses of
  283  *      more than one stripe unit are not allowed so that the write can be
  284  *      dealt with like a large write.
  285  * The following function is based on these assumptions. So except in the
  286  * second case, it looks the same as a large write encoding function. But
  287  * this is not exactly the normal way of doing a degraded write, since
  288  * RAIDframe has to break cases of accesses other than the above two into
  289  * smaller accesses. We may have to change rf_DegrESubroutine in the future.
  290  *****************************************************************************/
  291 void
  292 rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf)
  293 {
  294         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
  295         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
  296         RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
  297         RF_PhysDiskAddr_t *pda;
  298         int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
  299         RF_RowCol_t scol;
  300         char *srcbuf, *destbuf;
  301         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  302         RF_Etimer_t timer;
  303 
  304         RF_ETIMER_START(timer);
  305         for (i = 0; i < node->numParams - 2; i += 2) {
  306                 RF_ASSERT(node->params[i + 1].p != ebuf);
  307                 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
  308                 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
  309                 scol = rf_EUCol(layoutPtr, pda->raidAddress);
  310                 srcbuf = (char *) node->params[i + 1].p;
  311                 destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
  312                 rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
  313         }
  314 
  315         RF_ETIMER_STOP(timer);
  316         RF_ETIMER_EVAL(timer);
  317         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  318 }
  319 
  320 
  321 /*****************************************************************************
  322  * This function is used in case where one data disk failed and both redundant
  323  * disks are alive. It is used in the EO_100_CreateWriteDAG. Note: if there is
  324  * another disk failed in the stripe but not accessed at this time, then we
  325  * should, instead, use the rf_EOWriteDoubleRecoveryFunc().
  326  *****************************************************************************/
  327 int
  328 rf_Degraded_100_EOFunc(RF_DagNode_t *node)
  329 {
  330         rf_DegrESubroutine(node, node->results[1]);
  331         rf_RecoveryXorFunc(node);       /* Does the wakeup here ! */
  332 #if 1
  333         return (0);             /* XXX This was missing... Should these be
  334                                  * void functions ??? GO */
  335 #endif
  336 }
  337 
  338 
  339 /*****************************************************************************
  340  * This function is to encode one sector in one of the data disks to the E
  341  * disk. However, in evenodd this function can also be used as decoding
  342  * function to recover data from dead disk in the case of parity failure and
  343  * a single data failure.
  344  *****************************************************************************/
  345 void
  346 rf_e_EncOneSect(RF_RowCol_t srcLogicCol, char *srcSecbuf,
  347     RF_RowCol_t destLogicCol, char *destSecbuf, int bytesPerSector)
  348 {
  349         int S_index;            /*
  350                                  * Index of the EU in the src col which need
  351                                  * be Xored into all EUs in a dest sector.
  352                                  */
  353         int numRowInEncMatrix = (RF_EO_MATRIX_DIM) - 1;
  354         RF_RowCol_t j, indexInDest;     /*
  355                                          * Row index of an encoding unit in
  356                                          * the destination column of encoding
  357                                          * matrix.
  358                                          */
  359         RF_RowCol_t indexInSrc; /*
  360                                  * Row index of an encoding unit in the source
  361                                  * column used for recovery.
  362                                  */
  363         int bytesPerEU = bytesPerSector / numRowInEncMatrix;
  364 
  365 #if     RF_EO_MATRIX_DIM > 17
  366         int shortsPerEU = bytesPerEU / sizeof(short);
  367         short  *destShortBuf, *srcShortBuf1, *srcShortBuf2;
  368         short temp1;
  369 #elif   RF_EO_MATRIX_DIM == 17
  370         int longsPerEU = bytesPerEU / sizeof(long);
  371         long *destLongBuf, *srcLongBuf1, *srcLongBuf2;
  372         long temp1;
  373 #endif
  374 
  375 #if     RF_EO_MATRIX_DIM > 17
  376         RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
  377         RF_ASSERT(bytesPerEU % sizeof(short) == 0);
  378 #elif   RF_EO_MATRIX_DIM == 17
  379         RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
  380         RF_ASSERT(bytesPerEU % sizeof(long) == 0);
  381 #endif
  382 
  383         S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
  384 #if     RF_EO_MATRIX_DIM > 17
  385         srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
  386 #elif   RF_EO_MATRIX_DIM == 17
  387         srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
  388 #endif
  389 
  390         for (indexInDest = 0; indexInDest < numRowInEncMatrix; indexInDest++) {
  391                 indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
  392 
  393 #if     RF_EO_MATRIX_DIM > 17
  394                 destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
  395                 srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
  396                 for (j = 0; j < shortsPerEU; j++) {
  397                         temp1 = destShortBuf[j] ^ srcShortBuf1[j];
  398                         /* Note: S_index won't be at the end row for any src
  399                          * col ! */
  400                         if (indexInSrc != RF_EO_MATRIX_DIM - 1)
  401                                 destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
  402                         /* if indexInSrc is at the end row, ie.
  403                          * RF_EO_MATRIX_DIM -1, then all elements are zero ! */
  404                         else
  405                                 destShortBuf[j] = temp1;
  406                 }
  407 
  408 #elif   RF_EO_MATRIX_DIM == 17
  409                 destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
  410                 srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
  411                 for (j = 0; j < longsPerEU; j++) {
  412                         temp1 = destLongBuf[j] ^ srcLongBuf1[j];
  413                         if (indexInSrc != RF_EO_MATRIX_DIM - 1)
  414                                 destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
  415                         else
  416                                 destLongBuf[j] = temp1;
  417                 }
  418 #endif
  419         }
  420 }
  421 
  422 void
  423 rf_e_encToBuf(RF_Raid_t *raidPtr, RF_RowCol_t srcLogicCol, char *srcbuf,
  424     RF_RowCol_t destLogicCol, char *destbuf, int numSector)
  425 {
  426         int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
  427 
  428         for (i = 0; i < numSector; i++) {
  429                 rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
  430                 srcbuf += bytesPerSector;
  431                 destbuf += bytesPerSector;
  432         }
  433 }
  434 
  435 
  436 /*****************************************************************************
  437  * When the parity and one data disk have failed, we use the second redundant
  438  * information, 'E', to recover the data on the dead disk. This function is used
  439  * in the recovery node of EO_110_CreateReadDAG.
  440  *****************************************************************************/
  441 int
  442 rf_RecoveryEFunc(RF_DagNode_t *node)
  443 {
  444         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
  445         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
  446         RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
  447         RF_RowCol_t scol;       /* source logical column */
  448         RF_RowCol_t fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress); /* logical column of
  449                                                                          * failed SU */
  450         int i;
  451         RF_PhysDiskAddr_t *pda;
  452         int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
  453         char *srcbuf, *destbuf;
  454         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  455         RF_Etimer_t timer;
  456 
  457         bzero((char *) node->results[0], rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
  458         if (node->dagHdr->status == rf_enable) {
  459                 RF_ETIMER_START(timer);
  460                 for (i = 0; i < node->numParams - 2; i += 2)
  461                         if (node->params[i + 1].p != node->results[0]) {
  462                                 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
  463                                 if (i == node->numParams - 4)
  464                                         scol = RF_EO_MATRIX_DIM - 2;    /* the colume of
  465                                                                          * redundant E */
  466                                 else
  467                                         scol = rf_EUCol(layoutPtr, pda->raidAddress);
  468                                 srcbuf = (char *) node->params[i + 1].p;
  469                                 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
  470                                 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
  471                                 rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
  472                         }
  473                 RF_ETIMER_STOP(timer);
  474                 RF_ETIMER_EVAL(timer);
  475                 tracerec->xor_us += RF_ETIMER_VAL_US(timer);
  476         }
  477         return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */
  478 }
  479 
  480 
  481 /*****************************************************************************
  482  * This function is used in the case where one data and the parity have failed.
  483  * (in EO_110_CreateWriteDAG)
  484  *****************************************************************************/
  485 int
  486 rf_EO_DegradedWriteEFunc(RF_DagNode_t *node)
  487 {
  488         rf_DegrESubroutine(node, node->results[0]);
  489         rf_GenericWakeupFunc(node, 0);
  490 #if 1
  491         return (0);             /* XXX Yet another one !!! GO */
  492 #endif
  493 }
  494 
  495 
  496 
  497 /*****************************************************************************
  498  *      THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES.
  499  *****************************************************************************/
  500 
  501 void
  502 rf_doubleEOdecode(RF_Raid_t *raidPtr, char **rrdbuf, char **dest,
  503     RF_RowCol_t *fcol, char *pbuf, char *ebuf)
  504 {
  505         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
  506         int i, j, k, f1, f2, row;
  507         int rrdrow, erow, count = 0;
  508         int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
  509         int numRowInEncMatrix = (RF_EO_MATRIX_DIM) - 1;
  510 #if 0
  511         int pcol = (RF_EO_MATRIX_DIM) - 1;
  512 #endif
  513         int ecol = (RF_EO_MATRIX_DIM) - 2;
  514         int bytesPerEU = bytesPerSector / numRowInEncMatrix;
  515         int numDataCol = layoutPtr->numDataCol;
  516 #if     RF_EO_MATRIX_DIM > 17
  517         int shortsPerEU = bytesPerEU / sizeof(short);
  518         short *rrdbuf_current, *pbuf_current, *ebuf_current;
  519         short *dest_smaller, *dest_smaller_current;
  520         short *dest_larger, *dest_larger_current;
  521         short *temp;
  522         short *P;
  523 
  524         RF_ASSERT(bytesPerEU % sizeof(short) == 0);
  525         RF_Malloc(P, bytesPerEU, (short *));
  526         RF_Malloc(temp, bytesPerEU, (short *));
  527 #elif   RF_EO_MATRIX_DIM == 17
  528         int longsPerEU = bytesPerEU / sizeof(long);
  529         long *rrdbuf_current, *pbuf_current, *ebuf_current;
  530         long *dest_smaller, *dest_smaller_current;
  531         long *dest_larger, *dest_larger_current;
  532         long *temp;
  533         long *P;
  534 
  535         RF_ASSERT(bytesPerEU % sizeof(long) == 0);
  536         RF_Malloc(P, bytesPerEU, (long *));
  537         RF_Malloc(temp, bytesPerEU, (long *));
  538 #endif
  539         RF_ASSERT(*((long *) dest[0]) == 0);
  540         RF_ASSERT(*((long *) dest[1]) == 0);
  541         bzero((char *) P, bytesPerEU);
  542         bzero((char *) temp, bytesPerEU);
  543         RF_ASSERT(*P == 0);
  544         /*
  545          * Calculate the 'P' parameter, which, not parity, is the Xor of all
  546          * elements in the last two column, ie. 'E' and 'parity' columns, see
  547          * the Ref. paper by Blaum, et al 1993.
  548          */
  549         for (i = 0; i < numRowInEncMatrix; i++)
  550                 for (k = 0; k < longsPerEU; k++) {
  551 #if     RF_EO_MATRIX_DIM > 17
  552                         ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
  553                         pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
  554 #elif   RF_EO_MATRIX_DIM == 17
  555                         ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
  556                         pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
  557 #endif
  558                         P[k] ^= *ebuf_current;
  559                         P[k] ^= *pbuf_current;
  560                 }
  561         RF_ASSERT(fcol[0] != fcol[1]);
  562         if (fcol[0] < fcol[1]) {
  563 #if     RF_EO_MATRIX_DIM > 17
  564                 dest_smaller = (short *) (dest[0]);
  565                 dest_larger = (short *) (dest[1]);
  566 #elif   RF_EO_MATRIX_DIM == 17
  567                 dest_smaller = (long *) (dest[0]);
  568                 dest_larger = (long *) (dest[1]);
  569 #endif
  570                 f1 = fcol[0];
  571                 f2 = fcol[1];
  572         } else {
  573 #if     RF_EO_MATRIX_DIM > 17
  574                 dest_smaller = (short *) (dest[1]);
  575                 dest_larger = (short *) (dest[0]);
  576 #elif   RF_EO_MATRIX_DIM == 17
  577                 dest_smaller = (long *) (dest[1]);
  578                 dest_larger = (long *) (dest[0]);
  579 #endif
  580                 f1 = fcol[1];
  581                 f2 = fcol[0];
  582         }
  583         row = (RF_EO_MATRIX_DIM) - 1;
  584         while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) !=
  585             ((RF_EO_MATRIX_DIM) - 1)) {
  586 #if     RF_EO_MATRIX_DIM > 17
  587                 dest_larger_current = dest_larger + row * shortsPerEU;
  588                 dest_smaller_current = dest_smaller + row * shortsPerEU;
  589 #elif   RF_EO_MATRIX_DIM == 17
  590                 dest_larger_current = dest_larger + row * longsPerEU;
  591                 dest_smaller_current = dest_smaller + row * longsPerEU;
  592 #endif
  593                 /*
  594                  * Do the diagonal recovery. Initially, temp[k] = (failed 1),
  595                  * which is the failed data in the column that has smaller
  596                  * col index.
  597                  */
  598                 /* Step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */
  599                 for (j = 0; j < numDataCol; j++) {
  600                         if (j == f1 || j == f2)
  601                                 continue;
  602                         rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
  603                         if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
  604 #if     RF_EO_MATRIX_DIM > 17
  605                                 rrdbuf_current = (short *) (rrdbuf[j]) +
  606                                     rrdrow * shortsPerEU;
  607                                 for (k = 0; k < shortsPerEU; k++)
  608                                         temp[k] ^= *(rrdbuf_current + k);
  609 #elif   RF_EO_MATRIX_DIM == 17
  610                                 rrdbuf_current = (long *) (rrdbuf[j]) +
  611                                     rrdrow * longsPerEU;
  612                                 for (k = 0; k < longsPerEU; k++)
  613                                         temp[k] ^= *(rrdbuf_current + k);
  614 #endif
  615                         }
  616                 }
  617                 /*
  618                  * Step 2:  ^E(erow,m-2), If erow is at the bottom row, don't
  619                  * Xor into it.  E(erow,m-2) = (principle diagonal) ^ (failed
  620                  * 1) ^ (failed 2) ^ (SUM of nonfailed in-diagonal
  621                  * A(rrdrow,0..m-3))
  622                  * After this step, temp[k] = (principle diagonal) ^ (failed 2).
  623                  */
  624 
  625                 erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
  626                 if (erow != (RF_EO_MATRIX_DIM) - 1) {
  627 #if     RF_EO_MATRIX_DIM > 17
  628                         ebuf_current = (short *) ebuf + shortsPerEU * erow;
  629                         for (k = 0; k < shortsPerEU; k++)
  630                                 temp[k] ^= *(ebuf_current + k);
  631 #elif   RF_EO_MATRIX_DIM == 17
  632                         ebuf_current = (long *) ebuf + longsPerEU * erow;
  633                         for (k = 0; k < longsPerEU; k++)
  634                                 temp[k] ^= *(ebuf_current + k);
  635 #endif
  636                 }
  637                 /*
  638                  * Step 3: ^P to obtain the failed data (failed 2). P can be
  639                  * proved to be actually (principal diagonal). After this
  640                  * step, temp[k] = (failed 2), the failed data to be recovered.
  641                  */
  642 #if     RF_EO_MATRIX_DIM > 17
  643                 for (k = 0; k < shortsPerEU; k++)
  644                         temp[k] ^= P[k];
  645                 /* Put the data into the destination buffer. */
  646                 for (k = 0; k < shortsPerEU; k++)
  647                         dest_larger_current[k] = temp[k];
  648 #elif   RF_EO_MATRIX_DIM == 17
  649                 for (k = 0; k < longsPerEU; k++)
  650                         temp[k] ^= P[k];
  651                 /* Put the data into the destination buffer. */
  652                 for (k = 0; k < longsPerEU; k++)
  653                         dest_larger_current[k] = temp[k];
  654 #endif
  655 
  656                 /* THE FOLLOWING DO THE HORIZONTAL XOR. */
  657                 /*
  658                  * Step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
  659                  * columns.
  660                  */
  661                 for (j = 0; j < numDataCol; j++) {
  662                         if (j == f1 || j == f2)
  663                                 continue;
  664 #if     RF_EO_MATRIX_DIM > 17
  665                         rrdbuf_current = (short *) (rrdbuf[j]) +
  666                             row * shortsPerEU;
  667                         for (k = 0; k < shortsPerEU; k++)
  668                                 temp[k] ^= *(rrdbuf_current + k);
  669 #elif   RF_EO_MATRIX_DIM == 17
  670                         rrdbuf_current = (long *) (rrdbuf[j]) +
  671                             row * longsPerEU;
  672                         for (k = 0; k < longsPerEU; k++)
  673                                 temp[k] ^= *(rrdbuf_current + k);
  674 #endif
  675                 }
  676                 /* Step 2: ^A(row,m-1) */
  677                 /* Step 3: Put the data into the destination buffer. */
  678 #if     RF_EO_MATRIX_DIM > 17
  679                 pbuf_current = (short *) pbuf + shortsPerEU * row;
  680                 for (k = 0; k < shortsPerEU; k++)
  681                         temp[k] ^= *(pbuf_current + k);
  682                 for (k = 0; k < shortsPerEU; k++)
  683                         dest_smaller_current[k] = temp[k];
  684 #elif   RF_EO_MATRIX_DIM == 17
  685                 pbuf_current = (long *) pbuf + longsPerEU * row;
  686                 for (k = 0; k < longsPerEU; k++)
  687                         temp[k] ^= *(pbuf_current + k);
  688                 for (k = 0; k < longsPerEU; k++)
  689                         dest_smaller_current[k] = temp[k];
  690 #endif
  691                 count++;
  692         }
  693         /*
  694          * Check if all Encoding Unit in the data buffer have been decoded ?
  695          * According to EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime
  696          * number, this algorithm will covered all buffer.
  697          */
  698         RF_ASSERT(count == numRowInEncMatrix);
  699         RF_Free((char *) P, bytesPerEU);
  700         RF_Free((char *) temp, bytesPerEU);
  701 }
  702 
  703 
  704 /*****************************************************************************
  705  *      This function is called by double degraded read EO_200_CreateReadDAG.
  706  *****************************************************************************/
  707 int
  708 rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t *node)
  709 {
  710         int ndataParam = 0;
  711         int np = node->numParams;
  712         RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *)
  713             node->params[np - 1].p;
  714         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
  715         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
  716         int i, prm, sector, nresults = node->numResults;
  717         RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
  718         unsigned sosAddr;
  719         int two = 0, mallc_one = 0, mallc_two = 0;      /*
  720                                                          * Flags to indicate if
  721                                                          * memory is allocated.
  722                                                          */
  723         int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
  724         RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
  725             npda;
  726         RF_RowCol_t fcol[2], fsuoff[2], fsuend[2],
  727             numDataCol = layoutPtr->numDataCol;
  728         char **buf, *ebuf, *pbuf, *dest[2];
  729         long *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff;
  730         RF_SectorNum_t startSector, endSector;
  731         RF_Etimer_t timer;
  732         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  733 
  734         RF_ETIMER_START(timer);
  735 
  736         /*
  737          * Find out the number of parameters that are pdas for data
  738          * information.
  739          */
  740         for (i = 0; i <= np; i++)
  741                 if (((RF_PhysDiskAddr_t *) node->params[i].p)->type !=
  742                     RF_PDA_TYPE_DATA) {
  743                         ndataParam = i;
  744                         break;
  745                 }
  746         RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
  747         if (ndataParam != 0) {
  748                 RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
  749                 RF_Malloc(suend, ndataParam * sizeof(long), (long *));
  750                 RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
  751         }
  752         if (asmap->failedPDAs[1] &&
  753             (asmap->failedPDAs[1]->numSector +
  754              asmap->failedPDAs[0]->numSector) < secPerSU) {
  755                 RF_ASSERT(0);   /* Currently, no support for this situation. */
  756                 ppda = node->params[np - 6].p;
  757                 ppda2 = node->params[np - 5].p;
  758                 RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
  759                 epda = node->params[np - 4].p;
  760                 epda2 = node->params[np - 3].p;
  761                 RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
  762                 two = 1;
  763         } else {
  764                 ppda = node->params[np - 4].p;
  765                 epda = node->params[np - 3].p;
  766                 psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
  767                 esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
  768                 RF_ASSERT(psuoff == esuoff);
  769         }
  770         /*
  771          * The followings have three goals:
  772          *   1. Determine the startSector to begin decoding and endSector
  773          *      to end decoding.
  774          *   2. Determine the column numbers of the two failed disks.
  775          *   3. Determine the offset and end offset of the access within
  776          *      each failed stripe unit.
  777          */
  778         if (nresults == 1) {
  779                 /* Find the startSector to begin decoding. */
  780                 pda = node->results[0];
  781                 bzero(pda->bufPtr, bytesPerSector * pda->numSector);
  782                 fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
  783                 fsuend[0] = fsuoff[0] + pda->numSector;
  784                 startSector = fsuoff[0];
  785                 endSector = fsuend[0];
  786 
  787                 /* Find out the column of failed disk being accessed. */
  788                 fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
  789 
  790                 /* Find out the other failed column not accessed. */
  791                 sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
  792                     asmap->raidAddress);
  793                 for (i = 0; i < numDataCol; i++) {
  794                         npda.raidAddress = sosAddr + (i * secPerSU);
  795                         (raidPtr->Layout.map->MapSector) (raidPtr,
  796                             npda.raidAddress, &(npda.row), &(npda.col),
  797                             &(npda.startSector), 0);
  798                         /* Skip over dead disks. */
  799                         if (RF_DEAD_DISK(raidPtr
  800                             ->Disks[npda.row][npda.col].status))
  801                                 if (i != fcol[0])
  802                                         break;
  803                 }
  804                 RF_ASSERT(i < numDataCol);
  805                 fcol[1] = i;
  806         } else {
  807                 RF_ASSERT(nresults == 2);
  808                 pda0 = node->results[0];
  809                 bzero(pda0->bufPtr, bytesPerSector * pda0->numSector);
  810                 pda1 = node->results[1];
  811                 bzero(pda1->bufPtr, bytesPerSector * pda1->numSector);
  812                 /*
  813                  * Determine the failed column numbers of the two failed
  814                  * disks.
  815                  */
  816                 fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
  817                 fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
  818                 /*
  819                  * Determine the offset and end offset of the access within
  820                  * each failed stripe unit.
  821                  */
  822                 fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
  823                 fsuend[0] = fsuoff[0] + pda0->numSector;
  824                 fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
  825                 fsuend[1] = fsuoff[1] + pda1->numSector;
  826                 /* Determine the startSector to begin decoding. */
  827                 startSector = RF_MIN(pda0->startSector, pda1->startSector);
  828                 /* Determine the endSector to end decoding. */
  829                 endSector = RF_MAX(fsuend[0], fsuend[1]);
  830         }
  831         /*
  832          * Assign the beginning sector and the end sector for each parameter.
  833          * Find out the corresponding column # for each parameter.
  834          */
  835         for (prm = 0; prm < ndataParam; prm++) {
  836                 pda = node->params[prm].p;
  837                 suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
  838                 suend[prm] = suoff[prm] + pda->numSector;
  839                 prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
  840         }
  841         /*
  842          * 'sector' is the sector for the current decoding algorithm. For each
  843          * sector in the failed SU
  844          * 1. Find out the corresponding parameters that cover the current
  845          *    sector and that are needed for the decoding of this sector in
  846          *    failed SU.
  847          * 2. Find out if sector is in the shadow of any accessed failed SU.
  848          *    If not, malloc a temporary space of a sector in size.
  849          */
  850         for (sector = startSector; sector < endSector; sector++) {
  851                 if (nresults == 2)
  852                         if (!(fsuoff[0] <= sector && sector < fsuend[0]) &&
  853                             !(fsuoff[1] <= sector && sector < fsuend[1]))
  854                                 continue;
  855                 for (prm = 0; prm < ndataParam; prm++)
  856                         if (suoff[prm] <= sector && sector < suend[prm])
  857                                 buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *)
  858                                     node->params[prm].p)->bufPtr +
  859                                     rf_RaidAddressToByte(raidPtr,
  860                                      sector - suoff[prm]);
  861                 /*
  862                  * Find out if sector is in the shadow of any accessed failed
  863                  * SU. If yes, assign dest[0], dest[1] to point at suitable
  864                  * position of the buffer corresponding to failed SUs. If no,
  865                  * malloc a temporary space of a sector in size for
  866                  * destination of decoding.
  867                  */
  868                 RF_ASSERT(nresults == 1 || nresults == 2);
  869                 if (nresults == 1) {
  870                         dest[0] = ((RF_PhysDiskAddr_t *)
  871                             node->results[0])->bufPtr +
  872                             rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
  873                         /* Always malloc temp buffer to dest[1]. */
  874                         RF_Malloc(dest[1], bytesPerSector, (char *));
  875                         bzero(dest[1], bytesPerSector);
  876                         mallc_two = 1;
  877                 } else {
  878                         if (fsuoff[0] <= sector && sector < fsuend[0])
  879                                 dest[0] = ((RF_PhysDiskAddr_t *)
  880                                     node->results[0])->bufPtr +
  881                                     rf_RaidAddressToByte(raidPtr,
  882                                      sector - fsuoff[0]);
  883                         else {
  884                                 RF_Malloc(dest[0], bytesPerSector, (char *));
  885                                 bzero(dest[0], bytesPerSector);
  886                                 mallc_one = 1;
  887                         }
  888                         if (fsuoff[1] <= sector && sector < fsuend[1])
  889                                 dest[1] = ((RF_PhysDiskAddr_t *)
  890                                     node->results[1])->bufPtr +
  891                                     rf_RaidAddressToByte(raidPtr,
  892                                      sector - fsuoff[1]);
  893                         else {
  894                                 RF_Malloc(dest[1], bytesPerSector, (char *));
  895                                 bzero(dest[1], bytesPerSector);
  896                                 mallc_two = 1;
  897                         }
  898                         RF_ASSERT(mallc_one == 0 || mallc_two == 0);
  899                 }
  900                 pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr,
  901                     sector - psuoff);
  902                 ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr,
  903                     sector - esuoff);
  904                 /*
  905                  * After finish finding all needed sectors, call doubleEOdecode
  906                  * function for decoding one sector to destination.
  907                  */
  908                 rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
  909                 /*
  910                  * Free all allocated memory, and mark flag to indicate no
  911                  * memory is being allocated.
  912                  */
  913                 if (mallc_one == 1)
  914                         RF_Free(dest[0], bytesPerSector);
  915                 if (mallc_two == 1)
  916                         RF_Free(dest[1], bytesPerSector);
  917                 mallc_one = mallc_two = 0;
  918         }
  919         RF_Free(buf, numDataCol * sizeof(char *));
  920         if (ndataParam != 0) {
  921                 RF_Free(suoff, ndataParam * sizeof(long));
  922                 RF_Free(suend, ndataParam * sizeof(long));
  923                 RF_Free(prmToCol, ndataParam * sizeof(long));
  924         }
  925         RF_ETIMER_STOP(timer);
  926         RF_ETIMER_EVAL(timer);
  927         if (tracerec) {
  928                 tracerec->q_us += RF_ETIMER_VAL_US(timer);
  929         }
  930         rf_GenericWakeupFunc(node, 0);
  931 #if 1
  932         return (0);             /* XXX Is this even close !!?!?!!? GO */
  933 #endif
  934 }
  935 
  936 
  937 /*
  938  * Currently, only access of one of the two failed SU is allowed in this
  939  * function. Also, asmap->numStripeUnitsAccessed is limited to be one,
  940  * the RAIDframe will break large access into many accesses of single
  941  * stripe unit.
  942  */
  943 
  944 int
  945 rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *node)
  946 {
  947         int np = node->numParams;
  948         RF_AccessStripeMap_t *asmap =
  949             (RF_AccessStripeMap_t *) node->params[np - 1].p;
  950         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
  951         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
  952         RF_SectorNum_t sector;
  953         RF_RowCol_t col, scol;
  954         int prm, i, j;
  955         RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
  956         unsigned sosAddr;
  957         unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
  958         RF_int64 numbytes;
  959         RF_SectorNum_t startSector, endSector;
  960         RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
  961         RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
  962         char **buf;             /*
  963                                  * buf[0], buf[1], buf[2], ... etc, point to
  964                                  * buffer storing data read from col0, col1,
  965                                  * col2.
  966                                  */
  967         char *ebuf, *pbuf, *dest[2], *olddata[2];
  968         RF_Etimer_t timer;
  969         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  970 
  971         RF_ASSERT(asmap->numDataFailed == 1);   /*
  972                                                  * Currently only support this
  973                                                  * case, the other failed SU
  974                                                  * is not being accessed.
  975                                                  */
  976         RF_ETIMER_START(timer);
  977         RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
  978 
  979         ppda = node->results[0];        /*
  980                                          * Instead of being buffers,
  981                                          * node->results[0] and [1]
  982                                          * are Ppda and Epda.
  983                                          */
  984         epda = node->results[1];
  985         fpda = asmap->failedPDAs[0];
  986 
  987         /* First, recovery the failed old SU using EvenOdd double decoding. */
  988         /* Determine the startSector and endSector for decoding. */
  989         startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
  990         endSector = startSector + fpda->numSector;
  991         /*
  992          * Assign buf[col] pointers to point to each non-failed column and
  993          * initialize the pbuf and ebuf to point at the beginning of each
  994          * source buffers and destination buffers. */
  995         for (prm = 0; prm < numDataCol - 2; prm++) {
  996                 pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
  997                 col = rf_EUCol(layoutPtr, pda->raidAddress);
  998                 buf[col] = pda->bufPtr;
  999         }
 1000         /*
 1001          * pbuf and ebuf: They will change values as double recovery decoding
 1002          * goes on.
 1003          */
 1004         pbuf = ppda->bufPtr;
 1005         ebuf = epda->bufPtr;
 1006         /*
 1007          * Find out the logical column numbers in the encoding matrix of the
 1008          * two failed columns.
 1009          */
 1010         fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
 1011 
 1012         /* Find out the other failed column not accessed this time. */
 1013         sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
 1014             asmap->raidAddress);
 1015         for (i = 0; i < numDataCol; i++) {
 1016                 npda.raidAddress = sosAddr + (i * secPerSU);
 1017                 (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress,
 1018                     &(npda.row), &(npda.col), &(npda.startSector), 0);
 1019                 /* Skip over dead disks. */
 1020                 if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
 1021                         if (i != fcol[0])
 1022                                 break;
 1023         }
 1024         RF_ASSERT(i < numDataCol);
 1025         fcol[1] = i;
 1026         /* Assign temporary space to put recovered failed SU. */
 1027         numbytes = fpda->numSector * bytesPerSector;
 1028         RF_Malloc(olddata[0], numbytes, (char *));
 1029         RF_Malloc(olddata[1], numbytes, (char *));
 1030         dest[0] = olddata[0];
 1031         dest[1] = olddata[1];
 1032         bzero(olddata[0], numbytes);
 1033         bzero(olddata[1], numbytes);
 1034         /*
 1035          * Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j]
 1036          * have already pointed at the beginning of each source buffers and
 1037          * destination buffers.
 1038          */
 1039         for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
 1040                 rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
 1041                 for (j = 0; j < numDataCol; j++)
 1042                         if ((j != fcol[0]) && (j != fcol[1]))
 1043                                 buf[j] += bytesPerSector;
 1044                 dest[0] += bytesPerSector;
 1045                 dest[1] += bytesPerSector;
 1046                 ebuf += bytesPerSector;
 1047                 pbuf += bytesPerSector;
 1048         }
 1049         /*
 1050          * After recovery, the buffer pointed by olddata[0] is the old failed
 1051          * data. With new writing data and this old data, use small write to
 1052          * calculate the new redundant informations.
 1053          */
 1054         /*
 1055          * node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
 1056          * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
 1057          * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
 1058          * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
 1059          * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
 1060          * wudNodes; For current implementation, we assume the simplest case:
 1061          * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
 1062          * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
 1063          * data to be written to the failed disk. We first bxor the new data
 1064          * into the old recovered data, then do the same things as small
 1065          * write.
 1066          */
 1067 
 1068         rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr,
 1069             olddata[0], numbytes, node->dagHdr->bp);
 1070         /* Do new 'E' calculation. */
 1071         /*
 1072          * Find out the corresponding column in encoding matrix for write
 1073          * column to be encoded into redundant disk 'E'.
 1074          */
 1075         scol = rf_EUCol(layoutPtr, fpda->raidAddress);
 1076         /*
 1077          * olddata[0] now is source buffer pointer; epda->bufPtr is the dest
 1078          * buffer pointer.
 1079          */
 1080         rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2,
 1081             epda->bufPtr, fpda->numSector);
 1082 
 1083         /* Do new 'P' calculation. */
 1084         rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
 1085         /* Free the allocated buffer. */
 1086         RF_Free(olddata[0], numbytes);
 1087         RF_Free(olddata[1], numbytes);
 1088         RF_Free(buf, numDataCol * sizeof(char *));
 1089 
 1090         RF_ETIMER_STOP(timer);
 1091         RF_ETIMER_EVAL(timer);
 1092         if (tracerec) {
 1093                 tracerec->q_us += RF_ETIMER_VAL_US(timer);
 1094         }
 1095         rf_GenericWakeupFunc(node, 0);
 1096         return (0);
 1097 }

/* [<][>][^][v][top][bottom][index][help] */