root/dev/raidframe/rf_pq.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rf_RegularONPFunc
  2. rf_SimpleONPFunc
  3. rf_RecoveryPFunc
  4. rf_RegularPFunc
  5. rf_PQDagSelect
  6. rf_PQOne
  7. rf_PQOneTwo
  8. RF_CREATE_DAG_FUNC_DECL
  9. rf_RegularONQFunc
  10. rf_SimpleONQFunc
  11. RF_CREATE_DAG_FUNC_DECL
  12. rf_RegularQSubr
  13. rf_DegrQSubr
  14. rf_RegularPQFunc
  15. rf_RegularQFunc
  16. rf_Degraded_100_PQFunc
  17. rf_RecoveryQFunc
  18. rf_RecoveryPQFunc
  19. rf_PQ_DegradedWriteQFunc
  20. rf_IncQ
  21. rf_QDelta
  22. rf_PQ_recover
  23. rf_InvertQ

    1 /*      $OpenBSD: rf_pq.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $        */
    2 /*      $NetBSD: rf_pq.c,v 1.7 2000/01/07 03:41:02 oster Exp $  */
    3 
    4 /*
    5  * Copyright (c) 1995 Carnegie-Mellon University.
    6  * All rights reserved.
    7  *
    8  * Author: Daniel Stodolsky
    9  *
   10  * Permission to use, copy, modify and distribute this software and
   11  * its documentation is hereby granted, provided that both the copyright
   12  * notice and this permission notice appear in all copies of the
   13  * software, derivative works or modified versions, and any portions
   14  * thereof, and that both notices appear in supporting documentation.
   15  *
   16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   19  *
   20  * Carnegie Mellon requests users of this software to return to
   21  *
   22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   23  *  School of Computer Science
   24  *  Carnegie Mellon University
   25  *  Pittsburgh PA 15213-3890
   26  *
   27  * any improvements or extensions that they make and grant Carnegie the
   28  * rights to redistribute these changes.
   29  */
   30 
   31 /*
   32  * Code for RAID level 6 (P + Q) disk array architecture.
   33  */
   34 
   35 #include "rf_archs.h"
   36 #include "rf_types.h"
   37 #include "rf_raid.h"
   38 #include "rf_dag.h"
   39 #include "rf_dagffrd.h"
   40 #include "rf_dagffwr.h"
   41 #include "rf_dagdegrd.h"
   42 #include "rf_dagdegwr.h"
   43 #include "rf_dagutils.h"
   44 #include "rf_dagfuncs.h"
   45 #include "rf_etimer.h"
   46 #include "rf_pqdeg.h"
   47 #include "rf_general.h"
   48 #include "rf_map.h"
   49 #include "rf_pq.h"
   50 
/*
 * Redundancy-function table for P updates.  Each handler is paired with a
 * printable name: first the "regular" old-new P handler, then the "simple"
 * one.
 */
RF_RedFuncs_t rf_pFuncs = {
	rf_RegularONPFunc, "Regular Old-New P",
	rf_SimpleONPFunc, "Simple Old-New P"
};
/*
 * Redundancy-function table for P recovery.  Both slots name the same
 * handler, rf_RecoveryPFunc.
 */
RF_RedFuncs_t rf_pRecoveryFuncs = {
	rf_RecoveryPFunc, "Recovery P Func",
	rf_RecoveryPFunc, "Recovery P Func"
};
   59 
   60 int
   61 rf_RegularONPFunc(RF_DagNode_t *node)
   62 {
   63         return (rf_RegularXorFunc(node));
   64 }
   65 
   66 
   67 /*
   68  * Same as simpleONQ func, but the coefficient is always 1.
   69  */
   70 
   71 int
   72 rf_SimpleONPFunc(RF_DagNode_t *node)
   73 {
   74         return (rf_SimpleXorFunc(node));
   75 }
   76 
   77 int
   78 rf_RecoveryPFunc(RF_DagNode_t *node)
   79 {
   80         return (rf_RecoveryXorFunc(node));
   81 }
   82 
   83 int
   84 rf_RegularPFunc(RF_DagNode_t *node)
   85 {
   86         return (rf_RegularXorFunc(node));
   87 }
   88 
   89 
   90 #if     (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   91 
/*
 * Local prototypes for the Q arithmetic helpers used by the node functions
 * in this file.  Lengths are passed in bytes by every caller here.
 */
void rf_QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
	unsigned char coeff);
void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, unsigned length,
	unsigned coeff);

/*
 * Redundancy-function table for Q updates: the "regular" old-new Q handler
 * followed by the "simple" one, each with a printable name.
 */
RF_RedFuncs_t rf_qFuncs = {
	rf_RegularONQFunc, "Regular Old-New Q",
	rf_SimpleONQFunc, "Simple Old-New Q"
};
/* Q recovery table: both slots name rf_RecoveryQFunc. */
RF_RedFuncs_t rf_qRecoveryFuncs = {
	rf_RecoveryQFunc, "Recovery Q Func",
	rf_RecoveryQFunc, "Recovery Q Func"
};
/* Combined P+Q recovery table: both slots name rf_RecoveryPQFunc. */
RF_RedFuncs_t rf_pqRecoveryFuncs = {
	rf_RecoveryPQFunc, "Recovery PQ Func",
	rf_RecoveryPQFunc, "Recovery PQ Func"
};
  109 
  110 void
  111 rf_PQDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
  112     RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc)
  113 {
  114         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  115         unsigned ndfail = asmap->numDataFailed;
  116         unsigned npfail = asmap->numParityFailed;
  117         unsigned ntfail = npfail + ndfail;
  118 
  119         RF_ASSERT(RF_IO_IS_R_OR_W(type));
  120         if (ntfail > 2) {
  121                 RF_ERRORMSG("more than two disks failed in a single group !"
  122                             "  Aborting I/O operation.\n");
  123                  /* *infoFunc = */ *createFunc = NULL;
  124                 return;
  125         }
  126         /* Ok, we can do this I/O. */
  127         if (type == RF_IO_TYPE_READ) {
  128                 switch (ndfail) {
  129                 case 0:
  130                         /* Fault free read. */
  131                         *createFunc = (RF_VoidFuncPtr)
  132                             rf_CreateFaultFreeReadDAG;  /* Same as raid 5. */
  133                         break;
  134                 case 1:
  135                         /* Lost a single data unit. */
  136                         /*
  137                          * Two cases:
  138                          * (1) Parity is not lost. Do a normal raid 5
  139                          *     reconstruct read.
  140                          * (2) Parity is lost. Do a reconstruct read using "q".
  141                          */
  142                         if (ntfail == 2) {      /* Also lost redundancy. */
  143                                 if (asmap->failedPDAs[1]->type ==
  144                                     RF_PDA_TYPE_PARITY)
  145                                         *createFunc = (RF_VoidFuncPtr)
  146                                             rf_PQ_110_CreateReadDAG;
  147                                 else
  148                                         *createFunc = (RF_VoidFuncPtr)
  149                                             rf_PQ_101_CreateReadDAG;
  150                         } else {
  151                                 /*
  152                                  * P and Q are ok. But is there a failure in
  153                                  * some unaccessed data unit ?
  154                                  */
  155                                 if (rf_NumFailedDataUnitsInStripe(raidPtr,
  156                                     asmap) == 2)
  157                                         *createFunc = (RF_VoidFuncPtr)
  158                                             rf_PQ_200_CreateReadDAG;
  159                                 else
  160                                         *createFunc = (RF_VoidFuncPtr)
  161                                             rf_PQ_100_CreateReadDAG;
  162                         }
  163                         break;
  164                 case 2:
  165                         /* Lost two data units. */
  166                         /* *infoFunc = rf_PQOneTwo; */
  167                         *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
  168                         break;
  169                 }
  170                 return;
  171         }
  172         /* A write. */
  173         switch (ntfail) {
  174         case 0:         /* Fault free. */
  175                 if (rf_suppressLocksAndLargeWrites ||
  176                     (((asmap->numStripeUnitsAccessed <=
  177                        (layoutPtr->numDataCol / 2)) &&
  178                       (layoutPtr->numDataCol != 1)) ||
  179                      (asmap->parityInfo->next != NULL) ||
  180                      (asmap->qInfo->next != NULL) ||
  181                      rf_CheckStripeForFailures(raidPtr, asmap))) {
  182 
  183                         *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
  184                 } else {
  185                         *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
  186                 }
  187                 break;
  188 
  189         case 1:         /* Single disk fault. */
  190                 if (npfail == 1) {
  191                         RF_ASSERT((asmap->failedPDAs[0]->type ==
  192                             RF_PDA_TYPE_PARITY) ||
  193                             (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
  194                         if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) {
  195                                 /*
  196                                  * Q died, treat like normal mode raid5 write.
  197                                  */
  198                                 if (((asmap->numStripeUnitsAccessed <=
  199                                       (layoutPtr->numDataCol / 2)) ||
  200                                      (asmap->numStripeUnitsAccessed == 1)) ||
  201                                     rf_NumFailedDataUnitsInStripe(raidPtr,
  202                                      asmap))
  203                                         *createFunc = (RF_VoidFuncPtr)
  204                                             rf_PQ_001_CreateSmallWriteDAG;
  205                                 else
  206                                         *createFunc = (RF_VoidFuncPtr)
  207                                             rf_PQ_001_CreateLargeWriteDAG;
  208                         } else {/* Parity died, small write only updating Q. */
  209                                 if (((asmap->numStripeUnitsAccessed <=
  210                                       (layoutPtr->numDataCol / 2)) ||
  211                                      (asmap->numStripeUnitsAccessed == 1)) ||
  212                                     rf_NumFailedDataUnitsInStripe(raidPtr,
  213                                      asmap))
  214                                         *createFunc = (RF_VoidFuncPtr)
  215                                             rf_PQ_010_CreateSmallWriteDAG;
  216                                 else
  217                                         *createFunc = (RF_VoidFuncPtr)
  218                                             rf_PQ_010_CreateLargeWriteDAG;
  219                         }
  220                 } else {        /*
  221                                  * Data missing. Do a P reconstruct write if
  222                                  * only a single data unit is lost in the
  223                                  * stripe, otherwise a PQ reconstruct write.
  224                                  */
  225                         if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
  226                                 *createFunc = (RF_VoidFuncPtr)
  227                                     rf_PQ_200_CreateWriteDAG;
  228                         else
  229                                 *createFunc = (RF_VoidFuncPtr)
  230                                     rf_PQ_100_CreateWriteDAG;
  231                 }
  232                 break;
  233 
  234         case 2:         /* Two disk faults. */
  235                 switch (npfail) {
  236                 case 2: /* Both p and q dead. */
  237                         *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
  238                         break;
  239                 case 1: /* Either p or q and dead data. */
  240                         RF_ASSERT(asmap->failedPDAs[0]->type ==
  241                                   RF_PDA_TYPE_DATA);
  242                         RF_ASSERT((asmap->failedPDAs[1]->type ==
  243                                    RF_PDA_TYPE_PARITY) ||
  244                                   (asmap->failedPDAs[1]->type ==
  245                                    RF_PDA_TYPE_Q));
  246                         if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
  247                                 *createFunc = (RF_VoidFuncPtr)
  248                                     rf_PQ_101_CreateWriteDAG;
  249                         else
  250                                 *createFunc = (RF_VoidFuncPtr)
  251                                     rf_PQ_110_CreateWriteDAG;
  252                         break;
  253                 case 0: /* Double data loss. */
  254                         *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
  255                         break;
  256                 }
  257                 break;
  258 
  259         default:        /* More than 2 disk faults. */
  260                 *createFunc = NULL;
  261                 RF_PANIC();
  262         }
  263         return;
  264 }
  265 
  266 
  267 /*
  268  * Used as a stop gap info function.
  269  */
  270 #if 0
  271 void
  272 rf_PQOne(RF_Raid_t *raidPtr, int *nSucc, int *nAnte,
  273     RF_AccessStripeMap_t *asmap)
  274 {
  275         *nSucc = *nAnte = 1;
  276 }
  277 
  278 void
  279 rf_PQOneTwo(RF_Raid_t *raidPtr, int *nSucc, int *nAnte,
  280     RF_AccessStripeMap_t *asmap)
  281 {
  282         *nSucc = 1;
  283         *nAnte = 2;
  284 }
  285 #endif
  286 
/*
 * Large-write DAG creator for P+Q.
 *
 * Delegates to the common large-write builder, asking for 2 redundancy
 * units and installing rf_RegularPQFunc as the redundancy function.  The
 * parameter names (raidPtr, asmap, dag_h, bp, flags, allocList) come from
 * the RF_CREATE_DAG_FUNC_DECL macro, which is defined outside this file.
 */
RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
{
	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
	    allocList, 2, rf_RegularPQFunc, RF_FALSE);
}
  292 
  293 int
  294 rf_RegularONQFunc(RF_DagNode_t *node)
  295 {
  296         int np = node->numParams;
  297         int d;
  298         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
  299         int i;
  300         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  301         RF_Etimer_t timer;
  302         char *qbuf, *qpbuf;
  303         char *obuf, *nbuf;
  304         RF_PhysDiskAddr_t *old, *new;
  305         unsigned long coeff;
  306         unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
  307 
  308         RF_ETIMER_START(timer);
  309 
  310         d = (np - 3) / 4;
  311         RF_ASSERT(4 * d + 3 == np);
  312         qbuf = (char *) node->params[2 * d + 1].p;      /* Q buffer. */
  313         for (i = 0; i < d; i++) {
  314                 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
  315                 obuf = (char *) node->params[2 * i + 1].p;
  316                 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
  317                 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
  318                 RF_ASSERT(new->numSector == old->numSector);
  319                 RF_ASSERT(new->raidAddress == old->raidAddress);
  320                 /*
  321                  * The stripe unit within the stripe tells us the coefficient
  322                  * to use for the multiply.
  323                  */
  324                 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
  325                     new->raidAddress);
  326                 /*
  327                  * Compute the data unit offset within the column, then add
  328                  * one.
  329                  */
  330                 coeff = (coeff % raidPtr->Layout.numDataCol);
  331                 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,
  332                     old->startSector % secPerSU);
  333                 rf_QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr,
  334                     old->numSector), coeff);
  335         }
  336 
  337         RF_ETIMER_STOP(timer);
  338         RF_ETIMER_EVAL(timer);
  339         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  340         rf_GenericWakeupFunc(node, 0);  /*
  341                                          * Call wake func explicitly since no
  342                                          * I/O in this node.
  343                                          */
  344         return (0);
  345 }
  346 
  347 
  348 /*
  349  * See the SimpleXORFunc for the difference between a simple and regular func.
  350  * These Q functions should be used for
  351  *      new q = Q(data, old data, old q)
  352  * style updates and not for
  353  *      q = (new data, new data, ...)
  354  * computations.
  355  *
  356  * The simple q takes 2(2d+1)+1 params, where d is the number
  357  * of stripes written. The order of params is
  358  *   old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ...
  359  *   old data pda_d, old data buffer_d
  360  *   [2d] old q pda_0, old q buffer
  361  *   [2d_2] new data pda_0, new data buffer_0, ...
  362  *   new data pda_d, new data buffer_d
  363  *   raidPtr
  364  */
  365 
  366 int
  367 rf_SimpleONQFunc(RF_DagNode_t *node)
  368 {
  369         int np = node->numParams;
  370         int d;
  371         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
  372         int i;
  373         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  374         RF_Etimer_t timer;
  375         char *qbuf;
  376         char *obuf, *nbuf;
  377         RF_PhysDiskAddr_t *old, *new;
  378         unsigned long coeff;
  379 
  380         RF_ETIMER_START(timer);
  381 
  382         d = (np - 3) / 4;
  383         RF_ASSERT(4 * d + 3 == np);
  384         qbuf = (char *) node->params[2 * d + 1].p;      /* Q buffer. */
  385         for (i = 0; i < d; i++) {
  386                 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
  387                 obuf = (char *) node->params[2 * i + 1].p;
  388                 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
  389                 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
  390                 RF_ASSERT(new->numSector == old->numSector);
  391                 RF_ASSERT(new->raidAddress == old->raidAddress);
  392                 /*
  393                  * The stripe unit within the stripe tells us the coefficient
  394                  * to use for the multiply.
  395                  */
  396                 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
  397                     new->raidAddress);
  398                 /*
  399                  * Compute the data unit offset within the column, then add
  400                  * one.
  401                  */
  402                 coeff = (coeff % raidPtr->Layout.numDataCol);
  403                 rf_QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr,
  404                     old->numSector), coeff);
  405         }
  406 
  407         RF_ETIMER_STOP(timer);
  408         RF_ETIMER_EVAL(timer);
  409         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  410         rf_GenericWakeupFunc(node, 0);  /*
  411                                          * Call wake func explicitly since no
  412                                          * I/O in this node.
  413                                          */
  414         return (0);
  415 }
  416 
/*
 * Small-write DAG creator for P+Q.
 *
 * Delegates to the common small-write builder, passing the P and Q
 * redundancy-function tables (rf_pFuncs, rf_qFuncs).  The parameter names
 * come from the RF_CREATE_DAG_FUNC_DECL macro, which is defined outside
 * this file.
 */
RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
{
	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
	    allocList, &rf_pFuncs, &rf_qFuncs);
}
  422 
  423 
  424 void rf_RegularQSubr(RF_DagNode_t *, char *);
  425 
  426 void
  427 rf_RegularQSubr(RF_DagNode_t *node, char *qbuf)
  428 {
  429         int np = node->numParams;
  430         int d;
  431         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
  432         unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
  433         int i;
  434         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  435         RF_Etimer_t timer;
  436         char *obuf, *qpbuf;
  437         RF_PhysDiskAddr_t *old;
  438         unsigned long coeff;
  439 
  440         RF_ETIMER_START(timer);
  441 
  442         d = (np - 1) / 2;
  443         RF_ASSERT(2 * d + 1 == np);
  444         for (i = 0; i < d; i++) {
  445                 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
  446                 obuf = (char *) node->params[2 * i + 1].p;
  447                 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
  448                     old->raidAddress);
  449                 /*
  450                  * Compute the data unit offset within the column, then add
  451                  * one.
  452                  */
  453                 coeff = (coeff % raidPtr->Layout.numDataCol);
  454                 /*
  455                  * The input buffers may not all be aligned with the start of
  456                  * the stripe. So shift by their sector offset within the
  457                  * stripe unit.
  458                  */
  459                 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,
  460                     old->startSector % secPerSU);
  461                 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf,
  462                     rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
  463         }
  464 
  465         RF_ETIMER_STOP(timer);
  466         RF_ETIMER_EVAL(timer);
  467         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  468 }
  469 
  470 
  471 /*
  472  * Used in degraded writes.
  473  */
  474 
  475 void rf_DegrQSubr(RF_DagNode_t *);
  476 
  477 void
  478 rf_DegrQSubr(RF_DagNode_t *node)
  479 {
  480         int np = node->numParams;
  481         int d;
  482         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
  483         unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
  484         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  485         RF_Etimer_t timer;
  486         char *qbuf = node->results[1];
  487         char *obuf, *qpbuf;
  488         RF_PhysDiskAddr_t *old;
  489         unsigned long coeff;
  490         unsigned fail_start;
  491         int i, j;
  492 
  493         old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
  494         fail_start = old->startSector % secPerSU;
  495 
  496         RF_ETIMER_START(timer);
  497 
  498         d = (np - 2) / 2;
  499         RF_ASSERT(2 * d + 2 == np);
  500         for (i = 0; i < d; i++) {
  501                 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
  502                 obuf = (char *) node->params[2 * i + 1].p;
  503                 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
  504                     old->raidAddress);
  505                 /*
  506                  * Compute the data unit offset within the column, then add
  507                  * one.
  508                  */
  509                 coeff = (coeff % raidPtr->Layout.numDataCol);
  510                 /*
  511                  * The input buffers may not all be aligned with the start of
  512                  * the stripe. So shift by their sector offset within the
  513                  * stripe unit.
  514                  */
  515                 j = old->startSector % secPerSU;
  516                 RF_ASSERT(j >= fail_start);
  517                 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
  518                 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf,
  519                     rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
  520         }
  521 
  522         RF_ETIMER_STOP(timer);
  523         RF_ETIMER_EVAL(timer);
  524         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  525 }
  526 
  527 
  528 /*
  529  * Called by large write code to compute the new parity and the new q.
  530  *
  531  * Structure of the params:
  532  *
  533  *   pda_0, buffer_0, pda_1 , buffer_1, ..., pda_d, buffer_d (d = numDataCol)
  534  *   raidPtr
  535  *
  536  * For a total of 2d+1 arguments.
  537  * The result buffers results[0], results[1] are the buffers for the p and q,
  538  * respectively.
  539  *
  540  * We compute Q first, then compute P. The P calculation may try to reuse
  541  * one of the input buffers for its output, so if we computed P first, we would
  542  * corrupt the input for the q calculation.
  543  */
  544 
  545 int
  546 rf_RegularPQFunc(RF_DagNode_t *node)
  547 {
  548         rf_RegularQSubr(node, node->results[1]);
  549         return (rf_RegularXorFunc(node));       /* Does the wakeup. */
  550 }
  551 
  552 int
  553 rf_RegularQFunc(RF_DagNode_t *node)
  554 {
  555         /* Almost ... adjust Qsubr args. */
  556         rf_RegularQSubr(node, node->results[0]);
  557         rf_GenericWakeupFunc(node, 0);  /*
  558                                          * Call wake func explicitly since no
  559                                          * I/O in this node.
  560                                          */
  561         return (0);
  562 }
  563 
  564 
  565 /*
  566  * Called by singly degraded write code to compute the new parity and
  567  * the new q.
  568  *
  569  * Structure of the params:
  570  *
  571  *   pda_0, buffer_0, pda_1 , buffer_1, ..., pda_d, buffer_d
  572  *   failedPDA raidPtr
  573  *
  574  * for a total of 2d+2 arguments.
  575  * The result buffers results[0], results[1] are the buffers for the parity
  576  * and q, respectively.
  577  *
  578  * We compute Q first, then compute parity. The parity calculation may try
  579  * to reuse one of the input buffers for its output, so if we computed parity
  580  * first, we would corrupt the input for the q calculation.
  581  *
  582  * We treat this identically to the regularPQ case, ignoring the failedPDA
  583  * extra argument.
  584  */
  585 
  586 void
  587 rf_Degraded_100_PQFunc(RF_DagNode_t *node)
  588 {
  589         int     np = node->numParams;
  590 
  591         RF_ASSERT(np >= 2);
  592         rf_DegrQSubr(node);
  593         rf_RecoveryXorFunc(node);
  594 }
  595 
  596 
  597 /*
  598  * The two below are used when reading a stripe with a single lost data unit.
  599  * The parameters are
  600  *
  601  *  pda_0, buffer_0, ..., pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
  602  *
  603  * and results[0] contains the data buffer, which is originally zero-filled.
  604  */
  605 
  606 /*
  607  * This Q func is used by the degraded-mode dag functions to recover lost data.
  608  * The second-to-last parameter is the PDA for the failed portion of the
  609  * access. The code here looks at this PDA and assumes that the xor target
  610  * buffer is equal in size to the number of sectors in the failed PDA. It then
  611  * uses the other PDAs in the parameter list to determine where within the
  612  * target buffer the corresponding data should be xored.
  613  *
  614  * Recall the basic equation is
  615  *
  616  *     Q = (data_1 + 2 * data_2 ... + k * data_k) mod 256
  617  *
  618  * so to recover data_j we need
  619  *
  620  *    J data_j = (Q - data_1 - 2 data_2 ... - k * data_k) mod 256
  621  *
  622  * So the coefficient for each buffer is (255 - data_col), and j should be
  623  * initialized by copying Q into it. Then we need to do a table lookup to
  624  * convert to solve
  625  *   data_j /= J
  626  *
  627  */
  628 
  629 int
  630 rf_RecoveryQFunc(RF_DagNode_t *node)
  631 {
  632         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
  633         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
  634         RF_PhysDiskAddr_t *failedPDA =
  635             (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
  636         int i;
  637         RF_PhysDiskAddr_t *pda;
  638         RF_RaidAddr_t suoffset;
  639         RF_RaidAddr_t failedSUOffset =
  640             rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
  641         char *srcbuf, *destbuf;
  642         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  643         RF_Etimer_t timer;
  644         unsigned long coeff;
  645 
  646         RF_ETIMER_START(timer);
  647         /* Start by copying Q into the buffer. */
  648         bcopy(node->params[node->numParams - 3].p, node->results[0],
  649             rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
  650         for (i = 0; i < node->numParams - 4; i += 2) {
  651                 RF_ASSERT(node->params[i + 1].p != node->results[0]);
  652                 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
  653                 srcbuf = (char *) node->params[i + 1].p;
  654                 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
  655                 destbuf = ((char *) node->results[0]) +
  656                     rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
  657                 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
  658                     pda->raidAddress);
  659                 /* Compute the data unit offset within the column. */
  660                 coeff = (coeff % raidPtr->Layout.numDataCol);
  661                 rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf,
  662                     rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
  663         }
  664         /* Do the nasty inversion now. */
  665         coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
  666             failedPDA->startSector) % raidPtr->Layout.numDataCol);
  667         rf_InvertQ(node->results[0], node->results[0],
  668             rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
  669         RF_ETIMER_STOP(timer);
  670         RF_ETIMER_EVAL(timer);
  671         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  672         rf_GenericWakeupFunc(node, 0);
  673         return (0);
  674 }
  675 
  676 int
  677 rf_RecoveryPQFunc(RF_DagNode_t *node)
  678 {
  679         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
  680         printf("raid%d: Recovery from PQ not implemented.\n", raidPtr->raidid);
  681         return (1);
  682 }
  683 
  684 
  685 /*
  686  * Degraded write Q subroutine.
  687  * Used when P is dead.
  688  * Large-write style Q computation.
  689  * Parameters:
  690  *
  691  * (pda, buf), (pda, buf), ..., (failedPDA, bufPtr), failedPDA, raidPtr.
  692  *
  693  * We ignore failedPDA.
  694  *
  695  * This is a "simple style" recovery func.
  696  */
  697 
  698 void
  699 rf_PQ_DegradedWriteQFunc(RF_DagNode_t *node)
  700 {
  701         int np = node->numParams;
  702         int d;
  703         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
  704         unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
  705         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  706         RF_Etimer_t timer;
  707         char *qbuf = node->results[0];
  708         char *obuf, *qpbuf;
  709         RF_PhysDiskAddr_t *old;
  710         unsigned long coeff;
  711         int fail_start, i, j;
  712 
  713         old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
  714         fail_start = old->startSector % secPerSU;
  715 
  716         RF_ETIMER_START(timer);
  717 
  718         d = (np - 2) / 2;
  719         RF_ASSERT(2 * d + 2 == np);
  720 
  721         for (i = 0; i < d; i++) {
  722                 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
  723                 obuf = (char *) node->params[2 * i + 1].p;
  724                 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
  725                     old->raidAddress);
  726                 /*
  727                  * Compute the data unit offset within the column, then add
  728                  * one.
  729                  */
  730                 coeff = (coeff % raidPtr->Layout.numDataCol);
  731                 j = old->startSector % secPerSU;
  732                 RF_ASSERT(j >= fail_start);
  733                 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
  734                 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf,
  735                     rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
  736         }
  737 
  738         RF_ETIMER_STOP(timer);
  739         RF_ETIMER_EVAL(timer);
  740         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  741         rf_GenericWakeupFunc(node, 0);
  742 }
  743 
  744 
  745 /* Q computations. */
  746 
  747 /*
  748  * Coeff - column;
  749  *
  750  * Compute  dest ^= qfor[28-coeff][rn[coeff+1] a]
  751  *
  752  * On 5-bit basis;
  753  * Length in bytes;
  754  */
  755 
  756 void
  757 rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length,
  758     unsigned coeff)
  759 {
  760         unsigned long a, d, new;
  761         unsigned long a1, a2;
  762         unsigned int *q = &(rf_qfor[28 - coeff][0]);
  763         unsigned r = rf_rn[coeff + 1];
  764 
  765 #define EXTRACT(a,i)    ((a >> (5L*i)) & 0x1f)
  766 #define INSERT(a,i)     (a << (5L*i))
  767 
  768         length /= 8;
  769         /* 13 5 bit quants in a 64 bit word. */
  770         while (length) {
  771                 a = *buf++;
  772                 d = *dest;
  773                 a1 = EXTRACT(a, 0) ^ r;
  774                 a2 = EXTRACT(a, 1) ^ r;
  775                 new = INSERT(a2, 1) | a1;
  776                 a1 = EXTRACT(a, 2) ^ r;
  777                 a2 = EXTRACT(a, 3) ^ r;
  778                 a1 = q[a1];
  779                 a2 = q[a2];
  780                 new = new | INSERT(a1, 2) | INSERT(a2, 3);
  781                 a1 = EXTRACT(a, 4) ^ r;
  782                 a2 = EXTRACT(a, 5) ^ r;
  783                 a1 = q[a1];
  784                 a2 = q[a2];
  785                 new = new | INSERT(a1, 4) | INSERT(a2, 5);
  786                 a1 = EXTRACT(a, 5) ^ r;
  787                 a2 = EXTRACT(a, 6) ^ r;
  788                 a1 = q[a1];
  789                 a2 = q[a2];
  790                 new = new | INSERT(a1, 5) | INSERT(a2, 6);
  791 #if     RF_LONGSHIFT > 2
  792                 a1 = EXTRACT(a, 7) ^ r;
  793                 a2 = EXTRACT(a, 8) ^ r;
  794                 a1 = q[a1];
  795                 a2 = q[a2];
  796                 new = new | INSERT(a1, 7) | INSERT(a2, 8);
  797                 a1 = EXTRACT(a, 9) ^ r;
  798                 a2 = EXTRACT(a, 10) ^ r;
  799                 a1 = q[a1];
  800                 a2 = q[a2];
  801                 new = new | INSERT(a1, 9) | INSERT(a2, 10);
  802                 a1 = EXTRACT(a, 11) ^ r;
  803                 a2 = EXTRACT(a, 12) ^ r;
  804                 a1 = q[a1];
  805                 a2 = q[a2];
  806                 new = new | INSERT(a1, 11) | INSERT(a2, 12);
  807 #endif  /* RF_LONGSHIFT > 2 */
  808                 d ^= new;
  809                 *dest++ = d;
  810                 length--;
  811         }
  812 }
  813 
  814 
  815 /*
  816  * Compute.
  817  *
  818  * dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new)]
  819  *
  820  * On a five bit basis.
  821  * Optimization: compute old ^ new on 64 bit basis.
  822  *
  823  * Length in bytes.
  824  */
  825 
  826 void
  827 rf_QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
  828     unsigned char coeff)
  829 {
  830         unsigned long a, d, new;
  831         unsigned long a1, a2;
  832         unsigned int *q = &(rf_qfor[28 - coeff][0]);
  833         unsigned int r = rf_rn[coeff + 1];
  834 
  835         r = a1 = a2 = new = d = a = 0; /* XXX For now... */
  836         q = NULL; /* XXX For now */
  837 
  838 #ifdef  _KERNEL
  839         /*
  840          * PQ in kernel currently not supported because the encoding/decoding
  841          * table is not present.
  842          */
  843         bzero(dest, length);
  844 #else   /* _KERNEL */
  845         /* This code probably doesn't work and should be rewritten. -wvcii */
  846         /* 13 5 bit quants in a 64 bit word. */
  847         length /= 8;
  848         while (length) {
  849                 a = *obuf++;    /*
  850                                  * XXX Need to reorg to avoid cache conflicts.
  851                                  */
  852                 a ^= *nbuf++;
  853                 d = *dest;
  854                 a1 = EXTRACT(a, 0) ^ r;
  855                 a2 = EXTRACT(a, 1) ^ r;
  856                 a1 = q[a1];
  857                 a2 = q[a2];
  858                 new = INSERT(a2, 1) | a1;
  859                 a1 = EXTRACT(a, 2) ^ r;
  860                 a2 = EXTRACT(a, 3) ^ r;
  861                 a1 = q[a1];
  862                 a2 = q[a2];
  863                 new = new | INSERT(a1, 2) | INSERT(a2, 3);
  864                 a1 = EXTRACT(a, 4) ^ r;
  865                 a2 = EXTRACT(a, 5) ^ r;
  866                 a1 = q[a1];
  867                 a2 = q[a2];
  868                 new = new | INSERT(a1, 4) | INSERT(a2, 5);
  869                 a1 = EXTRACT(a, 5) ^ r;
  870                 a2 = EXTRACT(a, 6) ^ r;
  871                 a1 = q[a1];
  872                 a2 = q[a2];
  873                 new = new | INSERT(a1, 5) | INSERT(a2, 6);
  874 #if     RF_LONGSHIFT > 2
  875                 a1 = EXTRACT(a, 7) ^ r;
  876                 a2 = EXTRACT(a, 8) ^ r;
  877                 a1 = q[a1];
  878                 a2 = q[a2];
  879                 new = new | INSERT(a1, 7) | INSERT(a2, 8);
  880                 a1 = EXTRACT(a, 9) ^ r;
  881                 a2 = EXTRACT(a, 10) ^ r;
  882                 a1 = q[a1];
  883                 a2 = q[a2];
  884                 new = new | INSERT(a1, 9) | INSERT(a2, 10);
  885                 a1 = EXTRACT(a, 11) ^ r;
  886                 a2 = EXTRACT(a, 12) ^ r;
  887                 a1 = q[a1];
  888                 a2 = q[a2];
  889                 new = new | INSERT(a1, 11) | INSERT(a2, 12);
  890 #endif  /* RF_LONGSHIFT > 2 */
  891                 d ^= new;
  892                 *dest++ = d;
  893                 length--;
  894         }
  895 #endif  /* _KERNEL */
  896 }
  897 
  898 
  899 /*
  900  * Recover columns a and b from the given p and q into
  901  * bufs abuf and bbuf. All bufs are word aligned.
  902  * Length is in bytes.
  903  */
  904 
  905 /*
  906  * XXX
  907  *
  908  * Everything about this seems wrong.
  909  */
  910 
  911 void
  912 rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf,
  913     unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b)
  914 {
  915         unsigned long p, q, a, a0, a1;
  916         int col = (29 * coeff_a) + coeff_b;
  917         unsigned char *q0 = &(rf_qinv[col][0]);
  918 
  919         length /= 8;
  920         while (length) {
  921                 p = *pbuf++;
  922                 q = *qbuf++;
  923                 a0 = EXTRACT(p, 0);
  924                 a1 = EXTRACT(q, 0);
  925                 a = q0[a0 << 5 | a1];
  926 
  927 #define MF(i)                                                           \
  928 do {                                                                    \
  929         a0 = EXTRACT(p, i);                                             \
  930         a1 = EXTRACT(q, i);                                             \
  931         a  = a | INSERT(q0[a0<<5 | a1], i);                             \
  932 } while (0)
  933 
  934                 MF(1);
  935                 MF(2);
  936                 MF(3);
  937                 MF(4);
  938                 MF(5);
  939                 MF(6);
  940 #if 0
  941                 MF(7);
  942                 MF(8);
  943                 MF(9);
  944                 MF(10);
  945                 MF(11);
  946                 MF(12);
  947 #endif  /* 0 */
  948                 *abuf++ = a;
  949                 *bbuf++ = a ^ p;
  950                 length--;
  951         }
  952 }
  953 
  954 
  955 /*
  956  * Lost parity and a data column. Recover that data column.
  957  * Assume col coeff is lost. Let q the contents of Q after
  958  * all surviving data columns have been q-xored out of it.
  959  * Then we have the equation
  960  *
  961  *   q[28-coeff][a_i ^ r_i+1] = q
  962  *
  963  * but q is cyclic with period 31.
  964  * So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
  965  *    q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
  966  *
  967  * so a_i = r_{coeff+1} ^ q[3+coeff][q]
  968  *
  969  * The routine is passed q buffer and the buffer
  970  * the data is to be recoverd into. They can be the same.
  971  */
  972 
  973 void
  974 rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, unsigned length,
  975     unsigned coeff)
  976 {
  977         unsigned long a, new;
  978         unsigned long a1, a2;
  979         unsigned int *q = &(rf_qfor[3 + coeff][0]);
  980         unsigned r = rf_rn[coeff + 1];
  981 
  982         /* 13 5 bit quants in a 64 bit word. */
  983         length /= 8;
  984         while (length) {
  985                 a = *qbuf++;
  986                 a1 = EXTRACT(a, 0);
  987                 a2 = EXTRACT(a, 1);
  988                 a1 = r ^ q[a1];
  989                 a2 = r ^ q[a2];
  990                 new = INSERT(a2, 1) | a1;
  991 
  992 #define M(i,j)                                                          \
  993 do {                                                                    \
  994         a1 = EXTRACT(a, i);                                             \
  995         a2 = EXTRACT(a, j);                                             \
  996         a1 = r ^ q[a1];                                                 \
  997         a2 = r ^ q[a2];                                                 \
  998         new = new | INSERT(a1, i) | INSERT(a2, j);                      \
  999 } while (0)
 1000 
 1001                 M(2, 3);
 1002                 M(4, 5);
 1003                 M(5, 6);
 1004 #if     RF_LONGSHIFT > 2
 1005                 M(7, 8);
 1006                 M(9, 10);
 1007                 M(11, 12);
 1008 #endif  /* RF_LONGSHIFT > 2 */
 1009                 *abuf++ = new;
 1010                 length--;
 1011         }
 1012 }
 1013 #endif  /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */

/* [<][>][^][v][top][bottom][index][help] */