root/dev/raidframe/rf_decluster.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rf_ConfigureDeclustered
  2. rf_ShutdownDeclusteredDS
  3. rf_ConfigureDeclusteredDS
  4. rf_MapSectorDeclustered
  5. rf_MapParityDeclustered
  6. rf_IdentifyStripeDeclustered
  7. rf_GetDefaultHeadSepLimitDeclustered
  8. rf_GetDefaultNumFloatingReconBuffersDeclustered
  9. rf_decluster_adjust_params
  10. rf_MapSIDToPSIDDeclustered
  11. rf_remap_to_spare_space
  12. rf_InstallSpareTable
  13. rf_SetSpareTable
  14. rf_GetNumSpareRUsDeclustered
  15. rf_FreeSpareTable

    1 /*      $OpenBSD: rf_decluster.c,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */
    2 /*      $NetBSD: rf_decluster.c,v 1.5 2000/03/07 01:54:29 oster Exp $   */
    3 
    4 /*
    5  * Copyright (c) 1995 Carnegie-Mellon University.
    6  * All rights reserved.
    7  *
    8  * Author: Mark Holland
    9  *
   10  * Permission to use, copy, modify and distribute this software and
   11  * its documentation is hereby granted, provided that both the copyright
   12  * notice and this permission notice appear in all copies of the
   13  * software, derivative works or modified versions, and any portions
   14  * thereof, and that both notices appear in supporting documentation.
   15  *
   16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   19  *
   20  * Carnegie Mellon requests users of this software to return to
   21  *
   22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   23  *  School of Computer Science
   24  *  Carnegie Mellon University
   25  *  Pittsburgh PA 15213-3890
   26  *
   27  * any improvements or extensions that they make and grant Carnegie the
   28  * rights to redistribute these changes.
   29  */
   30 
   31 /*****************************************************************************
   32  *
   33  * rf_decluster.c -- Code related to the declustered layout.
   34  *
   35  * Created 10-21-92 (MCH)
   36  *
   37  * Nov 93:      Adding support for distributed sparing. This code is a little
   38  *              complex; the basic layout used is as follows:
   39  *              Let F = (v-1)/GCD(r,v-1). The spare space for each set of
   40  *              F consecutive fulltables is grouped together and placed after
   41  *              that set of tables.
   42  *                      +-------------------------------+
   43  *                      |         F fulltables          |
   44  *                      |         Spare Space           |
   45  *                      |         F fulltables          |
   46  *                      |         Spare Space           |
   47  *                      |             ...               |
   48  *                      +-------------------------------+
   49  *
   50  *****************************************************************************/
   51 
   52 #include "rf_types.h"
   53 #include "rf_raid.h"
   54 #include "rf_raidframe.h"
   55 #include "rf_configure.h"
   56 #include "rf_decluster.h"
   57 #include "rf_debugMem.h"
   58 #include "rf_utils.h"
   59 #include "rf_alloclist.h"
   60 #include "rf_general.h"
   61 #include "rf_shutdown.h"
   62 
   63 extern int rf_copyback_in_progress;     /* Debug only. */
   64 
   65 /* Found in rf_kintf.c */
   66 int  rf_GetSpareTableFromDaemon(RF_SparetWait_t *);
   67 
   68 /* Configuration code. */
   69 
   70 int
   71 rf_ConfigureDeclustered(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
   72     RF_Config_t *cfgPtr)
   73 {
   74         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
   75         int b, v, k, r, lambda; /* block design params */
   76         int i, j;
   77         RF_RowCol_t *first_avail_slot;
   78         RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk;
   79         RF_DeclusteredConfigInfo_t *info;
   80         RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs,
   81             numCompleteSpareRegionsPerDisk, extraPUsPerDisk;
   82         RF_StripeCount_t totSparePUsPerDisk;
   83         RF_SectorNum_t diskOffsetOfLastFullTableInSUs;
   84         RF_SectorCount_t SpareSpaceInSUs;
   85         char *cfgBuf = (char *) (cfgPtr->layoutSpecific);
   86         RF_StripeNum_t l, SUID;
   87 
   88         SUID = l = 0;
   89         numCompleteSpareRegionsPerDisk = 0;
   90 
   91         /* 1. Create layout specific structure. */
   92         RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t),
   93             (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList);
   94         if (info == NULL)
   95                 return (ENOMEM);
   96         layoutPtr->layoutSpecificInfo = (void *) info;
   97         info->SpareTable = NULL;
   98 
   99         /* 2. Extract parameters from the config structure. */
  100         if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
  101                 bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN);
  102         }
  103         cfgBuf += RF_SPAREMAP_NAME_LEN;
  104 
  105         b = *((int *) cfgBuf);
  106         cfgBuf += sizeof(int);
  107         v = *((int *) cfgBuf);
  108         cfgBuf += sizeof(int);
  109         k = *((int *) cfgBuf);
  110         cfgBuf += sizeof(int);
  111         r = *((int *) cfgBuf);
  112         cfgBuf += sizeof(int);
  113         lambda = *((int *) cfgBuf);
  114         cfgBuf += sizeof(int);
  115         raidPtr->noRotate = *((int *) cfgBuf);
  116         cfgBuf += sizeof(int);
  117 
  118         /*
  119          * The sparemaps are generated assuming that parity is rotated, so we
  120          * issue a warning if both distributed sparing and no-rotate are on at
  121          * the same time.
  122          */
  123         if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) &&
  124             raidPtr->noRotate) {
  125                 RF_ERRORMSG("Warning:  distributed sparing specified without"
  126                     " parity rotation.\n");
  127         }
  128         if (raidPtr->numCol != v) {
  129                 RF_ERRORMSG2("RAID: config error: table element count (%d)"
  130                     " not equal to no. of cols (%d).\n", v, raidPtr->numCol);
  131                 return (EINVAL);
  132         }
  133         /* 3. Set up the values used in the mapping code. */
  134         info->BlocksPerTable = b;
  135         info->Lambda = lambda;
  136         info->NumParityReps = info->groupSize = k;
  137         /* b blks, k-1 SUs each. */
  138         info->SUsPerTable = b * (k - 1) * layoutPtr->SUsPerPU;
  139         info->SUsPerFullTable = k * info->SUsPerTable;  /* rot k times */
  140         info->PUsPerBlock = k - 1;
  141         info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU;
  142         info->TableDepthInPUs = (b * k) / v;
  143         /* k repetitions. */
  144         info->FullTableDepthInPUs = info->TableDepthInPUs * k;
  145 
  146         /* Used only in distributed sparing case. */
  147         /* (v-1)/gcd fulltables. */
  148         info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1);
  149         info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion;
  150         info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion /
  151             (v - 1)) * layoutPtr->SUsPerPU;
  152 
  153         /* Check to make sure the block design is sufficiently small. */
  154         if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
  155                 if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU +
  156                     info->SpareSpaceDepthPerRegionInSUs >
  157                     layoutPtr->stripeUnitsPerDisk) {
  158                         RF_ERRORMSG3("RAID: config error: Full Table depth"
  159                             " (%d) + Spare Space (%d) larger than disk size"
  160                             " (%d) (BD too big).\n",
  161                             (int) info->FullTableDepthInPUs,
  162                             (int) info->SpareSpaceDepthPerRegionInSUs,
  163                             (int) layoutPtr->stripeUnitsPerDisk);
  164                         return (EINVAL);
  165                 }
  166         } else {
  167                 if (info->TableDepthInPUs * layoutPtr->SUsPerPU >
  168                     layoutPtr->stripeUnitsPerDisk) {
  169                         RF_ERRORMSG2("RAID: config error: Table depth (%d)"
  170                             " larger than disk size (%d) (BD too big).\n",
  171                             (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU),
  172                             (int) layoutPtr->stripeUnitsPerDisk);
  173                         return (EINVAL);
  174                 }
  175         }
  176 
  177 
  178         /*
  179          * Compute the size of each disk, and the number of tables in the last
  180          * fulltable (which need not be complete).
  181          */
  182         if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
  183 
  184                 PUsPerDisk = layoutPtr->stripeUnitsPerDisk /
  185                     layoutPtr->SUsPerPU;
  186                 spareRegionDepthInPUs =
  187                     (info->TablesPerSpareRegion * info->TableDepthInPUs +
  188                     (info->TablesPerSpareRegion * info->TableDepthInPUs) /
  189                     (v - 1));
  190                 info->SpareRegionDepthInSUs =
  191                     spareRegionDepthInPUs * layoutPtr->SUsPerPU;
  192 
  193                 numCompleteSpareRegionsPerDisk =
  194                     PUsPerDisk / spareRegionDepthInPUs;
  195                 info->NumCompleteSRs = numCompleteSpareRegionsPerDisk;
  196                 extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs;
  197 
  198                 /*
  199                  * Assume conservatively that we need the full amount of spare
  200                  * space in one region in order to provide spares for the
  201                  * partial spare region at the end of the array. We set "i"
  202                  * to the number of tables in the partial spare region. This
  203                  * may actually include some fulltables.
  204                  */
  205                 extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs /
  206                     layoutPtr->SUsPerPU);
  207                 if (extraPUsPerDisk <= 0)
  208                         i = 0;
  209                 else
  210                         i = extraPUsPerDisk / info->TableDepthInPUs;
  211 
  212                 complete_FT_count = raidPtr->numRow *
  213                     (numCompleteSpareRegionsPerDisk *
  214                     (info->TablesPerSpareRegion / k) + i / k);
  215                 info->FullTableLimitSUID =
  216                     complete_FT_count * info->SUsPerFullTable;
  217                 info->ExtraTablesPerDisk = i % k;
  218 
  219                 /*
  220                  * Note that in the last spare region, the spare space is
  221                  * complete even though data/parity space is not.
  222                  */
  223                 totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) *
  224                     (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
  225                 info->TotSparePUsPerDisk = totSparePUsPerDisk;
  226 
  227                 layoutPtr->stripeUnitsPerDisk =
  228                     ((complete_FT_count / raidPtr->numRow) *
  229                     info->FullTableDepthInPUs + /* data & parity space */
  230                     info->ExtraTablesPerDisk * info->TableDepthInPUs +
  231                     totSparePUsPerDisk          /* spare space */
  232                     ) * layoutPtr->SUsPerPU;
  233                 layoutPtr->dataStripeUnitsPerDisk =
  234                     (complete_FT_count * info->FullTableDepthInPUs +
  235                     info->ExtraTablesPerDisk * info->TableDepthInPUs) *
  236                     layoutPtr->SUsPerPU * (k - 1) / k;
  237 
  238         } else {
  239                 /*
  240                  * Non-dist spare case:  force each disk to contain an
  241                  * integral number of tables.
  242                  */
  243                 layoutPtr->stripeUnitsPerDisk /=
  244                     (info->TableDepthInPUs * layoutPtr->SUsPerPU);
  245                 layoutPtr->stripeUnitsPerDisk *=
  246                     (info->TableDepthInPUs * layoutPtr->SUsPerPU);
  247 
  248                 /*
  249                  * Compute the number of tables in the last fulltable, which
  250                  * need not be complete.
  251                  */
  252                 complete_FT_count =
  253                     ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) /
  254                     info->FullTableDepthInPUs) * raidPtr->numRow;
  255 
  256                 info->FullTableLimitSUID =
  257                     complete_FT_count * info->SUsPerFullTable;
  258                 info->ExtraTablesPerDisk =
  259                     ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) /
  260                     info->TableDepthInPUs) % k;
  261         }
  262 
  263         raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk *
  264                     layoutPtr->sectorsPerStripeUnit;
  265 
  266         /*
  267          * Find the disk offset of the stripe unit where the last fulltable
  268          * starts.
  269          */
  270         numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow;
  271         diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk *
  272             info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
  273         if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
  274                 SpareSpaceInSUs = numCompleteSpareRegionsPerDisk *
  275                     info->SpareSpaceDepthPerRegionInSUs;
  276                 diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs;
  277                 info->DiskOffsetOfLastSpareSpaceChunkInSUs =
  278                     diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk *
  279                     info->TableDepthInPUs * layoutPtr->SUsPerPU;
  280         }
  281         info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs;
  282         info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk;
  283 
  284         /* 4. Create and initialize the lookup tables. */
  285         info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
  286         if (info->LayoutTable == NULL)
  287                 return (ENOMEM);
  288         info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
  289         if (info->OffsetTable == NULL)
  290                 return (ENOMEM);
  291         info->BlockTable = rf_make_2d_array(info->TableDepthInPUs *
  292             layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList);
  293         if (info->BlockTable == NULL)
  294                 return (ENOMEM);
  295 
  296         first_avail_slot = rf_make_1d_array(v, NULL);
  297         if (first_avail_slot == NULL)
  298                 return (ENOMEM);
  299 
  300         for (i = 0; i < b; i++)
  301                 for (j = 0; j < k; j++)
  302                         info->LayoutTable[i][j] = *cfgBuf++;
  303 
  304         /* Initialize the offset table. */
  305         for (i = 0; i < b; i++)
  306                 for (j = 0; j < k; j++) {
  307                         info->OffsetTable[i][j] =
  308                             first_avail_slot[info->LayoutTable[i][j]];
  309                         first_avail_slot[info->LayoutTable[i][j]]++;
  310                 }
  311 
  312         /* Initialize the block table. */
  313         for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) {
  314                 for (i = 0; i < b; i++) {
  315                         for (j = 0; j < k; j++) {
  316                                 info->BlockTable[(info->OffsetTable[i][j] *
  317                                     layoutPtr->SUsPerPU) + l]
  318                                     [info->LayoutTable[i][j]] = SUID;
  319                         }
  320                         SUID++;
  321                 }
  322         }
  323 
  324         rf_free_1d_array(first_avail_slot, v);
  325 
  326         /* 5. Set up the remaining redundant-but-useful parameters. */
  327 
  328         raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow *
  329             info->ExtraTablesPerDisk) * info->SUsPerTable *
  330             layoutPtr->sectorsPerStripeUnit;
  331         layoutPtr->numStripe = (raidPtr->totalSectors /
  332             layoutPtr->sectorsPerStripeUnit) / (k - 1);
  333 
  334         /*
  335          * Strange evaluation order below to try and minimize overflow
  336          * problems.
  337          */
  338 
  339         layoutPtr->dataSectorsPerStripe =
  340             (k - 1) * layoutPtr->sectorsPerStripeUnit;
  341         layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit <<
  342             raidPtr->logBytesPerSector;
  343         layoutPtr->numDataCol = k - 1;
  344         layoutPtr->numParityCol = 1;
  345 
  346         return (0);
  347 }
  348 
  349 /* Declustering with distributed sparing. */
  350 void rf_ShutdownDeclusteredDS(RF_ThreadArg_t);
  351 void
  352 rf_ShutdownDeclusteredDS(RF_ThreadArg_t arg)
  353 {
  354         RF_DeclusteredConfigInfo_t *info;
  355         RF_Raid_t *raidPtr;
  356 
  357         raidPtr = (RF_Raid_t *) arg;
  358         info =
  359             (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  360         if (info->SpareTable)
  361                 rf_FreeSpareTable(raidPtr);
  362 }
  363 
  364 int
  365 rf_ConfigureDeclusteredDS(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
  366     RF_Config_t *cfgPtr)
  367 {
  368         int rc;
  369 
  370         rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr);
  371         if (rc)
  372                 return (rc);
  373 
  374         rc = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr);
  375         if (rc) {
  376                 RF_ERRORMSG1("Got %d adding shutdown event for"
  377                     " DeclusteredDS.\n", rc);
  378                 rf_ShutdownDeclusteredDS(raidPtr);
  379                 return (rc);
  380         }
  381 
  382         return (0);
  383 }
  384 
  385 void
  386 rf_MapSectorDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
  387     RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
  388 {
  389         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  390         RF_DeclusteredConfigInfo_t *info =
  391             (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  392         RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
  393         RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
  394         RF_StripeNum_t BlockID, BlockOffset, RepIndex;
  395         RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
  396         RF_StripeCount_t fulltable_depth =
  397             info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
  398         RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;
  399 
  400         rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable,
  401             &fulltable_depth, &base_suid);
  402 
  403         /* Fulltable ID within array (across rows). */
  404         FullTableID = SUID / sus_per_fulltable;
  405         if (raidPtr->numRow == 1)
  406                 *row = 0;       /* Avoid a mod and a div in the common case. */
  407         else {
  408                 *row = FullTableID % raidPtr->numRow;
  409                 /* Convert to fulltable ID on this disk. */
  410                 FullTableID /= raidPtr->numRow;
  411         }
  412         if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
  413                 SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
  414                 SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
  415         }
  416         FullTableOffset = SUID % sus_per_fulltable;
  417         TableID = FullTableOffset / info->SUsPerTable;
  418         TableOffset = FullTableOffset - TableID * info->SUsPerTable;
  419         BlockID = TableOffset / info->PUsPerBlock;
  420         BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
  421         BlockID %= info->BlocksPerTable;
  422         RepIndex = info->PUsPerBlock - TableID;
  423         if (!raidPtr->noRotate)
  424                 BlockOffset += ((BlockOffset >= RepIndex) ? 1 : 0);
  425         *col = info->LayoutTable[BlockID][BlockOffset];
  426 
  427         /* Remap to distributed spare space if indicated. */
  428         if (remap) {
  429                 RF_ASSERT(raidPtr->Disks[*row][*col].status ==
  430                     rf_ds_reconstructing ||
  431                     raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
  432                     (rf_copyback_in_progress &&
  433                     raidPtr->Disks[*row][*col].status == rf_ds_optimal));
  434                 rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID,
  435                     TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col,
  436                     &outSU);
  437         } else {
  438 
  439                 outSU = base_suid;
  440                 outSU += FullTableID * fulltable_depth;
  441                         /* Offset to start of FT. */
  442                 outSU += SpareSpace;
  443                         /* Skip rsvd spare space. */
  444                 outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
  445                         /* Offset to start of table. */
  446                 outSU += info->OffsetTable[BlockID][BlockOffset] *
  447                     layoutPtr->SUsPerPU;
  448                         /* Offset to the PU. */
  449         }
  450         outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
  451                 /* offs to the SU within a PU */
  452 
  453         /*
  454          * Convert SUs to sectors, and, if not aligned to SU boundary, add in
  455          * offset to sector.
  456          */
  457         *diskSector = outSU * layoutPtr->sectorsPerStripeUnit +
  458             (raidSector % layoutPtr->sectorsPerStripeUnit);
  459 
  460         RF_ASSERT(*col != -1);
  461 }
  462 
  463 /*
  464  * Prototyping this inexplicably causes the compile of the layout table
  465  * (rf_layout.c) to fail.
  466  */
  467 void
  468 rf_MapParityDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
  469     RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
  470 {
  471         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  472         RF_DeclusteredConfigInfo_t *info =
  473             (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  474         RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
  475         RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
  476         RF_StripeNum_t BlockID, BlockOffset, RepIndex;
  477         RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
  478         RF_StripeCount_t fulltable_depth =
  479             info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
  480         RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;
  481 
  482         rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable,
  483             &fulltable_depth, &base_suid);
  484 
  485         /* Compute row & (possibly) spare space exactly as before. */
  486         FullTableID = SUID / sus_per_fulltable;
  487         if (raidPtr->numRow == 1)
  488                 *row = 0;       /* Avoid a mod and a div in the common case. */
  489         else {
  490                 *row = FullTableID % raidPtr->numRow;
  491                 /* Convert to fulltable ID on this disk. */
  492                 FullTableID /= raidPtr->numRow;
  493         }
  494         if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
  495                 SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
  496                 SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
  497         }
  498         /* Compute BlockID and RepIndex exactly as before. */
  499         FullTableOffset = SUID % sus_per_fulltable;
  500         TableID = FullTableOffset / info->SUsPerTable;
  501         TableOffset = FullTableOffset - TableID * info->SUsPerTable;
  502         /*TableOffset   = FullTableOffset % info->SUsPerTable;*/
  503         /*BlockID       = (TableOffset / info->PUsPerBlock) %
  504          *info->BlocksPerTable;*/
  505         BlockID = TableOffset / info->PUsPerBlock;
  506         /*BlockOffset   = TableOffset % info->PUsPerBlock;*/
  507         BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
  508         BlockID %= info->BlocksPerTable;
  509 
  510         /* The parity block is in the position indicated by RepIndex. */
  511         RepIndex = (raidPtr->noRotate) ?
  512             info->PUsPerBlock : info->PUsPerBlock - TableID;
  513         *col = info->LayoutTable[BlockID][RepIndex];
  514 
  515         if (remap) {
  516                 RF_ASSERT(raidPtr->Disks[*row][*col].status ==
  517                     rf_ds_reconstructing ||
  518                     raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
  519                     (rf_copyback_in_progress &&
  520                     raidPtr->Disks[*row][*col].status == rf_ds_optimal));
  521                 rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID,
  522                     TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col,
  523                     &outSU);
  524         } else {
  525 
  526                 /*
  527                  * Compute sector as before, except use RepIndex instead of
  528                  * BlockOffset.
  529                  */
  530                 outSU = base_suid;
  531                 outSU += FullTableID * fulltable_depth;
  532                 outSU += SpareSpace;    /* skip rsvd spare space */
  533                 outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
  534                 outSU += info->OffsetTable[BlockID][RepIndex] *
  535                     layoutPtr->SUsPerPU;
  536         }
  537 
  538         outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
  539         *diskSector = outSU * layoutPtr->sectorsPerStripeUnit +
  540             (raidSector % layoutPtr->sectorsPerStripeUnit);
  541 
  542         RF_ASSERT(*col != -1);
  543 }
  544 
  545 /*
  546  * Return an array of ints identifying the disks that comprise the stripe
  547  * containing the indicated address.
  548  * The caller must _never_ attempt to modify this array.
  549  */
  550 void
  551 rf_IdentifyStripeDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
  552     RF_RowCol_t **diskids, RF_RowCol_t *outRow)
  553 {
  554         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  555         RF_DeclusteredConfigInfo_t *info =
  556             (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  557         RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
  558         RF_StripeCount_t fulltable_depth =
  559             info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
  560         RF_StripeNum_t base_suid = 0;
  561         RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr);
  562         RF_StripeNum_t stripeID, FullTableID;
  563         int tableOffset;
  564 
  565         rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable,
  566             &fulltable_depth, &base_suid);
  567         /* Fulltable ID within array (across rows). */
  568         FullTableID = SUID / sus_per_fulltable;
  569         *outRow = FullTableID % raidPtr->numRow;
  570         /* Find stripe offset into array. */
  571         stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID);
  572         /* Find offset into block design table. */
  573         tableOffset = (stripeID % info->BlocksPerTable);
  574         *diskids = info->LayoutTable[tableOffset];
  575 }
  576 
  577 /*
  578  * This returns the default head-separation limit, measured in
  579  * "required units for reconstruction". Each time a disk fetches
  580  * a unit, it bumps a counter. The head-sep code prohibits any disk
  581  * from getting more than headSepLimit counter values ahead of any
  582  * other.
  583  *
  584  * We assume here that the number of floating recon buffers is already
  585  * set. There are r stripes to be reconstructed in each table, and so
  586  * if we have a total of B buffers, we can have at most B/r tables
  587  * under recon at any one time. In each table, lambda units are required
  588  * from each disk, so given B buffers, the head sep limit has to be
  589  * (lambda*B)/r units. We subtract one to avoid weird boundary cases.
  590  *
  591  * For example, suppose we are given 50 buffers, r=19, and lambda=4 as in
  592  * the 20.5 design. There are 19 stripes/table to be reconstructed, so
  593  * we can have 50/19 tables concurrently under reconstruction, which means
  594  * we can allow the fastest disk to get 50/19 tables ahead of the slower
  595  * disk. There are lambda "required units" for each disk, so the fastest
  596  * disk can get 4*50/19 = 10 counter values ahead of the slowest.
  597  *
  598  * If numBufsToAccumulate is not 1, we need to limit the head sep further
  599  * because multiple bufs will be required for each stripe under recon.
  600  */
  601 RF_HeadSepLimit_t
  602 rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t *raidPtr)
  603 {
  604         RF_DeclusteredConfigInfo_t *info =
  605             (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  606 
  607         return (info->Lambda * raidPtr->numFloatingReconBufs /
  608             info->TableDepthInPUs / rf_numBufsToAccumulate);
  609 }
  610 
  611 /*
  612  * Return the default number of recon buffers to use. The value
  613  * is somewhat arbitrary...  It's intended to be large enough to
  614  * allow for a reasonably large head-sep limit, but small enough
  615  * that you don't use up all your system memory with buffers.
  616  */
  617 int
  618 rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr)
  619 {
  620         return (100 * rf_numBufsToAccumulate);
  621 }
  622 
  623 /*
  624  * Sectors in the last fulltable of the array need to be handled
  625  * specially since this fulltable can be incomplete. This function
  626  * changes the values of certain params to handle this.
  627  *
  628  * The idea here is that MapSector et. al. figure out which disk the
  629  * addressed unit lives on by computing the modulos of the unit number
  630  * with the number of units per fulltable, table, etc.  In the last
  631  * fulltable, there are fewer units per fulltable, so we need to adjust
  632  * the number of user data units per fulltable to reflect this.
  633  *
  634  * So, we (1) convert the fulltable size and depth parameters to
  635  * the size of the partial fulltable at the end, (2) compute the
  636  * disk sector offset where this fulltable starts, and (3) convert
  637  * the users stripe unit number from an offset into the array to
  638  * an offset into the last fulltable.
  639  */
  640 void
  641 rf_decluster_adjust_params(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t *SUID,
  642     RF_StripeCount_t *sus_per_fulltable, RF_StripeCount_t *fulltable_depth,
  643     RF_StripeNum_t *base_suid)
  644 {
  645         RF_DeclusteredConfigInfo_t *info =
  646             (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  647 
  648         if (*SUID >= info->FullTableLimitSUID) {
  649                 /* New full table size is size of last full table on disk. */
  650                 *sus_per_fulltable =
  651                     info->ExtraTablesPerDisk * info->SUsPerTable;
  652 
  653                 /* New full table depth is corresponding depth. */
  654                 *fulltable_depth =
  655                     info->ExtraTablesPerDisk * info->TableDepthInPUs *
  656                     layoutPtr->SUsPerPU;
  657 
  658                 /* Set up the new base offset. */
  659                 *base_suid = info->DiskOffsetOfLastFullTableInSUs;
  660 
  661                 /*
  662                  * Convert user's array address to an offset into the last
  663                  * fulltable.
  664                  */
  665                 *SUID -= info->FullTableLimitSUID;
  666         }
  667 }
  668 
  669 /*
  670  * Map a stripe ID to a parity stripe ID.
  671  * See comment above RaidAddressToParityStripeID in layout.c.
  672  */
  673 void
  674 rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID,
  675     RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru)
  676 {
  677         RF_DeclusteredConfigInfo_t *info;
  678 
  679         info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  680 
  681         *psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable)) *
  682             info->BlocksPerTable + (stripeID % info->BlocksPerTable);
  683         *which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU)) /
  684             info->BlocksPerTable;
  685         RF_ASSERT((*which_ru) < layoutPtr->SUsPerPU / layoutPtr->SUsPerRU);
  686 }
  687 
  688 /*
  689  * Called from MapSector and MapParity to retarget an access at the spare unit.
  690  * Modifies the "col" and "outSU" parameters only.
  691  */
  692 void
  693 rf_remap_to_spare_space(RF_RaidLayout_t *layoutPtr,
  694     RF_DeclusteredConfigInfo_t *info, RF_RowCol_t row,
  695     RF_StripeNum_t FullTableID, RF_StripeNum_t TableID, RF_SectorNum_t BlockID,
  696     RF_StripeNum_t base_suid, RF_StripeNum_t SpareRegion, RF_RowCol_t *outCol,
  697     RF_StripeNum_t *outSU)
  698 {
  699         RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion,
  700             lastSROffset, which_ft;
  701 
  702         /*
  703          * Note that FullTableID and hence SpareRegion may have gotten
  704          * tweaked by rf_decluster_adjust_params. We detect this by
  705          * noticing that base_suid is not 0.
  706          */
  707         if (base_suid == 0) {
  708                 ftID = FullTableID;
  709         } else {
  710                 /*
  711                  * There may be > 1.0 full tables in the last (i.e. partial)
  712                  * spare region. Find out which of these we are in.
  713                  */
  714                 lastSROffset = info->NumCompleteSRs *
  715                     info->SpareRegionDepthInSUs;
  716                 which_ft =
  717                     (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) /
  718                     (info->FullTableDepthInPUs * layoutPtr->SUsPerPU);
  719 
  720                 /* Compute the actual full table ID. */
  721                 ftID = info->DiskOffsetOfLastFullTableInSUs /
  722                     (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) +
  723                     which_ft;
  724                 SpareRegion = info->NumCompleteSRs;
  725         }
  726         TableInSpareRegion = (ftID * info->NumParityReps + TableID) %
  727             info->TablesPerSpareRegion;
  728 
  729         *outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk;
  730         RF_ASSERT(*outCol != -1);
  731 
  732         spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ?
  733             info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk *
  734             info->TableDepthInPUs * layoutPtr->SUsPerPU :
  735             (SpareRegion + 1) * info->SpareRegionDepthInSUs -
  736             info->SpareSpaceDepthPerRegionInSUs;
  737         *outSU = spareTableStartSU +
  738             info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs;
  739         if (*outSU >= layoutPtr->stripeUnitsPerDisk) {
  740                 printf("rf_remap_to_spare_space: invalid remapped disk SU"
  741                     " offset %ld.\n", (long) *outSU);
  742         }
  743 }
  744 
  745 int
  746 rf_InstallSpareTable(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol)
  747 {
  748         RF_DeclusteredConfigInfo_t *info =
  749             (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  750         RF_SparetWait_t *req;
  751         int retcode;
  752 
  753         RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *));
  754         req->C = raidPtr->numCol;
  755         req->G = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol;
  756         req->fcol = fcol;
  757         req->SUsPerPU = raidPtr->Layout.SUsPerPU;
  758         req->TablesPerSpareRegion = info->TablesPerSpareRegion;
  759         req->BlocksPerTable = info->BlocksPerTable;
  760         req->TableDepthInPUs = info->TableDepthInPUs;
  761         req->SpareSpaceDepthPerRegionInSUs =
  762             info->SpareSpaceDepthPerRegionInSUs;
  763 
  764         retcode = rf_GetSpareTableFromDaemon(req);
  765         RF_ASSERT(!retcode);
  766         /* XXX -- Fix this to recover gracefully. -- XXX */
  767 
  768         return (retcode);
  769 }
  770 
  771 /*
  772  * Invoked via ioctl to install a spare table in the kernel.
  773  */
  774 int
  775 rf_SetSpareTable(RF_Raid_t *raidPtr, void *data)
  776 {
  777         RF_DeclusteredConfigInfo_t *info =
  778             (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  779         RF_SpareTableEntry_t **ptrs;
  780         int i, retcode;
  781 
  782         /*
  783          * What we need to copyin is a 2-d array, so first copyin the user
  784          * pointers to the rows in the table.
  785          */
  786         RF_Malloc(ptrs, info->TablesPerSpareRegion *
  787             sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
  788         retcode = copyin((caddr_t) data, (caddr_t) ptrs,
  789             info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
  790 
  791         if (retcode)
  792                 return (retcode);
  793 
  794         /* Now allocate kernel space for the row pointers. */
  795         RF_Malloc(info->SpareTable, info->TablesPerSpareRegion *
  796             sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
  797 
  798         /*
  799          * Now allocate kernel space for each row in the table, and copy it in
  800          * from user space. */
  801         for (i = 0; i < info->TablesPerSpareRegion; i++) {
  802                 RF_Malloc(info->SpareTable[i], info->BlocksPerTable *
  803                     sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *));
  804                 retcode = copyin(ptrs[i], info->SpareTable[i],
  805                     info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
  806                 if (retcode) {
  807                         /* Blow off the memory we have allocated. */
  808                         info->SpareTable = NULL;
  809                         return (retcode);
  810                 }
  811         }
  812 
  813         /* Free up the temporary array we used. */
  814         RF_Free(ptrs, info->TablesPerSpareRegion *
  815             sizeof(RF_SpareTableEntry_t *));
  816 
  817         return (0);
  818 }
  819 
  820 RF_ReconUnitCount_t
  821 rf_GetNumSpareRUsDeclustered(RF_Raid_t *raidPtr)
  822 {
  823         RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
  824 
  825         return (((RF_DeclusteredConfigInfo_t *)
  826             layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk);
  827 }
  828 
  829 
  830 void
  831 rf_FreeSpareTable(RF_Raid_t *raidPtr)
  832 {
  833         long i;
  834         RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
  835         RF_DeclusteredConfigInfo_t *info =
  836             (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  837         RF_SpareTableEntry_t **table = info->SpareTable;
  838 
  839         for (i = 0; i < info->TablesPerSpareRegion; i++) {
  840                 RF_Free(table[i], info->BlocksPerTable *
  841                     sizeof(RF_SpareTableEntry_t));
  842         }
  843         RF_Free(table, info->TablesPerSpareRegion *
  844             sizeof(RF_SpareTableEntry_t *));
  845         info->SpareTable = (RF_SpareTableEntry_t **) NULL;
  846 }

/* [<][>][^][v][top][bottom][index][help] */