dev/raidframe/rf

/* [<][>][^][v][top][bottom][index][help] */
This source file includes the following definitions.
rf_ConfigureDisks
rf_ConfigureSpareDisks
rf_AllocDiskStructures
rf_AutoConfigureDisks
rf_ConfigureDisk
rf_print_label_status
rf_check_label_vitals
rf_CheckLabels
rf_add_hot_spare
rf_remove_hot_spare
rf_delete_component
rf_incorporate_hot_spare
    1 /*      $OpenBSD: rf_disks.c,v 1.12 2007/06/05 00:38:22 deraadt Exp $   */
    2 /*      $NetBSD: rf_disks.c,v 1.31 2000/06/02 01:17:14 oster Exp $      */
    3 
    4 /*
    5  * Copyright (c) 1999 The NetBSD Foundation, Inc.
    6  * All rights reserved.
    7  *
    8  * This code is derived from software contributed to The NetBSD Foundation
    9  * by Greg Oster
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 3. All advertising materials mentioning features or use of this software
   20  *    must display the following acknowledgement:
   21  *      This product includes software developed by the NetBSD
   22  *      Foundation, Inc. and its contributors.
   23  * 4. Neither the name of The NetBSD Foundation nor the names of its
   24  *    contributors may be used to endorse or promote products derived
   25  *    from this software without specific prior written permission.
   26  *
   27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   37  * POSSIBILITY OF SUCH DAMAGE.
   38  */
   39 /*
   40  * Copyright (c) 1995 Carnegie-Mellon University.
   41  * All rights reserved.
   42  *
   43  * Author: Mark Holland
   44  *
   45  * Permission to use, copy, modify and distribute this software and
   46  * its documentation is hereby granted, provided that both the copyright
   47  * notice and this permission notice appear in all copies of the
   48  * software, derivative works or modified versions, and any portions
   49  * thereof, and that both notices appear in supporting documentation.
   50  *
   51  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   52  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   53  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   54  *
   55  * Carnegie Mellon requests users of this software to return to
   56  *
   57  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   58  *  School of Computer Science
   59  *  Carnegie Mellon University
   60  *  Pittsburgh PA 15213-3890
   61  *
   62  * any improvements or extensions that they make and grant Carnegie the
   63  * rights to redistribute these changes.
   64  */
   65 
   66 /***************************************************************
   67  * rf_disks.c -- Code to perform operations on the actual disks.
   68  ***************************************************************/
   69 
   70 #include "rf_types.h"
   71 #include "rf_raid.h"
   72 #include "rf_alloclist.h"
   73 #include "rf_utils.h"
   74 #include "rf_configure.h"
   75 #include "rf_general.h"
   76 #include "rf_options.h"
   77 #include "rf_kintf.h"
   78 
   79 #if defined(__NetBSD__)
   80 #include "rf_netbsd.h"
   81 #elif defined(__OpenBSD__)
   82 #include "rf_openbsd.h"
   83 #endif
   84 
   85 #include <sys/types.h>
   86 #include <sys/param.h>
   87 #include <sys/systm.h>
   88 #include <sys/proc.h>
   89 #include <sys/ioctl.h>
   90 #include <sys/fcntl.h>
   91 #ifdef  __NETBSD__
   92 #include <sys/vnode.h>
   93 #endif  /* __NETBSD__ */
   94 
/*
 * Prototypes for routines that are defined further down in this file.
 * rf_AllocDiskStructures() is called by the configuration entry points
 * below before its definition appears.
 */
int  rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
void rf_print_label_status(RF_Raid_t *, int, int, char *,
        RF_ComponentLabel_t *);
int  rf_check_label_vitals(RF_Raid_t *, int, int, char *,
        RF_ComponentLabel_t *, int, int);
  100 
  101 #define DPRINTF6(a,b,c,d,e,f)   if (rf_diskDebug) printf(a,b,c,d,e,f)
  102 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g)
  103 
  104 /****************************************************************************
  105  *
  106  * Initialize the disks comprising the array.
  107  *
  108  * We want the spare disks to have regular row,col numbers so that we can
  109  * easily substitue a spare for a failed disk. But, the driver code assumes
  110  * throughout that the array contains numRow by numCol _non-spare_ disks, so
  111  * it's not clear how to fit in the spares. This is an unfortunate holdover
  112  * from raidSim. The quick and dirty fix is to make row zero bigger than the
  113  * rest, and put all the spares in it. This probably needs to get changed
  114  * eventually.
  115  *
  116  ****************************************************************************/
  117 int
  118 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
  119     RF_Config_t *cfgPtr)
  120 {
  121         RF_RaidDisk_t **disks;
  122         RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
  123         RF_RowCol_t r, c;
  124         int bs, ret;
  125         unsigned i, count, foundone = 0, numFailuresThisRow;
  126         int force;
  127 
  128         force = cfgPtr->force;
  129 
  130         ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
  131         if (ret)
  132                 goto fail;
  133 
  134         disks = raidPtr->Disks;
  135 
  136         for (r = 0; r < raidPtr->numRow; r++) {
  137                 numFailuresThisRow = 0;
  138                 for (c = 0; c < raidPtr->numCol; c++) {
  139                         ret = rf_ConfigureDisk(raidPtr,
  140                             &cfgPtr->devnames[r][c][0], &disks[r][c], r, c);
  141 
  142                         if (ret)
  143                                 goto fail;
  144 
  145                         if (disks[r][c].status == rf_ds_optimal) {
  146                                 raidread_component_label(
  147                                          raidPtr->raid_cinfo[r][c].ci_dev,
  148                                          raidPtr->raid_cinfo[r][c].ci_vp,
  149                                          &raidPtr->raid_cinfo[r][c].ci_label);
  150                         }
  151 
  152                         if (disks[r][c].status != rf_ds_optimal) {
  153                                 numFailuresThisRow++;
  154                         } else {
  155                                 if (disks[r][c].numBlocks < min_numblks)
  156                                         min_numblks = disks[r][c].numBlocks;
  157                                 DPRINTF7("Disk at row %d col %d: dev %s"
  158                                     " numBlocks %ld blockSize %d (%ld MB)\n",
  159                                     r, c, disks[r][c].devname,
  160                                     (long int) disks[r][c].numBlocks,
  161                                     disks[r][c].blockSize,
  162                                     (long int) disks[r][c].numBlocks *
  163                                      disks[r][c].blockSize / 1024 / 1024);
  164                         }
  165                 }
  166                 /* XXX Fix for n-fault tolerant. */
  167                 /*
  168                  * XXX This should probably check to see how many failures
  169                  * we can handle for this configuration !
  170                  */
  171                 if (numFailuresThisRow > 0)
  172                         raidPtr->status[r] = rf_rs_degraded;
  173         }
  174         /*
  175          * All disks must be the same size & have the same block size, bs must
  176          * be a power of 2.
  177          */
  178         bs = 0;
  179         for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
  180                 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
  181                         if (disks[r][c].status == rf_ds_optimal) {
  182                                 bs = disks[r][c].blockSize;
  183                                 foundone = 1;
  184                         }
  185                 }
  186         }
  187         if (!foundone) {
  188                 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in"
  189                     " the array.\n");
  190                 ret = EINVAL;
  191                 goto fail;
  192         }
  193         for (count = 0, i = 1; i; i <<= 1)
  194                 if (bs & i)
  195                         count++;
  196         if (count != 1) {
  197                 RF_ERRORMSG1("Error: block size on disks (%d) must be a"
  198                     " power of 2.\n", bs);
  199                 ret = EINVAL;
  200                 goto fail;
  201         }
  202 
  203         if (rf_CheckLabels(raidPtr, cfgPtr)) {
  204                 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
  205                 if (force != 0) {
  206                         printf("raid%d: Fatal errors being ignored.\n",
  207                             raidPtr->raidid);
  208                 } else {
  209                         ret = EINVAL;
  210                         goto fail;
  211                 }
  212         }
  213 
  214         for (r = 0; r < raidPtr->numRow; r++) {
  215                 for (c = 0; c < raidPtr->numCol; c++) {
  216                         if (disks[r][c].status == rf_ds_optimal) {
  217                                 if (disks[r][c].blockSize != bs) {
  218                                         RF_ERRORMSG2("Error: block size of"
  219                                             " disk at r %d c %d different from"
  220                                             " disk at r 0 c 0.\n", r, c);
  221                                         ret = EINVAL;
  222                                         goto fail;
  223                                 }
  224                                 if (disks[r][c].numBlocks != min_numblks) {
  225                                         RF_ERRORMSG3("WARNING: truncating disk"
  226                                             " at r %d c %d to %d blocks.\n",
  227                                             r, c, (int) min_numblks);
  228                                         disks[r][c].numBlocks = min_numblks;
  229                                 }
  230                         }
  231                 }
  232         }
  233 
  234         raidPtr->sectorsPerDisk = min_numblks;
  235         raidPtr->logBytesPerSector = ffs(bs) - 1;
  236         raidPtr->bytesPerSector = bs;
  237         raidPtr->sectorMask = bs - 1;
  238         return (0);
  239 
  240 fail:
  241         rf_UnconfigureVnodes(raidPtr);
  242 
  243         return (ret);
  244 }
  245 
  246 
  247 /****************************************************************************
  248  * Set up the data structures describing the spare disks in the array.
  249  * Recall from the above comment that the spare disk descriptors are stored
  250  * in row zero, which is specially expanded to hold them.
  251  ****************************************************************************/
  252 int
  253 rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
  254     RF_Config_t * cfgPtr)
  255 {
  256         int i, ret;
  257         unsigned int bs;
  258         RF_RaidDisk_t *disks;
  259         int num_spares_done;
  260 
  261         num_spares_done = 0;
  262 
  263         /*
  264          * The space for the spares should have already been allocated by
  265          * ConfigureDisks().
  266          */
  267 
  268         disks = &raidPtr->Disks[0][raidPtr->numCol];
  269         for (i = 0; i < raidPtr->numSpare; i++) {
  270                 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
  271                     &disks[i], 0, raidPtr->numCol + i);
  272                 if (ret)
  273                         goto fail;
  274                 if (disks[i].status != rf_ds_optimal) {
  275                         RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
  276                             &cfgPtr->spare_names[i][0]);
  277                 } else {
  278                         /* Change status to spare. */
  279                         disks[i].status = rf_ds_spare;
  280                         DPRINTF6("Spare Disk %d: dev %s numBlocks %ld"
  281                             " blockSize %d (%ld MB).\n", i, disks[i].devname,
  282                             (long int) disks[i].numBlocks, disks[i].blockSize,
  283                             (long int) disks[i].numBlocks *
  284                             disks[i].blockSize / 1024 / 1024);
  285                 }
  286                 num_spares_done++;
  287         }
  288 
  289         /* Check sizes and block sizes on spare disks. */
  290         bs = 1 << raidPtr->logBytesPerSector;
  291         for (i = 0; i < raidPtr->numSpare; i++) {
  292                 if (disks[i].blockSize != bs) {
  293                         RF_ERRORMSG3("Block size of %d on spare disk %s is"
  294                             " not the same as on other disks (%d).\n",
  295                             disks[i].blockSize, disks[i].devname, bs);
  296                         ret = EINVAL;
  297                         goto fail;
  298                 }
  299                 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
  300                         RF_ERRORMSG3("Spare disk %s (%llu blocks) is too small"
  301                             " to serve as a spare (need %llu blocks).\n",
  302                             disks[i].devname, disks[i].numBlocks,
  303                             raidPtr->sectorsPerDisk);
  304                         ret = EINVAL;
  305                         goto fail;
  306                 } else
  307                         if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
  308                                 RF_ERRORMSG2("Warning: truncating spare disk"
  309                                     " %s to %llu blocks.\n", disks[i].devname,
  310                                     raidPtr->sectorsPerDisk);
  311 
  312                                 disks[i].numBlocks = raidPtr->sectorsPerDisk;
  313                         }
  314         }
  315 
  316         return (0);
  317 
  318 fail:
  319 
  320         /*
  321          * Release the hold on the main components. We've failed to allocate
  322          * a spare, and since we're failing, we need to free things...
  323          *
  324          * XXX Failing to allocate a spare is *not* that big of a deal...
  325          * We *can* survive without it, if need be, esp. if we get hot
  326          * adding working.
  327          * If we don't fail out here, then we need a way to remove this spare...
  328          * That should be easier to do here than if we are "live"...
  329          */
  330 
  331         rf_UnconfigureVnodes(raidPtr);
  332 
  333         return (ret);
  334 }
  335 
  336 int
  337 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
  338 {
  339         RF_RaidDisk_t **disks;
  340         int ret;
  341         int r;
  342 
  343         RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
  344             (RF_RaidDisk_t **), raidPtr->cleanupList);
  345         if (disks == NULL) {
  346                 ret = ENOMEM;
  347                 goto fail;
  348         }
  349         raidPtr->Disks = disks;
  350         /* Get space for the device-specific stuff... */
  351         RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
  352             sizeof(struct raidcinfo *), (struct raidcinfo **),
  353             raidPtr->cleanupList);
  354         if (raidPtr->raid_cinfo == NULL) {
  355                 ret = ENOMEM;
  356                 goto fail;
  357         }
  358 
  359         for (r = 0; r < raidPtr->numRow; r++) {
  360                 /*
  361                  * We allocate RF_MAXSPARE on the first row so that we
  362                  * have room to do hot-swapping of spares.
  363                  */
  364                 RF_CallocAndAdd(disks[r], raidPtr->numCol +
  365                     ((r == 0) ? RF_MAXSPARE : 0), sizeof(RF_RaidDisk_t),
  366                     (RF_RaidDisk_t *), raidPtr->cleanupList);
  367                 if (disks[r] == NULL) {
  368                         ret = ENOMEM;
  369                         goto fail;
  370                 }
  371                 /* Get more space for device specific stuff... */
  372                 RF_CallocAndAdd(raidPtr->raid_cinfo[r], raidPtr->numCol +
  373                     ((r == 0) ? raidPtr->numSpare : 0),
  374                     sizeof(struct raidcinfo), (struct raidcinfo *),
  375                     raidPtr->cleanupList);
  376                 if (raidPtr->raid_cinfo[r] == NULL) {
  377                         ret = ENOMEM;
  378                         goto fail;
  379                 }
  380         }
  381         return(0);
  382 fail:
  383         rf_UnconfigureVnodes(raidPtr);
  384 
  385         return(ret);
  386 }
  387 
  388 
  389 /* Configure a single disk during auto-configuration at boot. */
  390 int
  391 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
  392     RF_AutoConfig_t *auto_config)
  393 {
  394         RF_RaidDisk_t **disks;
  395         RF_RaidDisk_t *diskPtr;
  396         RF_RowCol_t r, c;
  397         RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
  398         int bs, ret;
  399         int numFailuresThisRow;
  400         int force;
  401         RF_AutoConfig_t *ac;
  402         int parity_good;
  403         int mod_counter;
  404         int mod_counter_found;
  405 
  406 #if     DEBUG
  407         printf("Starting autoconfiguration of RAID set...\n");
  408 #endif  /* DEBUG */
  409         force = cfgPtr->force;
  410 
  411         ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
  412         if (ret)
  413                 goto fail;
  414 
  415         disks = raidPtr->Disks;
  416 
  417         /* Assume the parity will be fine... */
  418         parity_good = RF_RAID_CLEAN;
  419 
  420         /* Check for mod_counters that are too low. */
  421         mod_counter_found = 0;
  422         ac = auto_config;
  423         while(ac!=NULL) {
  424                 if (mod_counter_found == 0) {
  425                         mod_counter = ac->clabel->mod_counter;
  426                         mod_counter_found = 1;
  427                 } else {
  428                         if (ac->clabel->mod_counter > mod_counter) {
  429                                 mod_counter = ac->clabel->mod_counter;
  430                         }
  431                 }
  432                 ac->flag = 0; /* Clear the general purpose flag. */
  433                 ac = ac->next;
  434         }
  435 
  436         for (r = 0; r < raidPtr->numRow; r++) {
  437                 numFailuresThisRow = 0;
  438                 for (c = 0; c < raidPtr->numCol; c++) {
  439                         diskPtr = &disks[r][c];
  440 
  441                         /* Find this row/col in the autoconfig. */
  442 #if     DEBUG
  443                         printf("Looking for %d,%d in autoconfig.\n", r, c);
  444 #endif  /* DEBUG */
  445                         ac = auto_config;
  446                         while(ac!=NULL) {
  447                                 if (ac->clabel == NULL) {
  448                                         /* Big-time bad news. */
  449                                         goto fail;
  450                                 }
  451                                 if ((ac->clabel->row == r) &&
  452                                     (ac->clabel->column == c) &&
  453                                     (ac->clabel->mod_counter == mod_counter)) {
  454                                         /* It's this one... */
  455                                         /*
  456                                          * Flag it as 'used', so we don't
  457                                          * free it later.
  458                                          */
  459                                         ac->flag = 1;
  460 #if     DEBUG
  461                                         printf("Found: %s at %d,%d.\n",
  462                                             ac->devname, r, c);
  463 #endif  /* DEBUG */
  464 
  465                                         break;
  466                                 }
  467                                 ac = ac->next;
  468                         }
  469 
  470                         if (ac == NULL) {
  471                                 /*
  472                                  * We didn't find an exact match with a
  473                                  * correct mod_counter above...  Can we
  474                                  * find one with an incorrect mod_counter
  475                                  * to use instead ?  (This one, if we find
  476                                  * it, will be marked as failed once the
  477                                  * set configures)
  478                                  */
  479 
  480                                 ac = auto_config;
  481                                 while(ac!=NULL) {
  482                                         if (ac->clabel == NULL) {
  483                                                 /* Big-time bad news. */
  484                                                 goto fail;
  485                                         }
  486                                         if ((ac->clabel->row == r) &&
  487                                             (ac->clabel->column == c)) {
  488                                                 /*
  489                                                  * It's this one...
  490                                                  * Flag it as 'used', so we
  491                                                  * don't free it later.
  492                                                  */
  493                                                 ac->flag = 1;
  494 #if     DEBUG
  495                                                 printf("Found(low mod_counter)"
  496                                                     ": %s at %d,%d.\n",
  497                                                     ac->devname, r, c);
  498 #endif  /* DEBUG */
  499 
  500                                                 break;
  501                                         }
  502                                         ac = ac->next;
  503                                 }
  504                         }
  505 
  506 
  507 
  508                         if (ac!=NULL) {
  509                                 /* Found it. Configure it... */
  510                                 diskPtr->blockSize = ac->clabel->blockSize;
  511                                 diskPtr->numBlocks = ac->clabel->numBlocks;
  512                                 /*
  513                                  * Note: rf_protectedSectors is already
  514                                  * factored into numBlocks here.
  515                                  */
  516                                 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
  517                                 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
  518 
  519                                 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
  520                                     ac->clabel, sizeof(*ac->clabel));
  521                                 snprintf(diskPtr->devname,
  522                                     sizeof diskPtr->devname, "/dev/%s",
  523                                     ac->devname);
  524 
  525                                 /*
  526                                  * Note the fact that this component was
  527                                  * autoconfigured. You'll need this info
  528                                  * later. Trust me :)
  529                                  */
  530                                 diskPtr->auto_configured = 1;
  531                                 diskPtr->dev = ac->dev;
  532 
  533                                 /*
  534                                  * We allow the user to specify that
  535                                  * only a fraction of the disks should
  536                                  * be used. This is just for debug: it
  537                                  * speeds up the parity scan.
  538                                  */
  539 
  540                                 diskPtr->numBlocks = diskPtr->numBlocks *
  541                                         rf_sizePercentage / 100;
  542 
  543                                 /*
  544                                  * XXX These will get set multiple times,
  545                                  * but since we're autoconfiguring, they'd
  546                                  * better be always the same each time !
  547                                  * If not, this is the least of your worries.
  548                                  */
  549 
  550                                 bs = diskPtr->blockSize;
  551                                 min_numblks = diskPtr->numBlocks;
  552 
  553                                 /*
  554                                  * This gets done multiple times, but that's
  555                                  * fine -- the serial number will be the same
  556                                  * for all components, guaranteed.
  557                                  */
  558                                 raidPtr->serial_number =
  559                                     ac->clabel->serial_number;
  560                                 /*
  561                                  * Check the last time the label
  562                                  * was modified.
  563                                  */
  564                                 if (ac->clabel->mod_counter != mod_counter) {
  565                                         /*
  566                                          * Even though we've filled in all
  567                                          * of the above, we don't trust
  568                                          * this component since it's
  569                                          * modification counter is not
  570                                          * in sync with the rest, and we really
  571                                          * consider it to be failed.
  572                                          */
  573                                         disks[r][c].status = rf_ds_failed;
  574                                         numFailuresThisRow++;
  575                                 } else {
  576                                         if (ac->clabel->clean != RF_RAID_CLEAN)
  577                                         {
  578                                                 parity_good = RF_RAID_DIRTY;
  579                                         }
  580                                 }
  581                         } else {
  582                                 /*
  583                                  * Didn't find it at all !!!
  584                                  * Component must really be dead.
  585                                  */
  586                                 disks[r][c].status = rf_ds_failed;
  587                                 snprintf(disks[r][c].devname,
  588                                     sizeof disks[r][c].devname, "component%d",
  589                                     r * raidPtr->numCol + c);
  590                                 numFailuresThisRow++;
  591                         }
  592                 }
  593                 /* XXX Fix for n-fault tolerant. */
  594                 /*
  595                  * XXX This should probably check to see how many failures
  596                  * we can handle for this configuration !
  597                  */
  598                 if (numFailuresThisRow > 0)
  599                         raidPtr->status[r] = rf_rs_degraded;
  600         }
  601 
  602         /* Close the device for the ones that didn't get used. */
  603 
  604         ac = auto_config;
  605         while(ac != NULL) {
  606                 if (ac->flag == 0) {
  607                         VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
  608                         vput(ac->vp);
  609                         ac->vp = NULL;
  610 #if     DEBUG
  611                         printf("Released %s from auto-config set.\n",
  612                             ac->devname);
  613 #endif  /* DEBUG */
  614                 }
  615                 ac = ac->next;
  616         }
  617 
  618         raidPtr->mod_counter = mod_counter;
  619 
  620         /* Note the state of the parity, if any. */
  621         raidPtr->parity_good = parity_good;
  622         raidPtr->sectorsPerDisk = min_numblks;
  623         raidPtr->logBytesPerSector = ffs(bs) - 1;
  624         raidPtr->bytesPerSector = bs;
  625         raidPtr->sectorMask = bs - 1;
  626         return (0);
  627 
  628 fail:
  629 
  630         rf_UnconfigureVnodes(raidPtr);
  631 
  632         return (ret);
  633 
  634 }
  635 
  636 /* Configure a single disk in the array. */
  637 int
  638 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *buf, RF_RaidDisk_t *diskPtr,
  639     RF_RowCol_t row, RF_RowCol_t col)
  640 {
  641         char *p;
  642         int retcode;
  643 
  644         struct partinfo dpart;
  645         struct vnode *vp;
  646         struct vattr va;
  647         struct proc *proc;
  648         int error;
  649 
  650         retcode = 0;
  651         p = rf_find_non_white(buf);
  652         if (*buf != '\0' && p[strlen(p) - 1] == '\n') {
  653                 /* Strip off the newline. */
  654                 p[strlen(p) - 1] = '\0';
  655         }
  656         (void) strlcpy(diskPtr->devname, p, sizeof diskPtr->devname);
  657 
  658         proc = raidPtr->engine_thread;
  659 
  660         /* Let's start by claiming the component is fine and well... */
  661         diskPtr->status = rf_ds_optimal;
  662 
  663         raidPtr->raid_cinfo[row][col].ci_vp = NULL;
  664         raidPtr->raid_cinfo[row][col].ci_dev = NULL;
  665 
  666         error = raidlookup(diskPtr->devname, curproc, &vp);
  667         if (error) {
  668                 printf("raidlookup on device: %s failed !\n", diskPtr->devname);
  669                 if (error == ENXIO) {
  670                         /* The component isn't there...  Must be dead :-( */
  671                         diskPtr->status = rf_ds_failed;
  672                 } else {
  673                         return (error);
  674                 }
  675         }
  676         if (diskPtr->status == rf_ds_optimal) {
  677 
  678                 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
  679                         return (error);
  680                 }
  681                 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, FREAD,
  682                     proc->p_ucred, proc);
  683                 if (error) {
  684                         return (error);
  685                 }
  686                 diskPtr->blockSize = dpart.disklab->d_secsize;
  687 
  688                 diskPtr->numBlocks = DL_GETPSIZE(dpart.part) - rf_protectedSectors;
  689                 diskPtr->partitionSize = DL_GETPSIZE(dpart.part);
  690 
  691                 raidPtr->raid_cinfo[row][col].ci_vp = vp;
  692                 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
  693 
  694                 /* This component was not automatically configured. */
  695                 diskPtr->auto_configured = 0;
  696                 diskPtr->dev = va.va_rdev;
  697 
  698                 /*
  699                  * We allow the user to specify that only a fraction of the
  700                  * disks should be used. This is just for debug: it speeds up
  701                  * the parity scan.
  702                  */
  703                 diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage
  704                     / 100;
  705         }
  706         return (0);
  707 }
  708 
  709 void
  710 rf_print_label_status(RF_Raid_t *raidPtr, int row, int column, char *dev_name,
  711     RF_ComponentLabel_t *ci_label)
  712 {
  713 
  714         printf("raid%d: Component %s being configured at row: %d col: %d\n",
  715             raidPtr->raidid, dev_name, row, column);
  716         printf("         Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
  717             ci_label->row, ci_label->column, ci_label->num_rows,
  718             ci_label->num_columns);
  719         printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
  720             ci_label->version, ci_label->serial_number, ci_label->mod_counter);
  721         printf("         Clean: %s Status: %d\n",
  722             ci_label->clean ? "Yes" : "No", ci_label->status);
  723 }
  724 
  725 int
  726 rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column, char *dev_name,
  727     RF_ComponentLabel_t *ci_label, int serial_number, int mod_counter)
  728 {
  729         int fatal_error = 0;
  730 
  731         if (serial_number != ci_label->serial_number) {
  732                 printf("%s has a different serial number: %d %d.\n",
  733                     dev_name, serial_number, ci_label->serial_number);
  734                 fatal_error = 1;
  735         }
  736         if (mod_counter != ci_label->mod_counter) {
  737                 printf("%s has a different modfication count: %d %d.\n",
  738                     dev_name, mod_counter, ci_label->mod_counter);
  739         }
  740 
  741         if (row != ci_label->row) {
  742                 printf("Row out of alignment for: %s.\n", dev_name);
  743                 fatal_error = 1;
  744         }
  745         if (column != ci_label->column) {
  746                 printf("Column out of alignment for: %s.\n", dev_name);
  747                 fatal_error = 1;
  748         }
  749         if (raidPtr->numRow != ci_label->num_rows) {
  750                 printf("Number of rows do not match for: %s.\n", dev_name);
  751                 fatal_error = 1;
  752         }
  753         if (raidPtr->numCol != ci_label->num_columns) {
  754                 printf("Number of columns do not match for: %s.\n", dev_name);
  755                 fatal_error = 1;
  756         }
  757         if (ci_label->clean == 0) {
  758                 /* It's not clean, but that's not fatal. */
  759                 printf("%s is not clean !\n", dev_name);
  760         }
  761         return(fatal_error);
  762 }
  763 
  764 
  765 /*
  766  *
  767  * rf_CheckLabels() - Check all the component labels for consistency.
  768  * Return an error if there is anything major amiss.
  769  *
  770  */
  771 
  772 int
  773 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
  774 {
  775         int r, c;
  776         char *dev_name;
  777         RF_ComponentLabel_t *ci_label;
  778         int serial_number = 0;
  779         int mod_number = 0;
  780         int fatal_error = 0;
  781         int mod_values[4];
  782         int mod_count[4];
  783         int ser_values[4];
  784         int ser_count[4];
  785         int num_ser;
  786         int num_mod;
  787         int i;
  788         int found;
  789         int hosed_row;
  790         int hosed_column;
  791         int too_fatal;
  792         int parity_good;
  793         int force;
  794 
  795         hosed_row = -1;
  796         hosed_column = -1;
  797         too_fatal = 0;
  798         force = cfgPtr->force;
  799 
  800         /*
  801          * We're going to try to be a little intelligent here. If one
  802          * component's label is bogus, and we can identify that it's the
  803          * *only* one that's gone, we'll mark it as "failed" and allow
  804          * the configuration to proceed. This will be the *only* case
  805          * that we'll proceed if there would be (otherwise) fatal errors.
  806          *
  807          * Basically we simply keep a count of how many components had
  808          * what serial number. If all but one agree, we simply mark
  809          * the disagreeing component as being failed, and allow
  810          * things to come up "normally".
  811          *
  812          * We do this first for serial numbers, and then for "mod_counter".
  813          *
  814          */
  815 
  816         num_ser = 0;
  817         num_mod = 0;
  818         for (r = 0; r < raidPtr->numRow && !fatal_error; r++) {
  819                 for (c = 0; c < raidPtr->numCol; c++) {
  820                         ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
  821                         found = 0;
  822                         for(i = 0; i < num_ser; i++) {
  823                                 if (ser_values[i] == ci_label->serial_number) {
  824                                         ser_count[i]++;
  825                                         found = 1;
  826                                         break;
  827                                 }
  828                         }
  829                         if (!found) {
  830                                 ser_values[num_ser] = ci_label->serial_number;
  831                                 ser_count[num_ser] = 1;
  832                                 num_ser++;
  833                                 if (num_ser > 2) {
  834                                         fatal_error = 1;
  835                                         break;
  836                                 }
  837                         }
  838                         found = 0;
  839                         for(i = 0; i < num_mod; i++) {
  840                                 if (mod_values[i] == ci_label->mod_counter) {
  841                                         mod_count[i]++;
  842                                         found = 1;
  843                                         break;
  844                                 }
  845                         }
  846                         if (!found) {
  847                                 mod_values[num_mod] = ci_label->mod_counter;
  848                                 mod_count[num_mod] = 1;
  849                                 num_mod++;
  850                                 if (num_mod > 2) {
  851                                         fatal_error = 1;
  852                                         break;
  853                                 }
  854                         }
  855                 }
  856         }
  857 #if     DEBUG
  858         printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
  859         for(i = 0; i < num_ser; i++) {
  860                 printf("%d %d\n", ser_values[i], ser_count[i]);
  861         }
  862         printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
  863         for(i = 0; i < num_mod; i++) {
  864                 printf("%d %d\n", mod_values[i], mod_count[i]);
  865         }
  866 #endif  /* DEBUG */
  867         serial_number = ser_values[0];
  868         if (num_ser == 2) {
  869                 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
  870                         /* Locate the maverick component. */
  871                         if (ser_count[1] > ser_count[0]) {
  872                                 serial_number = ser_values[1];
  873                         }
  874                         for (r = 0; r < raidPtr->numRow; r++) {
  875                                 for (c = 0; c < raidPtr->numCol; c++) {
  876                                         ci_label =
  877                                             &raidPtr->raid_cinfo[r][c].ci_label;
  878                                         if (serial_number !=
  879                                             ci_label->serial_number) {
  880                                                 hosed_row = r;
  881                                                 hosed_column = c;
  882                                                 break;
  883                                         }
  884                                 }
  885                         }
  886                         printf("Hosed component: %s.\n",
  887                             &cfgPtr->devnames[hosed_row][hosed_column][0]);
  888                         if (!force) {
  889                                 /*
  890                                  * We'll fail this component, as if there are
  891                                  * other major errors, we aren't forcing things
  892                                  * and we'll abort the config anyways.
  893                                  */
  894                                 raidPtr->Disks[hosed_row][hosed_column].status
  895                                     = rf_ds_failed;
  896                                 raidPtr->numFailures++;
  897                                 raidPtr->status[hosed_row] = rf_rs_degraded;
  898                         }
  899                 } else {
  900                         too_fatal = 1;
  901                 }
  902                 if (cfgPtr->parityConfig == '0') {
  903                         /*
  904                          * We've identified two different serial numbers.
  905                          * RAID 0 can't cope with that, so we'll punt.
  906                          */
  907                         too_fatal = 1;
  908                 }
  909 
  910         }
  911 
  912         /*
  913          * Record the serial number for later. If we bail later, setting
  914          * this doesn't matter, otherwise we've got the best guess at the
  915          * correct serial number.
  916          */
  917         raidPtr->serial_number = serial_number;
  918 
  919         mod_number = mod_values[0];
  920         if (num_mod == 2) {
  921                 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
  922                         /* Locate the maverick component. */
  923                         if (mod_count[1] > mod_count[0]) {
  924                                 mod_number = mod_values[1];
  925                         } else if (mod_count[1] < mod_count[0]) {
  926                                 mod_number = mod_values[0];
  927                         } else {
  928                                 /*
  929                                  * Counts of different modification values
  930                                  * are the same. Assume greater value is
  931                                  * the correct one, all other things
  932                                  * considered.
  933                                  */
  934                                 if (mod_values[0] > mod_values[1]) {
  935                                         mod_number = mod_values[0];
  936                                 } else {
  937                                         mod_number = mod_values[1];
  938                                 }
  939 
  940                         }
  941                         for (r = 0; r < raidPtr->numRow && !too_fatal; r++) {
  942                                 for (c = 0; c < raidPtr->numCol; c++) {
  943                                         ci_label =
  944                                             &raidPtr->raid_cinfo[r][c].ci_label;
  945                                         if (mod_number !=
  946                                             ci_label->mod_counter) {
  947                                                 if ((hosed_row == r) &&
  948                                                     (hosed_column == c)) {
  949                                                         /*
  950                                                          * Same one. Can
  951                                                          * deal with it.
  952                                                          */
  953                                                 } else {
  954                                                         hosed_row = r;
  955                                                         hosed_column = c;
  956                                                         if (num_ser != 1) {
  957                                                                 too_fatal = 1;
  958                                                                 break;
  959                                                         }
  960                                                 }
  961                                         }
  962                                 }
  963                         }
  964                         printf("Hosed component: %s.\n",
  965                             &cfgPtr->devnames[hosed_row][hosed_column][0]);
  966                         if (!force) {
  967                                 /*
  968                                  * We'll fail this component, as if there are
  969                                  * other major errors, we aren't forcing things
  970                                  * and we'll abort the config anyways.
  971                                  */
  972                                 if (raidPtr
  973                                     ->Disks[hosed_row][hosed_column].status !=
  974                                     rf_ds_failed) {
  975                                         raidPtr->Disks[hosed_row]
  976                                             [hosed_column].status =
  977                                             rf_ds_failed;
  978                                         raidPtr->numFailures++;
  979                                         raidPtr->status[hosed_row] =
  980                                             rf_rs_degraded;
  981                                 }
  982                         }
  983                 } else {
  984                         too_fatal = 1;
  985                 }
  986                 if (cfgPtr->parityConfig == '0') {
  987                         /*
  988                          * We've identified two different mod counters.
  989                          * RAID 0 can't cope with that, so we'll punt.
  990                          */
  991                         too_fatal = 1;
  992                 }
  993         }
  994 
  995         raidPtr->mod_counter = mod_number;
  996 
  997         if (too_fatal) {
  998                 /*
  999                  * We've had both a serial number mismatch, and a mod_counter
 1000                  * mismatch -- and they involved two different components !!!
 1001                  * Bail -- make things fail so that the user must force
 1002                  * the issue...
 1003                  */
 1004                 hosed_row = -1;
 1005                 hosed_column = -1;
 1006         }
 1007 
 1008         if (num_ser > 2) {
 1009                 printf("raid%d: Too many different serial numbers !\n",
 1010                     raidPtr->raidid);
 1011         }
 1012 
 1013         if (num_mod > 2) {
 1014                 printf("raid%d: Too many different mod counters !\n",
 1015                     raidPtr->raidid);
 1016         }
 1017 
 1018         /*
 1019          * We start by assuming the parity will be good, and flee from
 1020          * that notion at the slightest sign of trouble.
 1021          */
 1022 
 1023         parity_good = RF_RAID_CLEAN;
 1024         for (r = 0; r < raidPtr->numRow; r++) {
 1025                 for (c = 0; c < raidPtr->numCol; c++) {
 1026                         dev_name = &cfgPtr->devnames[r][c][0];
 1027                         ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
 1028 
 1029                         if ((r == hosed_row) && (c == hosed_column)) {
 1030                                 printf("raid%d: Ignoring %s.\n",
 1031                                     raidPtr->raidid, dev_name);
 1032                         } else {
 1033                                 rf_print_label_status(raidPtr, r, c, dev_name,
 1034                                     ci_label);
 1035                                 if (rf_check_label_vitals(raidPtr, r, c,
 1036                                      dev_name, ci_label, serial_number,
 1037                                      mod_number)) {
 1038                                         fatal_error = 1;
 1039                                 }
 1040                                 if (ci_label->clean != RF_RAID_CLEAN) {
 1041                                         parity_good = RF_RAID_DIRTY;
 1042                                 }
 1043                         }
 1044                 }
 1045         }
 1046         if (fatal_error) {
 1047                 parity_good = RF_RAID_DIRTY;
 1048         }
 1049 
 1050         /* We note the state of the parity. */
 1051         raidPtr->parity_good = parity_good;
 1052 
 1053         return(fatal_error);
 1054 }
 1055 
 1056 int
 1057 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
 1058 {
 1059         RF_RaidDisk_t *disks;
 1060         RF_DiskQueue_t *spareQueues;
 1061         int ret;
 1062         unsigned int bs;
 1063         int spare_number;
 1064 
 1065 #if 0
 1066         printf("Just in rf_add_hot_spare: %d.\n", raidPtr->numSpare);
 1067         printf("Num col: %d.\n", raidPtr->numCol);
 1068 #endif
 1069         if (raidPtr->numSpare >= RF_MAXSPARE) {
 1070                 RF_ERRORMSG1("Too many spares: %d.\n", raidPtr->numSpare);
 1071                 return(EINVAL);
 1072         }
 1073 
 1074         RF_LOCK_MUTEX(raidPtr->mutex);
 1075 
 1076         /* The beginning of the spares... */
 1077         disks = &raidPtr->Disks[0][raidPtr->numCol];
 1078 
 1079         spare_number = raidPtr->numSpare;
 1080 
 1081         ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
 1082             &disks[spare_number], 0, raidPtr->numCol + spare_number);
 1083 
 1084         if (ret)
 1085                 goto fail;
 1086         if (disks[spare_number].status != rf_ds_optimal) {
 1087                 RF_ERRORMSG1("Warning: spare disk %s failed TUR.\n",
 1088                     sparePtr->component_name);
 1089                 ret = EINVAL;
 1090                 goto fail;
 1091         } else {
 1092                 disks[spare_number].status = rf_ds_spare;
 1093                 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d"
 1094                     " (%ld MB).\n", spare_number, disks[spare_number].devname,
 1095                     (long int) disks[spare_number].numBlocks,
 1096                     disks[spare_number].blockSize,
 1097                     (long int) disks[spare_number].numBlocks *
 1098                      disks[spare_number].blockSize / 1024 / 1024);
 1099         }
 1100 
 1101 
 1102         /* Check sizes and block sizes on the spare disk. */
 1103         bs = 1 << raidPtr->logBytesPerSector;
 1104         if (disks[spare_number].blockSize != bs) {
 1105                 RF_ERRORMSG3("Block size of %d on spare disk %s is not"
 1106                     " the same as on other disks (%d).\n",
 1107                     disks[spare_number].blockSize,
 1108                     disks[spare_number].devname, bs);
 1109                 ret = EINVAL;
 1110                 goto fail;
 1111         }
 1112         if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
 1113                 RF_ERRORMSG3("Spare disk %s (%llu blocks) is too small to serve"
 1114                     " as a spare (need %llu blocks).\n",
 1115                     disks[spare_number].devname, disks[spare_number].numBlocks,
 1116                     raidPtr->sectorsPerDisk);
 1117                 ret = EINVAL;
 1118                 goto fail;
 1119         } else {
 1120                 if (disks[spare_number].numBlocks >
 1121                     raidPtr->sectorsPerDisk) {
 1122                         RF_ERRORMSG2("Warning: truncating spare disk %s to %llu"
 1123                             " blocks.\n", disks[spare_number].devname,
 1124                             raidPtr->sectorsPerDisk);
 1125 
 1126                         disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
 1127                 }
 1128         }
 1129 
 1130         spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
 1131         ret = rf_ConfigureDiskQueue(raidPtr, &spareQueues[spare_number],
 1132             0, raidPtr->numCol + spare_number, raidPtr->qType,
 1133             raidPtr->sectorsPerDisk, raidPtr->Disks[0][raidPtr->numCol +
 1134              spare_number].dev, raidPtr->maxOutstanding,
 1135             &raidPtr->shutdownList, raidPtr->cleanupList);
 1136 
 1137 
 1138         raidPtr->numSpare++;
 1139         RF_UNLOCK_MUTEX(raidPtr->mutex);
 1140         return (0);
 1141 
 1142 fail:
 1143         RF_UNLOCK_MUTEX(raidPtr->mutex);
 1144         return(ret);
 1145 }
 1146 
 1147 int
 1148 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
 1149 {
 1150         int spare_number;
 1151 
 1152         if (raidPtr->numSpare == 0) {
 1153                 printf("No spares to remove !\n");
 1154                 return(EINVAL);
 1155         }
 1156 
 1157         spare_number = sparePtr->column;
 1158 
 1159         return(EINVAL); /* XXX Not implemented yet. */
 1160 #if 0
 1161         if (spare_number < 0 || spare_number > raidPtr->numSpare) {
 1162                 return(EINVAL);
 1163         }
 1164 
 1165         /* Verify that this spare isn't in use... */
 1166 
 1167         /* It's gone... */
 1168 
 1169         raidPtr->numSpare--;
 1170 
 1171         return (0);
 1172 #endif
 1173 }
 1174 
 1175 int
 1176 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
 1177 {
 1178         RF_RaidDisk_t *disks;
 1179 
 1180         if ((component->row < 0) ||
 1181             (component->row >= raidPtr->numRow) ||
 1182             (component->column < 0) ||
 1183             (component->column >= raidPtr->numCol)) {
 1184                 return(EINVAL);
 1185         }
 1186 
 1187         disks = &raidPtr->Disks[component->row][component->column];
 1188 
 1189         /* 1. This component must be marked as 'failed'. */
 1190 
 1191         return(EINVAL); /* Not implemented yet. */
 1192 }
 1193 
 1194 int
 1195 rf_incorporate_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
 1196 {
 1197 
 1198         /*
 1199          * Issues here include how to 'move' this in if there is IO
 1200          * taking place (e.g. component queues and such).
 1201          */
 1202 
 1203         return(EINVAL); /* Not implemented yet. */
 1204 }
/* [<][>][^][v][top][bottom][index][help] */
root/dev/raidframe/rf_disks.c

DEFINITIONS