root/ufs/ffs/ffs_alloc.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ffs_alloc
  2. ffs_realloccg
  3. ffs1_reallocblks
  4. ffs2_reallocblks
  5. ffs_reallocblks
  6. ffs_inode_alloc
  7. ffs_dirpref
  8. ffs1_blkpref
  9. ffs2_blkpref
  10. ffs_hashalloc
  11. ffs_fragextend
  12. ffs_alloccg
  13. ffs_alloccgblk
  14. ffs_clusteralloc
  15. ffs_nodealloccg
  16. ffs_blkfree
  17. ffs_inode_free
  18. ffs_freefile
  19. ffs_checkblk
  20. ffs_mapsearch
  21. ffs_clusteracct

    1 /*      $OpenBSD: ffs_alloc.c,v 1.78 2007/06/22 13:59:12 thib Exp $     */
    2 /*      $NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $   */
    3 
    4 /*
    5  * Copyright (c) 2002 Networks Associates Technology, Inc.
    6  * All rights reserved.
    7  *
    8  * This software was developed for the FreeBSD Project by Marshall
    9  * Kirk McKusick and Network Associates Laboratories, the Security
   10  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
   11  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
   12  * research program.
   13  *
   14  * Copyright (c) 1982, 1986, 1989, 1993
   15  *      The Regents of the University of California.  All rights reserved.
   16  *
   17  * Redistribution and use in source and binary forms, with or without
   18  * modification, are permitted provided that the following conditions
   19  * are met:
   20  * 1. Redistributions of source code must retain the above copyright
   21  *    notice, this list of conditions and the following disclaimer.
   22  * 2. Redistributions in binary form must reproduce the above copyright
   23  *    notice, this list of conditions and the following disclaimer in the
   24  *    documentation and/or other materials provided with the distribution.
   25  * 3. Neither the name of the University nor the names of its contributors
   26  *    may be used to endorse or promote products derived from this software
   27  *    without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   39  * SUCH DAMAGE.
   40  *
   41  *      @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94
   42  */
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/buf.h>
   47 #include <sys/proc.h>
   48 #include <sys/vnode.h>
   49 #include <sys/mount.h>
   50 #include <sys/kernel.h>
   51 #include <sys/syslog.h>
   52 #include <sys/stdint.h>
   53 
   54 #include <uvm/uvm_extern.h>
   55 
   56 #include <dev/rndvar.h>
   57 
   58 #include <ufs/ufs/quota.h>
   59 #include <ufs/ufs/inode.h>
   60 #include <ufs/ufs/ufsmount.h>
   61 #include <ufs/ufs/ufs_extern.h>
   62 
   63 #include <ufs/ffs/fs.h>
   64 #include <ufs/ffs/ffs_extern.h>
   65 
    66 #define ffs_fserr(fs, uid, cp) do {                             \
    67         log(LOG_ERR, "uid %u on %s: %s\n", (uid),               \
    68             (fs)->fs_fsmnt, (cp));                              \
    69 } while (0)
/*
 * ffs_fserr() logs an allocation failure to the system log, tagged with
 * the offending uid and the filesystem mount point (fs_fsmnt).
 */
    70 
/*
 * Local prototypes.  ffs_alloccg/ffs_clusteralloc/ffs_nodealloccg and
 * ffs_fragextend are the per-cylinder-group strategies handed to
 * ffs_hashalloc(), which quadratically rehashes across cylinder groups.
 */
    71 daddr_t ffs_alloccg(struct inode *, int, daddr_t, int);
    72 daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t);
    73 daddr_t ffs_clusteralloc(struct inode *, int, daddr_t, int);
    74 ino_t   ffs_dirpref(struct inode *);
    75 daddr_t ffs_fragextend(struct inode *, int, long, int, int);
    76 u_long  ffs_hashalloc(struct inode *, int, long, int,
    77     daddr_t (*)(struct inode *, int, daddr_t, int));
    78 daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int);
    79 daddr_t ffs_mapsearch(struct fs *, struct cg *, daddr_t, int);
    80 
    81 int ffs1_reallocblks(void *);
    82 #ifdef FFS2
    83 int ffs2_reallocblks(void *);
    84 #endif
    85 
    86 #ifdef DIAGNOSTIC
    87 int      ffs_checkblk(struct inode *, daddr_t, long);
    88 #endif
   89 
   90 /*
   91  * Allocate a block in the file system.
   92  *
   93  * The size of the requested block is given, which must be some
   94  * multiple of fs_fsize and <= fs_bsize.
   95  * A preference may be optionally specified. If a preference is given
   96  * the following hierarchy is used to allocate a block:
   97  *   1) allocate the requested block.
   98  *   2) allocate a rotationally optimal block in the same cylinder.
   99  *   3) allocate a block in the same cylinder group.
  100  *   4) quadratically rehash into other cylinder groups, until an
  101  *      available block is located.
  102  * If no block preference is given the following hierarchy is used
  103  * to allocate a block:
  104  *   1) allocate a block in the cylinder group that contains the
  105  *      inode for the file.
  106  *   2) quadratically rehash into other cylinder groups, until an
  107  *      available block is located.
  108  */
   109 int
   110 ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size,
   111     struct ucred *cred, daddr_t *bnp)
   112 {
   113         struct fs *fs;
   114         daddr_t bno;
   115         int cg;
   116         int error;
   117 
        /* Default the result to 0 so callers see "no block" on failure. */
   118         *bnp = 0;
   119         fs = ip->i_fs;
   120 #ifdef DIAGNOSTIC
        /* Request must be a fragment multiple no larger than one block. */
   121         if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
   122                 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
   123                     ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
   124                 panic("ffs_alloc: bad size");
   125         }
   126         if (cred == NOCRED)
   127                 panic("ffs_alloc: missing credential");
   128 #endif /* DIAGNOSTIC */
        /*
         * Fail fast when a full block is wanted but none are free, or when
         * a non-root user would have to dip into the minfree reserve.
         */
   129         if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
   130                 goto nospace;
   131         if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
   132                 goto nospace;
   133 
        /* Charge the user's quota up front; released again on failure below. */
   134         if ((error = ufs_quota_alloc_blocks(ip, btodb(size), cred)) != 0)
   135                 return (error);
   136 
   137         /*
   138          * Start allocation in the preferred block's cylinder group or
   139          * the file's inode's cylinder group if no preferred block was
   140          * specified.
   141          */
   142         if (bpref >= fs->fs_size)
   143                 bpref = 0;
   144         if (bpref == 0)
   145                 cg = ino_to_cg(fs, ip->i_number);
   146         else
   147                 cg = dtog(fs, bpref);
   148 
   149         /* Try allocating a block. */
   150         bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size, ffs_alloccg);
   151         if (bno > 0) {
   152                 /* allocation successful, update inode data */
   153                 DIP_ADD(ip, blocks, btodb(size));
   154                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
   155                 *bnp = bno;
   156                 return (0);
   157         }
   158 
   159         /* Restore user's disk quota because allocation failed. */
   160         (void) ufs_quota_free_blocks(ip, btodb(size), cred);
   161 
   162 nospace:
        /* Out of space: log it, tell the user once via uprintf, ENOSPC. */
   163         ffs_fserr(fs, cred->cr_uid, "file system full");
   164         uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
   165         return (ENOSPC);
   166 }
  167 
  168 /*
  169  * Reallocate a fragment to a bigger size
  170  *
  171  * The number and size of the old block is given, and a preference
  172  * and new size is also specified. The allocator attempts to extend
  173  * the original block. Failing that, the regular block allocator is
  174  * invoked to get an appropriate block.
  175  */
   176 int
   177 ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize,
   178     int nsize, struct ucred *cred, struct buf **bpp, daddr_t *blknop)
   179 {
   180         struct fs *fs;
   181         struct buf *bp = NULL;
        /* quota_updated: blocks charged so far, to roll back on failure. */
   182         daddr_t quota_updated = 0;
   183         int cg, request, error;
   184         daddr_t bprev, bno;
   185 
   186         if (bpp != NULL)
   187                 *bpp = NULL;
   188         fs = ip->i_fs;
   189 #ifdef DIAGNOSTIC
        /* Both sizes must be fragment multiples no larger than one block. */
   190         if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
   191             (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
   192                 printf(
   193                     "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
   194                     ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
   195                 panic("ffs_realloccg: bad size");
   196         }
   197         if (cred == NOCRED)
   198                 panic("ffs_realloccg: missing credential");
   199 #endif /* DIAGNOSTIC */
        /* Non-root users may not dip into the minfree reserve. */
   200         if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
   201                 goto nospace;
   202 
        /* The fragment being grown must be an allocated direct block. */
   203         bprev = DIP(ip, db[lbprev]);
   204 
   205         if (bprev == 0) {
   206                 printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
   207                     ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
   208                 panic("ffs_realloccg: bad bprev");
   209         }
   210 
   211         /*
   212          * Allocate the extra space in the buffer.
   213          */
   214         if (bpp != NULL) {
   215                 if ((error = bread(ITOV(ip), lbprev, fs->fs_bsize,
   216                     NOCRED, &bp)) != 0)
   217                         goto error;
        /* Trim the buffer to the old fragment size for now. */
   218                 bp->b_bcount = osize;
   219         }
   220 
        /* Charge quota for the growth only; rolled back via quota_updated. */
   221         if ((error = ufs_quota_alloc_blocks(ip, btodb(nsize - osize), cred))
   222             != 0)
   223                 goto error;
   224 
   225         quota_updated = btodb(nsize - osize);
   226 
   227         /*
   228          * Check for extension in the existing location.
   229          */
   230         cg = dtog(fs, bprev);
   231         if ((bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) != 0) {
   232                 DIP_ADD(ip, blocks, btodb(nsize - osize));
   233                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
   234                 if (bpp != NULL) {
   235                         if (bp->b_blkno != fsbtodb(fs, bno))
   236                                 panic("ffs_realloccg: bad blockno");
   237 #ifdef DIAGNOSTIC
   238                         if (nsize > bp->b_bufsize)
   239                                 panic("ffs_realloccg: small buf");
   240 #endif
   241                         bp->b_bcount = nsize;
   242                         bp->b_flags |= B_DONE;
        /* Zero the grown tail so stale disk data never reaches the user. */
   243                         bzero(bp->b_data + osize, (u_int)nsize - osize);
   244                         *bpp = bp;
   245                 }
   246                 if (blknop != NULL) {
   247                         *blknop = bno;
   248                 }
   249                 return (0);
   250         }
   251         /*
   252          * Allocate a new disk location.
   253          */
   254         if (bpref >= fs->fs_size)
   255                 bpref = 0;
        /*
         * fs_optim selects the space/time trade-off; each case may flip the
         * policy for the next caller when fragmentation crosses a threshold.
         */
   256         switch (fs->fs_optim) {
   257         case FS_OPTSPACE:
   258                 /*
   259                  * Allocate an exact sized fragment. Although this makes
   260                  * best use of space, we will waste time relocating it if
   261                  * the file continues to grow. If the fragmentation is
   262                  * less than half of the minimum free reserve, we choose
   263                  * to begin optimizing for time.
   264                  */
   265                 request = nsize;
   266                 if (fs->fs_minfree < 5 ||
   267                     fs->fs_cstotal.cs_nffree >
   268                     fs->fs_dsize * fs->fs_minfree / (2 * 100))
   269                         break;
   270                 fs->fs_optim = FS_OPTTIME;
   271                 break;
   272         case FS_OPTTIME:
   273                 /*
   274                  * At this point we have discovered a file that is trying to
   275                  * grow a small fragment to a larger fragment. To save time,
   276                  * we allocate a full sized block, then free the unused portion.
   277                  * If the file continues to grow, the `ffs_fragextend' call
   278                  * above will be able to grow it in place without further
   279                  * copying. If aberrant programs cause disk fragmentation to
   280                  * grow within 2% of the free reserve, we choose to begin
   281                  * optimizing for space.
   282                  */
   283                 request = fs->fs_bsize;
   284                 if (fs->fs_cstotal.cs_nffree <
   285                     fs->fs_dsize * (fs->fs_minfree - 2) / 100)
   286                         break;
   287                 fs->fs_optim = FS_OPTSPACE;
   288                 break;
   289         default:
   290                 printf("dev = 0x%x, optim = %d, fs = %s\n",
   291                     ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
   292                 panic("ffs_realloccg: bad optim");
   293                 /* NOTREACHED */
   294         }
   295         bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
   296                                      ffs_alloccg);
   297         if (bno <= 0) 
   298                 goto nospace;
   299 
        /*
         * Got a new location: drop any cached pages for the vnode, free the
         * old fragment (softdep defers this itself), and free whatever part
         * of a full-block request we did not actually need.
         */
   300         (void) uvm_vnp_uncache(ITOV(ip));
   301         if (!DOINGSOFTDEP(ITOV(ip)))
   302                 ffs_blkfree(ip, bprev, (long)osize);
   303         if (nsize < request)
   304                 ffs_blkfree(ip, bno + numfrags(fs, nsize),
   305                     (long)(request - nsize));
   306         DIP_ADD(ip, blocks, btodb(nsize - osize));
   307         ip->i_flag |= IN_CHANGE | IN_UPDATE;
   308         if (bpp != NULL) {
   309                 bp->b_blkno = fsbtodb(fs, bno);
   310 #ifdef DIAGNOSTIC
   311                 if (nsize > bp->b_bufsize)
   312                         panic("ffs_realloccg: small buf 2");
   313 #endif
   314                 bp->b_bcount = nsize;
   315                 bp->b_flags |= B_DONE;
   316                 bzero(bp->b_data + osize, (u_int)nsize - osize);
   317                 *bpp = bp;
   318         }
   319         if (blknop != NULL) {
   320                 *blknop = bno;
   321         }
   322         return (0);
   323 
   324 nospace:
   325         /*
   326          * no space available
   327          */
   328         ffs_fserr(fs, cred->cr_uid, "file system full");
   329         uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
   330         error = ENOSPC;
   331 
   332 error:
        /* Common unwind: release the buffer (if read) and refund the quota. */
   333         if (bp != NULL) {
   334                 brelse(bp);
   335                 bp = NULL;
   336         }
   337 
   338         /*
   339          * Restore user's disk quota because allocation failed.
   340          */
   341         if (quota_updated != 0)
   342                 (void)ufs_quota_free_blocks(ip, quota_updated, cred);
   343                 
   344         return error;
   345 }
  346 
  347 /*
  348  * Reallocate a sequence of blocks into a contiguous sequence of blocks.
  349  *
  350  * The vnode and an array of buffer pointers for a range of sequential
  351  * logical blocks to be made contiguous are given. The allocator attempts
  352  * to find a range of sequential blocks starting as close as possible to
  353  * an fs_rotdelay offset from the end of the allocation for the logical
  354  * block immediately preceding the current range. If successful, the
  355  * physical block numbers in the buffer pointers and in the inode are
  356  * changed to reflect the new allocation. If unsuccessful, the allocation
  357  * is left unchanged. The success in doing the reallocation is returned.
  358  * Note that the error return is not reflected back to the user. Rather
  359  * the previous block allocation will be used.
  360  */
  361 
   362 int doasyncfree = 1;    /* nonzero: write relocated maps via bdwrite() (async) */
   363 int doreallocblks = 1;  /* presumably gates reallocation in ffs_reallocblks() -- not referenced in this view, confirm */
   364 int prtrealloc = 0;     /* nonzero (DEBUG kernels): trace each reallocation */
  365 
  366 int
  367 ffs1_reallocblks(void *v)
  368 {
  369         struct vop_reallocblks_args *ap = v;
  370         struct fs *fs;
  371         struct inode *ip;
  372         struct vnode *vp;
  373         struct buf *sbp, *ebp;
  374         daddr_t *bap, *sbap, *ebap = NULL;
  375         struct cluster_save *buflist;
  376         daddr_t start_lbn, end_lbn, soff, newblk, blkno;
  377         struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
  378         int i, len, start_lvl, end_lvl, pref, ssize;
  379 
  380         vp = ap->a_vp;
  381         ip = VTOI(vp);
  382         fs = ip->i_fs;
  383         if (fs->fs_contigsumsize <= 0)
  384                 return (ENOSPC);
  385         buflist = ap->a_buflist;
  386         len = buflist->bs_nchildren;
  387         start_lbn = buflist->bs_children[0]->b_lblkno;
  388         end_lbn = start_lbn + len - 1;
  389 
  390 #ifdef DIAGNOSTIC
  391         for (i = 0; i < len; i++)
  392                 if (!ffs_checkblk(ip,
  393                    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
  394                         panic("ffs1_reallocblks: unallocated block 1");
  395                 
  396         for (i = 1; i < len; i++)
  397                 if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
  398                         panic("ffs1_reallocblks: non-logical cluster");
  399 
  400         blkno = buflist->bs_children[0]->b_blkno;
  401         ssize = fsbtodb(fs, fs->fs_frag);
  402         for (i = 1; i < len - 1; i++)
  403                 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
  404                         panic("ffs1_reallocblks: non-physical cluster %d", i);
  405 #endif
  406         /*
  407          * If the latest allocation is in a new cylinder group, assume that
  408          * the filesystem has decided to move and do not force it back to
  409          * the previous cylinder group.
  410          */
  411         if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
  412             dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
  413                 return (ENOSPC);
  414         if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
  415             ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
  416                 return (ENOSPC);
  417         /*
  418          * Get the starting offset and block map for the first block.
  419          */
  420         if (start_lvl == 0) {
  421                 sbap = &ip->i_ffs1_db[0];
  422                 soff = start_lbn;
  423         } else {
  424                 idp = &start_ap[start_lvl - 1];
  425                 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
  426                         brelse(sbp);
  427                         return (ENOSPC);
  428                 }
  429                 sbap = (daddr_t *)sbp->b_data;
  430                 soff = idp->in_off;
  431         }
  432         /*
  433          * Find the preferred location for the cluster.
  434          */
  435         pref = ffs1_blkpref(ip, start_lbn, soff, sbap);
  436         /*
  437          * If the block range spans two block maps, get the second map.
  438          */
  439         if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
  440                 ssize = len;
  441         } else {
  442 #ifdef DIAGNOSTIC
  443                 if (start_lvl > 1 &&
  444                     start_ap[start_lvl-1].in_lbn == idp->in_lbn)
  445                         panic("ffs1_reallocblk: start == end");
  446 #endif
  447                 ssize = len - (idp->in_off + 1);
  448                 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
  449                         goto fail;
  450                 ebap = (daddr_t *)ebp->b_data;
  451         }
  452         /*
  453          * Search the block map looking for an allocation of the desired size.
  454          */
  455         if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
  456             len, ffs_clusteralloc)) == 0)
  457                 goto fail;
  458         /*
  459          * We have found a new contiguous block.
  460          *
  461          * First we have to replace the old block pointers with the new
  462          * block pointers in the inode and indirect blocks associated
  463          * with the file.
  464          */
  465 #ifdef DEBUG
  466         if (prtrealloc)
  467                 printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number,
  468                     start_lbn, end_lbn);
  469 #endif
  470         blkno = newblk;
  471         for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
  472                 if (i == ssize) {
  473                         bap = ebap;
  474                         soff = -i;
  475                 }
  476 #ifdef DIAGNOSTIC
  477                 if (!ffs_checkblk(ip,
  478                    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
  479                         panic("ffs1_reallocblks: unallocated block 2");
  480                 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
  481                         panic("ffs1_reallocblks: alloc mismatch");
  482 #endif
  483 #ifdef DEBUG
  484                 if (prtrealloc)
  485                         printf(" %d,", *bap);
  486 #endif
  487                 if (DOINGSOFTDEP(vp)) {
  488                         if (sbap == &ip->i_ffs1_db[0] && i < ssize)
  489                                 softdep_setup_allocdirect(ip, start_lbn + i,
  490                                     blkno, *bap, fs->fs_bsize, fs->fs_bsize,
  491                                     buflist->bs_children[i]);
  492                         else
  493                                 softdep_setup_allocindir_page(ip, start_lbn + i,
  494                                     i < ssize ? sbp : ebp, soff + i, blkno,
  495                                     *bap, buflist->bs_children[i]);
  496                 }
  497 
  498                 *bap++ = blkno;
  499         }
  500         /*
  501          * Next we must write out the modified inode and indirect blocks.
  502          * For strict correctness, the writes should be synchronous since
  503          * the old block values may have been written to disk. In practise
  504          * they are almost never written, but if we are concerned about
  505          * strict correctness, the `doasyncfree' flag should be set to zero.
  506          *
  507          * The test on `doasyncfree' should be changed to test a flag
  508          * that shows whether the associated buffers and inodes have
  509          * been written. The flag should be set when the cluster is
  510          * started and cleared whenever the buffer or inode is flushed.
  511          * We can then check below to see if it is set, and do the
  512          * synchronous write only when it has been cleared.
  513          */
  514         if (sbap != &ip->i_ffs1_db[0]) {
  515                 if (doasyncfree)
  516                         bdwrite(sbp);
  517                 else
  518                         bwrite(sbp);
  519         } else {
  520                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
  521                 if (!doasyncfree) {
  522                         UFS_UPDATE(ip, MNT_WAIT);
  523                 }
  524         }
  525         if (ssize < len) {
  526                 if (doasyncfree)
  527                         bdwrite(ebp);
  528                 else
  529                         bwrite(ebp);
  530         }
  531         /*
  532          * Last, free the old blocks and assign the new blocks to the buffers.
  533          */
  534 #ifdef DEBUG
  535         if (prtrealloc)
  536                 printf("\n\tnew:");
  537 #endif
  538         for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
  539                 if (!DOINGSOFTDEP(vp))
  540                         ffs_blkfree(ip,
  541                             dbtofsb(fs, buflist->bs_children[i]->b_blkno),
  542                             fs->fs_bsize);
  543                 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
  544 #ifdef DIAGNOSTIC
  545                 if (!ffs_checkblk(ip,
  546                    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
  547                         panic("ffs1_reallocblks: unallocated block 3");
  548                 if (prtrealloc)
  549                         printf(" %d,", blkno);
  550 #endif
  551         }
  552 #ifdef DEBUG
  553         if (prtrealloc) {
  554                 prtrealloc--;
  555                 printf("\n");
  556         }
  557 #endif
  558         return (0);
  559 
  560 fail:
  561         if (ssize < len)
  562                 brelse(ebp);
  563         if (sbap != &ip->i_ffs1_db[0])
  564                 brelse(sbp);
  565         return (ENOSPC);
  566 }
  567 
  568 #ifdef FFS2
  569 int
  570 ffs2_reallocblks(void *v)
  571 {
  572         struct vop_reallocblks_args *ap = v;
  573         struct fs *fs;
  574         struct inode *ip;
  575         struct vnode *vp;
  576         struct buf *sbp, *ebp;
  577         daddr64_t *bap, *sbap, *ebap = 0;
  578         struct cluster_save *buflist;
  579         struct ufsmount *ump;
  580         daddr64_t start_lbn, end_lbn;
  581         daddr64_t soff, newblk, blkno, pref;
  582         struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
  583         int i, len, start_lvl, end_lvl, ssize;
  584 
  585         vp = ap->a_vp;
  586         ip = VTOI(vp);
  587         fs = ip->i_fs;
  588         ump = ip->i_ump;
  589 
  590         if (fs->fs_contigsumsize <= 0)
  591                 return (ENOSPC);
  592 
  593         buflist = ap->a_buflist;
  594         len = buflist->bs_nchildren;
  595         start_lbn = buflist->bs_children[0]->b_lblkno;
  596         end_lbn = start_lbn + len - 1;
  597 
  598 #ifdef DIAGNOSTIC
  599         for (i = 0; i < len; i++)
  600                 if (!ffs_checkblk(ip,
  601                    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
  602                         panic("ffs2_reallocblks: unallocated block 1");
  603 
  604         for (i = 1; i < len; i++)
  605                 if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
  606                         panic("ffs2_reallocblks: non-logical cluster");
  607 
  608         blkno = buflist->bs_children[0]->b_blkno;
  609         ssize = fsbtodb(fs, fs->fs_frag);
  610 
  611         for (i = 1; i < len - 1; i++)
  612                 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
  613                         panic("ffs2_reallocblks: non-physical cluster %d", i);
  614 #endif
  615 
  616         /*
  617          * If the latest allocation is in a new cylinder group, assume that
  618          * the filesystem has decided to move and do not force it back to
  619          * the previous cylinder group.
  620          */
  621         if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
  622             dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
  623                 return (ENOSPC);
  624         if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
  625             ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
  626                 return (ENOSPC);
  627 
  628         /*
  629          * Get the starting offset and block map for the first block.
  630          */
  631         if (start_lvl == 0) {
  632                 sbap = &ip->i_din2->di_db[0];
  633                 soff = start_lbn;
  634         } else {
  635                 idp = &start_ap[start_lvl - 1];
  636                 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
  637                         brelse(sbp);
  638                         return (ENOSPC);
  639                 }
  640                 sbap = (daddr64_t *)sbp->b_data;
  641                 soff = idp->in_off;
  642         }
  643 
  644         /*
  645          * If the block range spans two block maps, get the second map.
  646          */
  647         if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
  648                 ssize = len;
  649         } else {
  650 #ifdef DIAGNOSTIC
  651                 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
  652                         panic("ffs2_reallocblk: start == end");
  653 #endif
  654                 ssize = len - (idp->in_off + 1);
  655                 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
  656                         goto fail;
  657                 ebap = (daddr64_t *)ebp->b_data;
  658         }
  659 
  660         /*
  661          * Find the preferred location for the cluster.
  662          */
  663         pref = ffs2_blkpref(ip, start_lbn, soff, sbap);
  664 
  665         /*
  666          * Search the block map looking for an allocation of the desired size.
  667          */
  668         if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
  669             len, ffs_clusteralloc)) == 0)
  670                 goto fail;
  671 
  672         /*
  673          * We have found a new contiguous block.
  674          *
  675          * First we have to replace the old block pointers with the new
  676          * block pointers in the inode and indirect blocks associated
  677          * with the file.
  678          */
  679 #ifdef DEBUG
  680         if (prtrealloc)
  681                 printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
  682                     (intmax_t)start_lbn, (intmax_t)end_lbn);
  683 #endif
  684 
  685         blkno = newblk;
  686 
  687         for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
  688                 if (i == ssize) {
  689                         bap = ebap;
  690                         soff = -i;
  691                 }
  692 #ifdef DIAGNOSTIC
  693                 if (!ffs_checkblk(ip,
  694                    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
  695                         panic("ffs2_reallocblks: unallocated block 2");
  696                 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
  697                         panic("ffs2_reallocblks: alloc mismatch");
  698 #endif
  699 #ifdef DEBUG
  700                 if (prtrealloc)
  701                         printf(" %jd,", (intmax_t)*bap);
  702 #endif
  703                 if (DOINGSOFTDEP(vp)) {
  704                         if (sbap == &ip->i_din2->di_db[0] && i < ssize)
  705                                 softdep_setup_allocdirect(ip, start_lbn + i,
  706                                     blkno, *bap, fs->fs_bsize, fs->fs_bsize,
  707                                     buflist->bs_children[i]);
  708                         else
  709                                 softdep_setup_allocindir_page(ip, start_lbn + i,
  710                                     i < ssize ? sbp : ebp, soff + i, blkno,
  711                                     *bap, buflist->bs_children[i]);
  712                 }
  713                 *bap++ = blkno;
  714         }
  715 
  716         /*
  717          * Next we must write out the modified inode and indirect blocks.
  718          * For strict correctness, the writes should be synchronous since
  719          * the old block values may have been written to disk. In practise
  720          * they are almost never written, but if we are concerned about
  721          * strict correctness, the `doasyncfree' flag should be set to zero.
  722          *
  723          * The test on `doasyncfree' should be changed to test a flag
  724          * that shows whether the associated buffers and inodes have
  725          * been written. The flag should be set when the cluster is
  726          * started and cleared whenever the buffer or inode is flushed.
  727          * We can then check below to see if it is set, and do the
  728          * synchronous write only when it has been cleared.
  729          */
  730         if (sbap != &ip->i_din2->di_db[0]) {
  731                 if (doasyncfree)
  732                         bdwrite(sbp);
  733                 else
  734                         bwrite(sbp);
  735         } else {
  736                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
  737                 if (!doasyncfree)
  738                         ffs_update(ip, NULL, NULL, MNT_WAIT);
  739         }
  740 
  741         if (ssize < len) {
  742                 if (doasyncfree)
  743                         bdwrite(ebp);
  744                 else
  745                         bwrite(ebp);
  746         }
  747 
  748         /*
  749          * Last, free the old blocks and assign the new blocks to the buffers.
  750          */
  751 #ifdef DEBUG
  752         if (prtrealloc)
  753                 printf("\n\tnew:");
  754 #endif
  755         for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
  756                 if (!DOINGSOFTDEP(vp))
  757                         ffs_blkfree(ip, dbtofsb(fs,
  758                             buflist->bs_children[i]->b_blkno), fs->fs_bsize);
  759                 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
  760 #ifdef DIAGNOSTIC
  761                 if (!ffs_checkblk(ip,
  762                    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
  763                         panic("ffs2_reallocblks: unallocated block 3");
  764 #endif
  765 #ifdef DEBUG
  766                 if (prtrealloc)
  767                         printf(" %jd,", (intmax_t)blkno);
  768 #endif
  769         }
  770 #ifdef DEBUG
  771         if (prtrealloc) {
  772                 prtrealloc--;
  773                 printf("\n");
  774         }
  775 #endif
  776 
  777         return (0);
  778 
  779 fail:
  780         if (ssize < len)
  781                 brelse(ebp);
  782 
  783         if (sbap != &ip->i_din2->di_db[0])
  784                 brelse(sbp);
  785 
  786         return (ENOSPC);
  787 }
  788 #endif /* FFS2 */
  789 
  790 int
  791 ffs_reallocblks(void *v)
  792 {
  793 #ifdef FFS2
  794         struct vop_reallocblks_args *ap = v;
  795 #endif
  796 
  797         if (!doreallocblks)
  798                 return (ENOSPC);
  799 
  800 #ifdef FFS2
  801         if (VTOI(ap->a_vp)->i_ump->um_fstype == UM_UFS2)
  802                 return (ffs2_reallocblks(v));
  803 #endif
  804 
  805         return (ffs1_reallocblks(v));
  806 }
  807 
  808 /*
  809  * Allocate an inode in the file system.
  810  * 
  811  * If allocating a directory, use ffs_dirpref to select the inode.
  812  * If allocating in a directory, the following hierarchy is followed:
  813  *   1) allocate the preferred inode.
  814  *   2) allocate an inode in the same cylinder group.
  815  *   3) quadratically rehash into other cylinder groups, until an
  816  *      available inode is located.
  817  * If no inode preference is given the following hierarchy is used
  818  * to allocate an inode:
  819  *   1) allocate an inode in cylinder group 0.
  820  *   2) quadratically rehash into other cylinder groups, until an
  821  *      available inode is located.
  822  */
  823 int
  824 ffs_inode_alloc(struct inode *pip, mode_t mode, struct ucred *cred,
  825     struct vnode **vpp)
  826 {
  827         struct vnode *pvp = ITOV(pip);
  828         struct fs *fs;
  829         struct inode *ip;
  830         ino_t ino, ipref;
  831         int cg, error;
  832         
  833         *vpp = NULL;
  834         fs = pip->i_fs;
  835         if (fs->fs_cstotal.cs_nifree == 0)
  836                 goto noinodes;
  837 
  838         if ((mode & IFMT) == IFDIR)
  839                 ipref = ffs_dirpref(pip);
  840         else
  841                 ipref = pip->i_number;
  842         if (ipref >= fs->fs_ncg * fs->fs_ipg)
  843                 ipref = 0;
  844         cg = ino_to_cg(fs, ipref);
  845 
  846         /*
  847          * Track number of dirs created one after another
  848          * in a same cg without intervening by files.
  849          */
  850         if ((mode & IFMT) == IFDIR) {
  851                 if (fs->fs_contigdirs[cg] < 255)
  852                         fs->fs_contigdirs[cg]++;
  853         } else {
  854                 if (fs->fs_contigdirs[cg] > 0)
  855                         fs->fs_contigdirs[cg]--;
  856         }
  857         ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg);
  858         if (ino == 0)
  859                 goto noinodes;
  860         error = VFS_VGET(pvp->v_mount, ino, vpp);
  861         if (error) {
  862                 ffs_inode_free(pip, ino, mode);
  863                 return (error);
  864         }
  865 
  866         ip = VTOI(*vpp);
  867 
  868         if (DIP(ip, mode)) {
  869                 printf("mode = 0%o, inum = %d, fs = %s\n",
  870                     DIP(ip, mode), ip->i_number, fs->fs_fsmnt);
  871                 panic("ffs_valloc: dup alloc");
  872         }
  873 
  874         if (DIP(ip, blocks)) {
  875                 printf("free inode %s/%d had %d blocks\n",
  876                     fs->fs_fsmnt, ino, DIP(ip, blocks));
  877                 DIP_ASSIGN(ip, blocks, 0);
  878         }
  879 
  880         DIP_ASSIGN(ip, flags, 0);
  881 
  882         /*
  883          * Set up a new generation number for this inode.
  884          * XXX - just increment for now, this is wrong! (millert)
  885          *       Need a way to preserve randomization.
  886          */
  887         if (DIP(ip, gen) == 0 || ++(DIP(ip, gen)) == 0)
  888                 DIP_ASSIGN(ip, gen, arc4random() & INT_MAX);
  889 
  890         if (DIP(ip, gen) == 0 || DIP(ip, gen) == -1)
  891                 DIP_ASSIGN(ip, gen, 1); /* Shouldn't happen */
  892 
  893         return (0);
  894 
  895 noinodes:
  896         ffs_fserr(fs, cred->cr_uid, "out of inodes");
  897         uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
  898         return (ENOSPC);
  899 }
  900 
  901 /*
  902  * Find a cylinder group to place a directory.
  903  *
  904  * The policy implemented by this algorithm is to allocate a
  905  * directory inode in the same cylinder group as its parent
  906  * directory, but also to reserve space for its files inodes
  907  * and data. Restrict the number of directories which may be
  908  * allocated one after another in the same cylinder group
  909  * without intervening allocation of files.
  910  *
  911  * If we allocate a first level directory then force allocation
  912  * in another cylinder group.
  913  */
  914 ino_t
  915 ffs_dirpref(struct inode *pip)
  916 {
  917         struct fs *fs;
  918         int     cg, prefcg, dirsize, cgsize;
  919         int     avgifree, avgbfree, avgndir, curdirsize;
  920         int     minifree, minbfree, maxndir;
  921         int     mincg, minndir;
  922         int     maxcontigdirs;
  923 
  924         fs = pip->i_fs;
  925 
  926         avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
  927         avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
  928         avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
  929 #if 1
  930 
  931         /*
  932          * Force allocation in another cg if creating a first level dir.
  933          */
  934         if (ITOV(pip)->v_flag & VROOT) {
  935                 prefcg = (arc4random() & INT_MAX) % fs->fs_ncg;
  936                 mincg = prefcg;
  937                 minndir = fs->fs_ipg;
  938                 for (cg = prefcg; cg < fs->fs_ncg; cg++)
  939                         if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
  940                             fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
  941                             fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
  942                                 mincg = cg;
  943                                 minndir = fs->fs_cs(fs, cg).cs_ndir;
  944                         }
  945                 for (cg = 0; cg < prefcg; cg++)
  946                         if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
  947                             fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
  948                             fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
  949                                 mincg = cg;
  950                                 minndir = fs->fs_cs(fs, cg).cs_ndir;
  951                         }
  952                 cg = mincg;
  953                 goto end;
  954         } else
  955                 prefcg = ino_to_cg(fs, pip->i_number);
  956 #else
  957         prefcg = ino_to_cg(fs, pip->i_number);
  958 #endif
  959 
  960         /*
  961          * Count various limits which used for
  962          * optimal allocation of a directory inode.
  963          */
  964 #if 1
  965         maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
  966         minifree = avgifree - fs->fs_ipg / 4;
  967         if (minifree < 0)
  968                 minifree = 0;
  969         minbfree = avgbfree - fs->fs_fpg / fs->fs_frag / 4;
  970         if (minbfree < 0)
  971                 minbfree = 0;
  972 #else
  973         maxndir = avgndir + (fs->fs_ipg - avgndir) / 16;
  974         minifree = avgifree * 3 / 4;
  975         minbfree = avgbfree * 3 / 4;
  976 #endif
  977         cgsize = fs->fs_fsize * fs->fs_fpg;
  978         dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
  979         curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
  980         if (dirsize < curdirsize)
  981                 dirsize = curdirsize;
  982         maxcontigdirs = min(cgsize / dirsize, 255);
  983         if (fs->fs_avgfpdir > 0)
  984                 maxcontigdirs = min(maxcontigdirs,
  985                                     fs->fs_ipg / fs->fs_avgfpdir);
  986         if (maxcontigdirs == 0)
  987                 maxcontigdirs = 1;
  988 
  989         /*
  990          * Limit number of dirs in one cg and reserve space for 
  991          * regular files, but only if we have no deficit in
  992          * inodes or space.
  993          */
  994         for (cg = prefcg; cg < fs->fs_ncg; cg++)
  995                 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
  996                     fs->fs_cs(fs, cg).cs_nifree >= minifree &&
  997                     fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
  998                         if (fs->fs_contigdirs[cg] < maxcontigdirs)
  999                                 goto end;
 1000                 }
 1001         for (cg = 0; cg < prefcg; cg++)
 1002                 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
 1003                     fs->fs_cs(fs, cg).cs_nifree >= minifree &&
 1004                     fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
 1005                         if (fs->fs_contigdirs[cg] < maxcontigdirs)
 1006                                 goto end;
 1007                 }
 1008         /*
 1009          * This is a backstop when we have deficit in space.
 1010          */
 1011         for (cg = prefcg; cg < fs->fs_ncg; cg++)
 1012                 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
 1013                         goto end;
 1014         for (cg = 0; cg < prefcg; cg++)
 1015                 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
 1016                         goto end;
 1017 end:
 1018         return ((ino_t)(fs->fs_ipg * cg));
 1019 }
 1020 
 1021 /*
 1022  * Select the desired position for the next block in a file.  The file is
 1023  * logically divided into sections. The first section is composed of the
 1024  * direct blocks. Each additional section contains fs_maxbpg blocks.
 1025  *
 1026  * If no blocks have been allocated in the first section, the policy is to
 1027  * request a block in the same cylinder group as the inode that describes
 1028  * the file. If no blocks have been allocated in any other section, the
 1029  * policy is to place the section in a cylinder group with a greater than
 1030  * average number of free blocks.  An appropriate cylinder group is found
 1031  * by using a rotor that sweeps the cylinder groups. When a new group of
 1032  * blocks is needed, the sweep begins in the cylinder group following the
 1033  * cylinder group from which the previous allocation was made. The sweep
 1034  * continues until a cylinder group with greater than the average number
 1035  * of free blocks is found. If the allocation is for the first block in an
 1036  * indirect block, the information on the previous allocation is unavailable;
 1037  * here a best guess is made based upon the logical block number being
 1038  * allocated.
 1039  */
 1040 int32_t
 1041 ffs1_blkpref(struct inode *ip, daddr_t lbn, int indx, int32_t *bap)
 1042 {
 1043         struct fs *fs;
 1044         int cg, avgbfree, startcg;
 1045 
 1046         fs = ip->i_fs;
 1047         if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
 1048                 if (lbn < NDADDR + NINDIR(fs)) {
 1049                         cg = ino_to_cg(fs, ip->i_number);
 1050                         return (fs->fs_fpg * cg + fs->fs_frag);
 1051                 }
 1052                 /*
 1053                  * Find a cylinder with greater than average number of
 1054                  * unused data blocks.
 1055                  */
 1056                 if (indx == 0 || bap[indx - 1] == 0)
 1057                         startcg =
 1058                             ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
 1059                 else
 1060                         startcg = dtog(fs, bap[indx - 1]) + 1;
 1061                 startcg %= fs->fs_ncg;
 1062                 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
 1063                 for (cg = startcg; cg < fs->fs_ncg; cg++)
 1064                         if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
 1065                                 fs->fs_cgrotor = cg;
 1066                                 return (fs->fs_fpg * cg + fs->fs_frag);
 1067                         }
 1068                 for (cg = 0; cg <= startcg; cg++)
 1069                         if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
 1070                                 fs->fs_cgrotor = cg;
 1071                                 return (fs->fs_fpg * cg + fs->fs_frag);
 1072                         }
 1073                 return (0);
 1074         }
 1075 
 1076         return (bap[indx - 1] + fs->fs_frag);
 1077 }
 1078 
 1079 /*
 1080  * Same as above, for UFS2.
 1081  */
 1082 #ifdef FFS2
 1083 int64_t
 1084 ffs2_blkpref(struct inode *ip, daddr_t lbn, int indx, int64_t *bap)
 1085 {
 1086         struct fs *fs;
 1087         int cg, avgbfree, startcg;
 1088 
 1089         fs = ip->i_fs;
 1090 
 1091         if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
 1092                 if (lbn < NDADDR + NINDIR(fs)) {
 1093                         cg = ino_to_cg(fs, ip->i_number);
 1094                         return (fs->fs_fpg * cg + fs->fs_frag);
 1095                 }
 1096 
 1097                 /*
 1098                  * Find a cylinder with greater than average number of
 1099                  * unused data blocks.
 1100                  */
 1101                 if (indx == 0 || bap[indx - 1] == 0)
 1102                         startcg = ino_to_cg(fs, ip->i_number) +
 1103                             lbn / fs->fs_maxbpg;
 1104                 else
 1105                         startcg = dtog(fs, bap[indx - 1] + 1);
 1106 
 1107                 startcg %= fs->fs_ncg;
 1108                 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
 1109 
 1110                 for (cg = startcg; cg < fs->fs_ncg; cg++)
 1111                         if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree)
 1112                                 return (fs->fs_fpg * cg + fs->fs_frag);
 1113 
 1114                 for (cg = 0; cg < startcg; cg++)
 1115                         if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree)
 1116                                 return (fs->fs_fpg * cg + fs->fs_frag);
 1117 
 1118                 return (0);
 1119         }
 1120 
 1121         /*
 1122          * We always just try to lay things out contiguously.
 1123          */
 1124         return (bap[indx - 1] + fs->fs_frag);
 1125 }
 1126 #endif /* FFS2 */
 1127 
 1128 /*
 1129  * Implement the cylinder overflow algorithm.
 1130  *
 1131  * The policy implemented by this algorithm is:
 1132  *   1) allocate the block in its requested cylinder group.
 1133  *   2) quadratically rehash on the cylinder group number.
 1134  *   3) brute force search for a free block.
 1135  */
 1136 /*VARARGS5*/
 1137 u_long
 1138 ffs_hashalloc(struct inode *ip, int cg, long pref, int size,
 1139     daddr_t (*allocator)(struct inode *, int, daddr_t, int))
 1140 {
 1141         struct fs *fs;
 1142         long result;
 1143         int i, icg = cg;
 1144 
 1145         fs = ip->i_fs;
 1146         /*
 1147          * 1: preferred cylinder group
 1148          */
 1149         result = (*allocator)(ip, cg, pref, size);
 1150         if (result)
 1151                 return (result);
 1152         /*
 1153          * 2: quadratic rehash
 1154          */
 1155         for (i = 1; i < fs->fs_ncg; i *= 2) {
 1156                 cg += i;
 1157                 if (cg >= fs->fs_ncg)
 1158                         cg -= fs->fs_ncg;
 1159                 result = (*allocator)(ip, cg, 0, size);
 1160                 if (result)
 1161                         return (result);
 1162         }
 1163         /*
 1164          * 3: brute force search
 1165          * Note that we start at i == 2, since 0 was checked initially,
 1166          * and 1 is always checked in the quadratic rehash.
 1167          */
 1168         cg = (icg + 2) % fs->fs_ncg;
 1169         for (i = 2; i < fs->fs_ncg; i++) {
 1170                 result = (*allocator)(ip, cg, 0, size);
 1171                 if (result)
 1172                         return (result);
 1173                 cg++;
 1174                 if (cg == fs->fs_ncg)
 1175                         cg = 0;
 1176         }
 1177         return (0);
 1178 }
 1179 
 1180 /*
 1181  * Determine whether a fragment can be extended.
 1182  *
 1183  * Check to see if the necessary fragments are available, and
 1184  * if they are, allocate them.
 1185  */
 1186 daddr_t
 1187 ffs_fragextend(struct inode *ip, int cg, long bprev, int osize, int nsize)
 1188 {
 1189         struct fs *fs;
 1190         struct cg *cgp;
 1191         struct buf *bp;
 1192         long bno;
 1193         int frags, bbase;
 1194         int i, error;
 1195 
 1196         fs = ip->i_fs;
 1197         if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
 1198                 return (0);
 1199         frags = numfrags(fs, nsize);
 1200         bbase = fragnum(fs, bprev);
 1201         if (bbase > fragnum(fs, (bprev + frags - 1))) {
 1202                 /* cannot extend across a block boundary */
 1203                 return (0);
 1204         }
 1205         error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
 1206                 (int)fs->fs_cgsize, NOCRED, &bp);
 1207         if (error) {
 1208                 brelse(bp);
 1209                 return (0);
 1210         }
 1211         cgp = (struct cg *)bp->b_data;
 1212         if (!cg_chkmagic(cgp)) {
 1213                 brelse(bp);
 1214                 return (0);
 1215         }
 1216 
 1217         cgp->cg_ffs2_time = cgp->cg_time = time_second;
 1218 
 1219         bno = dtogd(fs, bprev);
 1220         for (i = numfrags(fs, osize); i < frags; i++)
 1221                 if (isclr(cg_blksfree(cgp), bno + i)) {
 1222                         brelse(bp);
 1223                         return (0);
 1224                 }
 1225         /*
 1226          * the current fragment can be extended
 1227          * deduct the count on fragment being extended into
 1228          * increase the count on the remaining fragment (if any)
 1229          * allocate the extended piece
 1230          */
 1231         for (i = frags; i < fs->fs_frag - bbase; i++)
 1232                 if (isclr(cg_blksfree(cgp), bno + i))
 1233                         break;
 1234         cgp->cg_frsum[i - numfrags(fs, osize)]--;
 1235         if (i != frags)
 1236                 cgp->cg_frsum[i - frags]++;
 1237         for (i = numfrags(fs, osize); i < frags; i++) {
 1238                 clrbit(cg_blksfree(cgp), bno + i);
 1239                 cgp->cg_cs.cs_nffree--;
 1240                 fs->fs_cstotal.cs_nffree--;
 1241                 fs->fs_cs(fs, cg).cs_nffree--;
 1242         }
 1243         fs->fs_fmod = 1;
 1244         if (DOINGSOFTDEP(ITOV(ip)))
 1245                 softdep_setup_blkmapdep(bp, fs, bprev);
 1246 
 1247         bdwrite(bp);
 1248         return (bprev);
 1249 }
 1250 
 1251 /*
 1252  * Determine whether a block can be allocated.
 1253  *
 1254  * Check to see if a block of the appropriate size is available,
 1255  * and if it is, allocate it.
 1256  */
 1257 daddr_t
 1258 ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
 1259 {
 1260         struct fs *fs;
 1261         struct cg *cgp;
 1262         struct buf *bp;
 1263         daddr_t bno, blkno;
 1264         int error, i, frags, allocsiz;
 1265 
 1266         fs = ip->i_fs;
 1267         if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
 1268                 return (0);
 1269         /* read cylinder group block */
 1270         error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
 1271                 (int)fs->fs_cgsize, NOCRED, &bp);
 1272         if (error) {
 1273                 brelse(bp);
 1274                 return (0);
 1275         }
 1276         cgp = (struct cg *)bp->b_data;
 1277         if (!cg_chkmagic(cgp) ||
 1278             (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
 1279                 brelse(bp);
 1280                 return (0);
 1281         }
 1282 
 1283         cgp->cg_ffs2_time = cgp->cg_time = time_second;
 1284 
 1285         if (size == fs->fs_bsize) {
 1286                 /* allocate and return a complete data block */
 1287                 bno = ffs_alloccgblk(ip, bp, bpref);
 1288                 bdwrite(bp);
 1289                 return (bno);
 1290         }
 1291         /*
 1292          * check to see if any fragments are already available
 1293          * allocsiz is the size which will be allocated, hacking
 1294          * it down to a smaller size if necessary
 1295          */
 1296         frags = numfrags(fs, size);
 1297         for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
 1298                 if (cgp->cg_frsum[allocsiz] != 0)
 1299                         break;
 1300         if (allocsiz == fs->fs_frag) {
 1301                 /*
 1302                  * no fragments were available, so a block will be 
 1303                  * allocated, and hacked up
 1304                  */
 1305                 if (cgp->cg_cs.cs_nbfree == 0) {
 1306                         brelse(bp);
 1307                         return (0);
 1308                 }
 1309                 bno = ffs_alloccgblk(ip, bp, bpref);
 1310                 bpref = dtogd(fs, bno);
 1311                 for (i = frags; i < fs->fs_frag; i++)
 1312                         setbit(cg_blksfree(cgp), bpref + i);
 1313                 i = fs->fs_frag - frags;
 1314                 cgp->cg_cs.cs_nffree += i;
 1315                 fs->fs_cstotal.cs_nffree += i;
 1316                 fs->fs_cs(fs, cg).cs_nffree += i;
 1317                 fs->fs_fmod = 1;
 1318                 cgp->cg_frsum[i]++;
 1319                 bdwrite(bp);
 1320                 return (bno);
 1321         }
 1322         bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
 1323         if (bno < 0) {
 1324                 brelse(bp);
 1325                 return (0);
 1326         }
 1327 
 1328         for (i = 0; i < frags; i++)
 1329                 clrbit(cg_blksfree(cgp), bno + i);
 1330         cgp->cg_cs.cs_nffree -= frags;
 1331         fs->fs_cstotal.cs_nffree -= frags;
 1332         fs->fs_cs(fs, cg).cs_nffree -= frags;
 1333         fs->fs_fmod = 1;
 1334         cgp->cg_frsum[allocsiz]--;
 1335         if (frags != allocsiz)
 1336                 cgp->cg_frsum[allocsiz - frags]++;
 1337 
 1338         blkno = cg * fs->fs_fpg + bno;
 1339         if (DOINGSOFTDEP(ITOV(ip)))
 1340                 softdep_setup_blkmapdep(bp, fs, blkno);
 1341         bdwrite(bp);
 1342         return ((u_long)blkno);
 1343 }
 1344 
 1345 /*
 1346  * Allocate a block in a cylinder group.
 1347  * Note that this routine only allocates fs_bsize blocks; these
 1348  * blocks may be fragmented by the routine that allocates them.
 1349  */
 1350 daddr_t
 1351 ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref)
 1352 {
 1353         struct fs *fs;
 1354         struct cg *cgp;
 1355         daddr_t bno, blkno;
 1356         u_int8_t *blksfree;
 1357         int cylno;
 1358 
 1359         fs = ip->i_fs;
 1360         cgp = (struct cg *) bp->b_data;
 1361         blksfree = cg_blksfree(cgp);
 1362 
 1363         if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx)
 1364                 bpref = cgp->cg_rotor;
 1365         else {
 1366                 bpref = blknum(fs, bpref);
 1367                 bno = dtogd(fs, bpref);
 1368                 /*
 1369                  * If the requested block is available, use it.
 1370                  */
 1371                 if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
 1372                         goto gotit;
 1373         }
 1374 
 1375         /*
 1376          * Take the next available block in this cylinder group.
 1377          */
 1378         bno = ffs_mapsearch(fs, cgp, bpref, (int) fs->fs_frag);
 1379         if (bno < 0)
 1380                 return (0);
 1381 
 1382         cgp->cg_rotor = bno;
 1383 
 1384 gotit:
 1385         blkno = fragstoblks(fs, bno);
 1386         ffs_clrblock(fs, blksfree, (long) blkno);
 1387         ffs_clusteracct(fs, cgp, blkno, -1);
 1388         cgp->cg_cs.cs_nbfree--;
 1389         fs->fs_cstotal.cs_nbfree--;
 1390         fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
 1391 
 1392         if (fs->fs_magic != FS_UFS2_MAGIC) {
 1393                 cylno = cbtocylno(fs, bno);
 1394                 cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
 1395                 cg_blktot(cgp)[cylno]--;
 1396         }
 1397 
 1398         fs->fs_fmod = 1;
 1399         blkno = cgp->cg_cgx * fs->fs_fpg + bno;
 1400 
 1401         if (DOINGSOFTDEP(ITOV(ip)))
 1402                 softdep_setup_blkmapdep(bp, fs, blkno);
 1403 
 1404         return (blkno);
 1405 }
 1406 
 1407 /*
 1408  * Determine whether a cluster can be allocated.
 1409  *
 1410  * We do not currently check for optimal rotational layout if there
 1411  * are multiple choices in the same cylinder group. Instead we just
 1412  * take the first one that we find following bpref.
 1413  */
 1414 daddr_t
 1415 ffs_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
 1416 {
 1417         struct fs *fs;
 1418         struct cg *cgp;
 1419         struct buf *bp;
 1420         int i, got, run, bno, bit, map;
 1421         u_char *mapp;
 1422         int32_t *lp;
 1423 
 1424         fs = ip->i_fs;
 1425         if (fs->fs_maxcluster[cg] < len)
 1426                 return (0);
 1427         if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
 1428             NOCRED, &bp))
 1429                 goto fail;
 1430         cgp = (struct cg *)bp->b_data;
 1431         if (!cg_chkmagic(cgp))
 1432                 goto fail;
 1433         /*
 1434          * Check to see if a cluster of the needed size (or bigger) is
 1435          * available in this cylinder group.
 1436          */
 1437         lp = &cg_clustersum(cgp)[len];
 1438         for (i = len; i <= fs->fs_contigsumsize; i++)
 1439                 if (*lp++ > 0)
 1440                         break;
 1441         if (i > fs->fs_contigsumsize) {
 1442                 /*
 1443                  * This is the first time looking for a cluster in this
 1444                  * cylinder group. Update the cluster summary information
 1445                  * to reflect the true maximum sized cluster so that
 1446                  * future cluster allocation requests can avoid reading
 1447                  * the cylinder group map only to find no clusters.
 1448                  */
 1449                 lp = &cg_clustersum(cgp)[len - 1];
 1450                 for (i = len - 1; i > 0; i--)
 1451                         if (*lp-- > 0)
 1452                                 break;
 1453                 fs->fs_maxcluster[cg] = i;
 1454                 goto fail;
 1455         }
 1456         /*
 1457          * Search the cluster map to find a big enough cluster.
 1458          * We take the first one that we find, even if it is larger
 1459          * than we need as we prefer to get one close to the previous
 1460          * block allocation. We do not search before the current
 1461          * preference point as we do not want to allocate a block
 1462          * that is allocated before the previous one (as we will
 1463          * then have to wait for another pass of the elevator
 1464          * algorithm before it will be read). We prefer to fail and
 1465          * be recalled to try an allocation in the next cylinder group.
 1466          */
 1467         if (dtog(fs, bpref) != cg)
 1468                 bpref = 0;
 1469         else
 1470                 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
 1471         mapp = &cg_clustersfree(cgp)[bpref / NBBY];
 1472         map = *mapp++;
 1473         bit = 1 << (bpref % NBBY);
 1474         for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
 1475                 if ((map & bit) == 0) {
 1476                         run = 0;
 1477                 } else {
 1478                         run++;
 1479                         if (run == len)
 1480                                 break;
 1481                 }
 1482                 if ((got & (NBBY - 1)) != (NBBY - 1)) {
 1483                         bit <<= 1;
 1484                 } else {
 1485                         map = *mapp++;
 1486                         bit = 1;
 1487                 }
 1488         }
 1489         if (got >= cgp->cg_nclusterblks)
 1490                 goto fail;
 1491         /*
 1492          * Allocate the cluster that we have found.
 1493          */
 1494         cgp->cg_ffs2_time = cgp->cg_time = time_second;
 1495 
 1496 #ifdef DIAGNOSTIC
 1497         for (i = 1; i <= len; i++)
 1498                 if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i))
 1499                         panic("ffs_clusteralloc: map mismatch");
 1500 #endif
 1501         bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
 1502 #ifdef DIAGNOSTIC
 1503         if (dtog(fs, bno) != cg)
 1504                 panic("ffs_clusteralloc: allocated out of group");
 1505 #endif
 1506 
 1507         len = blkstofrags(fs, len);
 1508         for (i = 0; i < len; i += fs->fs_frag)
 1509                 if (ffs_alloccgblk(ip, bp, bno + i) != bno + i)
 1510                         panic("ffs_clusteralloc: lost block");
 1511         bdwrite(bp);
 1512         return (bno);
 1513 
 1514 fail:
 1515         brelse(bp);
 1516         return (0);
 1517 }
 1518 
/*
 * Inode allocation routine.
 *
 * Allocate an inode in cylinder group 'cg', preferring slot 'ipref' if it
 * is non-zero and free. 'mode' is the new inode's mode; it is used here
 * only to keep the allocated-directory counters accurate.
 *
 * Returns the file-system-wide inode number on success, or 0 if the group
 * has no free inode or its descriptor block could not be read/validated.
 */
daddr_t
ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	int error, start, len, loc, map, i;
#ifdef FFS2
	struct buf *ibp = NULL;		/* inode block to initialize, if any */
	struct ufs2_dinode *dp2;
#endif

	/*
	 * For efficiency, before looking at the bitmaps for free inodes,
	 * check the counters kept in the superblock cylinder group summaries,
	 * and in the cylinder group itself.
	 */
	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (0);

	/* Read this cylinder group's descriptor block. */
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (0);
	}

	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		return (0);
	}

	/*
	 * We are committed to the allocation from now on, so update the time
	 * on the cylinder group.
	 */
	cgp->cg_ffs2_time = cgp->cg_time = time_second;

	/*
	 * If there was a preferred location for the new inode, try to find it.
	 */
	if (ipref) {
		ipref %= fs->fs_ipg;	/* make ipref relative to this cg */
		if (isclr(cg_inosused(cgp), ipref))
			goto gotit; /* inode is free, grab it. */
	}

	/*
	 * Otherwise, look for the next available inode, starting at cg_irotor
	 * (the position in the bitmap of the last used inode).
	 */
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	/* skpc() skips over fully-used (0xff) bytes of the bitmap. */
	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
	if (loc == 0) {
		/*
		 * If we didn't find a free inode in the upper part of the
		 * bitmap (from cg_irotor to the end), then look at the bottom
		 * part (from 0 to cg_irotor).
		 */
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
		if (loc == 0) {
			/*
			 * If we failed again, then either the bitmap or the
			 * counters kept for the cylinder group are wrong.
			 */
			printf("cg = %d, irotor = %d, fs = %s\n",
			    cg, cgp->cg_irotor, fs->fs_fsmnt);
			panic("ffs_nodealloccg: map corrupted");
			/* NOTREACHED */
		}
	}

	/* skpc() returns the position relative to the end */
	i = start + len - loc;

	/*
	 * Okay, so now in 'i' we have the location in the bitmap of a byte
	 * holding a free inode. Find the corresponding bit and set it,
	 * updating cg_irotor as well, accordingly.
	 */
	map = cg_inosused(cgp)[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}

	/* The byte claimed to hold a free inode but every bit was set. */
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ffs_nodealloccg: block not in map");
	/* NOTREACHED */

gotit:

#ifdef FFS2
	/*
	 * For FFS2, check if all inodes in this cylinder group have been used
	 * at least once. If they haven't, and we are allocating an inode past
	 * the last allocated block of inodes, read in a block and initialize
	 * all inodes in it.
	 */
	if (fs->fs_magic == FS_UFS2_MAGIC &&
	    /* Inode is beyond last initialized block of inodes? */
	    ipref + INOPB(fs) > cgp->cg_initediblk &&
	    /* Has any inode not been used at least once? */
	    cgp->cg_initediblk < cgp->cg_ffs2_niblk) {

		/*
		 * getblk() rather than bread(): the block's previous
		 * contents are irrelevant since it is zeroed and fully
		 * rewritten below.
		 */
		ibp = getblk(ip->i_devvp, fsbtodb(fs,
		    ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)),
		    (int)fs->fs_bsize, 0, 0);

		bzero(ibp->b_data, (int)fs->fs_bsize);
		dp2 = (struct ufs2_dinode *)(ibp->b_data);

		/* Give each inode a positive generation number */
		for (i = 0; i < INOPB(fs); i++) {
			dp2->di_gen = (arc4random() & INT32_MAX) / 2 + 1;
			dp2++;
		}

		/* Update the counter of initialized inodes */
		cgp->cg_initediblk += INOPB(fs);
	}
#endif /* FFS2 */

	/* Tell softdep about the inode-map change before updating it. */
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);

	setbit(cg_inosused(cgp), ipref);	/* mark the inode allocated */

	/* Update the counters we keep on free inodes */
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod = 1; /* file system was modified */

	/* Update the counters we keep on allocated directories */
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}

	bdwrite(bp);

#ifdef FFS2
	/* Queue the newly initialized inode block for an async write. */
	if (ibp != NULL)
		bawrite(ibp);
#endif

	/* Return the allocated inode number */
	return (cg * fs->fs_ipg + ipref);
}
 1679 
 1680 /*
 1681  * Free a block or fragment.
 1682  *
 1683  * The specified block or fragment is placed back in the
 1684  * free map. If a fragment is deallocated, a possible
 1685  * block reassembly is checked.
 1686  */
 1687 void
 1688 ffs_blkfree(struct inode *ip, daddr64_t bno, long size)
 1689 {
 1690         struct fs *fs;
 1691         struct cg *cgp;
 1692         struct buf *bp;
 1693         daddr_t blkno;
 1694         int i, error, cg, blk, frags, bbase;
 1695 
 1696         fs = ip->i_fs;
 1697         if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
 1698             fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
 1699                 printf("dev = 0x%x, bsize = %d, size = %ld, fs = %s\n",
 1700                     ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
 1701                 panic("ffs_blkfree: bad size");
 1702         }
 1703         cg = dtog(fs, bno);
 1704         if ((u_int)bno >= fs->fs_size) {
 1705                 printf("bad block %d, ino %u\n", bno, ip->i_number);
 1706                 ffs_fserr(fs, DIP(ip, uid), "bad block");
 1707                 return;
 1708         }
 1709         error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
 1710                 (int)fs->fs_cgsize, NOCRED, &bp);
 1711         if (error) {
 1712                 brelse(bp);
 1713                 return;
 1714         }
 1715         cgp = (struct cg *)bp->b_data;
 1716         if (!cg_chkmagic(cgp)) {
 1717                 brelse(bp);
 1718                 return;
 1719         }
 1720 
 1721         cgp->cg_ffs2_time = cgp->cg_time = time_second;
 1722 
 1723         bno = dtogd(fs, bno);
 1724         if (size == fs->fs_bsize) {
 1725                 blkno = fragstoblks(fs, bno);
 1726                 if (!ffs_isfreeblock(fs, cg_blksfree(cgp), blkno)) {
 1727                         printf("dev = 0x%x, block = %d, fs = %s\n",
 1728                             ip->i_dev, bno, fs->fs_fsmnt);
 1729                         panic("ffs_blkfree: freeing free block");
 1730                 }
 1731                 ffs_setblock(fs, cg_blksfree(cgp), blkno);
 1732                 ffs_clusteracct(fs, cgp, blkno, 1);
 1733                 cgp->cg_cs.cs_nbfree++;
 1734                 fs->fs_cstotal.cs_nbfree++;
 1735                 fs->fs_cs(fs, cg).cs_nbfree++;
 1736 
 1737                 if (fs->fs_magic != FS_UFS2_MAGIC) {
 1738                         i = cbtocylno(fs, bno);
 1739                         cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
 1740                         cg_blktot(cgp)[i]++;
 1741                 }
 1742 
 1743         } else {
 1744                 bbase = bno - fragnum(fs, bno);
 1745                 /*
 1746                  * decrement the counts associated with the old frags
 1747                  */
 1748                 blk = blkmap(fs, cg_blksfree(cgp), bbase);
 1749                 ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
 1750                 /*
 1751                  * deallocate the fragment
 1752                  */
 1753                 frags = numfrags(fs, size);
 1754                 for (i = 0; i < frags; i++) {
 1755                         if (isset(cg_blksfree(cgp), bno + i)) {
 1756                                 printf("dev = 0x%x, block = %d, fs = %s\n",
 1757                                     ip->i_dev, bno + i, fs->fs_fsmnt);
 1758                                 panic("ffs_blkfree: freeing free frag");
 1759                         }
 1760                         setbit(cg_blksfree(cgp), bno + i);
 1761                 }
 1762                 cgp->cg_cs.cs_nffree += i;
 1763                 fs->fs_cstotal.cs_nffree += i;
 1764                 fs->fs_cs(fs, cg).cs_nffree += i;
 1765                 /*
 1766                  * add back in counts associated with the new frags
 1767                  */
 1768                 blk = blkmap(fs, cg_blksfree(cgp), bbase);
 1769                 ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
 1770                 /*
 1771                  * if a complete block has been reassembled, account for it
 1772                  */
 1773                 blkno = fragstoblks(fs, bbase);
 1774                 if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
 1775                         cgp->cg_cs.cs_nffree -= fs->fs_frag;
 1776                         fs->fs_cstotal.cs_nffree -= fs->fs_frag;
 1777                         fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
 1778                         ffs_clusteracct(fs, cgp, blkno, 1);
 1779                         cgp->cg_cs.cs_nbfree++;
 1780                         fs->fs_cstotal.cs_nbfree++;
 1781                         fs->fs_cs(fs, cg).cs_nbfree++;
 1782 
 1783                         if (fs->fs_magic != FS_UFS2_MAGIC) {
 1784                                 i = cbtocylno(fs, bbase);
 1785                                 cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
 1786                                 cg_blktot(cgp)[i]++;
 1787                         }
 1788                 }
 1789         }
 1790         fs->fs_fmod = 1;
 1791         bdwrite(bp);
 1792 }
 1793 
 1794 int
 1795 ffs_inode_free(struct inode *pip, ino_t ino, mode_t mode)
 1796 {
 1797         struct vnode *pvp = ITOV(pip);
 1798 
 1799         if (DOINGSOFTDEP(pvp)) {
 1800                 softdep_freefile(pvp, ino, mode);
 1801                 return (0);
 1802         }
 1803 
 1804         return (ffs_freefile(pip, ino, mode));
 1805 }
 1806 
 1807 /*
 1808  * Do the actual free operation.
 1809  * The specified inode is placed back in the free map.
 1810  */
 1811 int
 1812 ffs_freefile(struct inode *pip, ino_t ino, mode_t mode)
 1813 {
 1814         struct fs *fs;
 1815         struct cg *cgp;
 1816         struct buf *bp;
 1817         int error, cg;
 1818 
 1819         fs = pip->i_fs;
 1820         if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
 1821                 panic("ffs_freefile: range: dev = 0x%x, ino = %d, fs = %s",
 1822                     pip->i_dev, ino, fs->fs_fsmnt);
 1823         cg = ino_to_cg(fs, ino);
 1824         error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
 1825                 (int)fs->fs_cgsize, NOCRED, &bp);
 1826         if (error) {
 1827                 brelse(bp);
 1828                 return (error);
 1829         }
 1830         cgp = (struct cg *)bp->b_data;
 1831         if (!cg_chkmagic(cgp)) {
 1832                 brelse(bp);
 1833                 return (0);
 1834         }
 1835 
 1836         cgp->cg_ffs2_time = cgp->cg_time = time_second;
 1837 
 1838         ino %= fs->fs_ipg;
 1839         if (isclr(cg_inosused(cgp), ino)) {
 1840                 printf("dev = 0x%x, ino = %u, fs = %s\n",
 1841                     pip->i_dev, ino, fs->fs_fsmnt);
 1842                 if (fs->fs_ronly == 0)
 1843                         panic("ffs_freefile: freeing free inode");
 1844         }
 1845         clrbit(cg_inosused(cgp), ino);
 1846         if (ino < cgp->cg_irotor)
 1847                 cgp->cg_irotor = ino;
 1848         cgp->cg_cs.cs_nifree++;
 1849         fs->fs_cstotal.cs_nifree++;
 1850         fs->fs_cs(fs, cg).cs_nifree++;
 1851         if ((mode & IFMT) == IFDIR) {
 1852                 cgp->cg_cs.cs_ndir--;
 1853                 fs->fs_cstotal.cs_ndir--;
 1854                 fs->fs_cs(fs, cg).cs_ndir--;
 1855         }
 1856         fs->fs_fmod = 1;
 1857         bdwrite(bp);
 1858         return (0);
 1859 }
 1860 
#ifdef DIAGNOSTIC
/*
 * Verify allocation of a block or fragment. Returns true if block or
 * fragment is allocated, false if it is free.
 *
 * 'size' must be a fragment multiple no larger than a block and 'bno'
 * must lie inside the file system, otherwise we panic. A fragment range
 * that is only partially free also panics, since the free map must never
 * be left in that state. A read/magic failure is reported as "allocated"
 * (returns 0 with 'free' never set — callers treat 0 as free here, so
 * NOTE(review): the early returns report "free" on I/O error; presumably
 * acceptable for a diagnostic-only helper).
 */
int
ffs_checkblk(struct inode *ip, daddr_t bno, long size)
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	int i, error, frags, free;

	fs = ip->i_fs;
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("bsize = %d, size = %ld, fs = %s\n",
		    fs->fs_bsize, size, fs->fs_fsmnt);
		panic("ffs_checkblk: bad size");
	}
	if ((u_int)bno >= fs->fs_size)
		panic("ffs_checkblk: bad block %d", bno);
	/* Read the descriptor of the cylinder group holding bno. */
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (0);
	}

	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (0);
	}

	bno = dtogd(fs, bno);	/* make bno relative to the cylinder group */
	if (size == fs->fs_bsize) {
		/* Whole block: a single free-map test suffices. */
		free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
	} else {
		/* Fragment range: count the free fragments in it. */
		frags = numfrags(fs, size);
		for (free = 0, i = 0; i < frags; i++)
			if (isset(cg_blksfree(cgp), bno + i))
				free++;
		if (free != 0 && free != frags)
			panic("ffs_checkblk: partially free fragment");
	}
	brelse(bp);
	return (!free);
}
#endif /* DIAGNOSTIC */
 1910 
 1911 
/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block if none are
 * available.
 *
 * 'allocsiz' is the desired run length in fragments; the return value is
 * the cg-relative fragment number of the start of the free run found.
 */
daddr_t
ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr_t bpref, int allocsiz)
{
	daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	/* First scan from 'start' to the end of the map... */
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start],
		(u_char *)fragtbl[fs->fs_frag],
		(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		/* ...then wrap around and scan from 0 up to 'start'. */
		len = start + 1;
		start = 0;
		loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0],
			(u_char *)fragtbl[fs->fs_frag],
			(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			/*
			 * The caller believed a fragment of this size was
			 * available, so either the map or the summary
			 * counters must be corrupted.
			 */
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("ffs_alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	/* scanc() returns a count from the end; convert to a bit offset. */
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;	/* remember where to resume the next search */
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, cg_blksfree(cgp), bno);
		blk <<= 1;
		/*
		 * 'field'/'subfield' form a sliding mask/pattern pair
		 * (per-run-length tables around[]/inside[]) that matches a
		 * free run of 'allocsiz' fragments within the block.
		 */
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
	panic("ffs_alloccg: block not in map");
	return (-1);
}
 1972 
/*
 * Update the cluster map because of an allocation or free.
 *
 * Cnt == 1 means free; cnt == -1 means allocating.
 *
 * 'blkno' is the cg-relative block whose state changed. The cluster
 * summary array tracks free runs by length (capped at fs_contigsumsize);
 * this routine measures the free runs adjacent to blkno and moves the
 * affected counts accordingly.
 */
void
ffs_clusteracct(struct fs *fs, struct cg *cgp, daddr_t blkno, int cnt)
{
	int32_t *sump;
	int32_t *lp;
	u_char *freemapp, *mapp;
	int i, start, end, forw, back, map, bit;

	/* Nothing to do if this file system keeps no cluster summaries. */
	if (fs->fs_contigsumsize <= 0)
		return;
	freemapp = cg_clustersfree(cgp);
	sump = cg_clustersum(cgp);
	/*
	 * Allocate or clear the actual block.
	 */
	if (cnt > 0)
		setbit(freemapp, blkno);
	else
		clrbit(freemapp, blkno);
	/*
	 * Find the size of the cluster going forward.
	 */
	start = blkno + 1;
	end = start + fs->fs_contigsumsize;
	if (end >= cgp->cg_nclusterblks)
		end = cgp->cg_nclusterblks;
	/* Walk the free map bit by bit until a used block or 'end'. */
	mapp = &freemapp[start / NBBY];
	map = *mapp++;
	bit = 1 << (start % NBBY);
	for (i = start; i < end; i++) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	forw = i - start;
	/*
	 * Find the size of the cluster going backward.
	 *
	 * NOTE(review): if blkno == 0, 'start' is -1 and the shift below
	 * uses a negative count before the loop is skipped; presumably
	 * block 0 of a group (which holds metadata) never reaches here —
	 * worth confirming.
	 */
	start = blkno - 1;
	end = start - fs->fs_contigsumsize;
	if (end < 0)
		end = -1;
	mapp = &freemapp[start / NBBY];
	map = *mapp--;
	bit = 1 << (start % NBBY);
	for (i = start; i > end; i--) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != 0) {
			bit >>= 1;
		} else {
			map = *mapp--;
			bit = 1 << (NBBY - 1);
		}
	}
	back = start - i;
	/*
	 * Account for old cluster and the possibly new forward and
	 * back clusters.
	 */
	i = back + forw + 1;
	if (i > fs->fs_contigsumsize)
		i = fs->fs_contigsumsize;
	sump[i] += cnt;
	if (back > 0)
		sump[back] -= cnt;
	if (forw > 0)
		sump[forw] -= cnt;
	/*
	 * Update cluster summary information.
	 */
	/* Record the largest run length still available in this group. */
	lp = &sump[fs->fs_contigsumsize];
	for (i = fs->fs_contigsumsize; i > 0; i--)
		if (*lp-- > 0)
			break;
	fs->fs_maxcluster[cgp->cg_cgx] = i;
}

/* [<][>][^][v][top][bottom][index][help] */