root/kern/vfs_subr.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. vntblinit
  2. vfs_busy
  3. vfs_unbusy
  4. vfs_isbusy
  5. vfs_rootmountalloc
  6. vfs_mountroot
  7. vfs_getvfs
  8. vfs_getnewfsid
  9. makefstype
  10. vattr_null
  11. getnewvnode
  12. insmntque
  13. bdevvp
  14. cdevvp
  15. getdevvp
  16. checkalias
  17. vget
  18. vref
  19. vputonfreelist
  20. vput
  21. vrele
  22. vhold
  23. vfs_mount_foreach_vnode
  24. vflush_vnode
  25. vflush
  26. vclean
  27. vrecycle
  28. vgone
  29. vgonel
  30. vfinddev
  31. vdevgone
  32. vcount
  33. vprint
  34. printlockedvnodes
  35. vfs_sysctl
  36. sysctl_vnode
  37. vfs_mountedon
  38. vfs_hang_addrlist
  39. vfs_free_netcred
  40. vfs_free_addrlist
  41. vfs_export
  42. vfs_export_lookup
  43. vaccess
  44. vfs_unmountall
  45. vfs_shutdown
  46. vfs_syncwait
  47. fs_posix_sysctl
  48. fs_sysctl
  49. vwaitforio
  50. vwakeup
  51. vinvalbuf
  52. vflushbuf
  53. bgetvp
  54. brelvp
  55. buf_replacevnode
  56. reassignbuf
  57. vfs_register
  58. vfs_unregister
  59. vn_isdisk
  60. vfs_buf_print
  61. vfs_vnode_print
  62. vfs_mount_print

    1 /*      $OpenBSD: vfs_subr.c,v 1.155 2007/08/07 04:32:45 beck Exp $     */
    2 /*      $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $   */
    3 
    4 /*
    5  * Copyright (c) 1989, 1993
    6  *      The Regents of the University of California.  All rights reserved.
    7  * (c) UNIX System Laboratories, Inc.
    8  * All or some portions of this file are derived from material licensed
    9  * to the University of California by American Telephone and Telegraph
   10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   11  * the permission of UNIX System Laboratories, Inc.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
   38  */
   39 
   40 /*
   41  * External virtual filesystem routines
   42  */
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/proc.h>
   47 #include <sys/mount.h>
   48 #include <sys/time.h>
   49 #include <sys/fcntl.h>
   50 #include <sys/kernel.h>
   51 #include <sys/vnode.h>
   52 #include <sys/stat.h>
   53 #include <sys/namei.h>
   54 #include <sys/ucred.h>
   55 #include <sys/buf.h>
   56 #include <sys/errno.h>
   57 #include <sys/malloc.h>
   58 #include <sys/domain.h>
   59 #include <sys/mbuf.h>
   60 #include <sys/syscallargs.h>
   61 #include <sys/pool.h>
   62 
   63 #include <uvm/uvm_extern.h>
   64 #include <sys/sysctl.h>
   65 
   66 #include <miscfs/specfs/specdev.h>
   67 
   68 enum vtype iftovt_tab[16] = {
   69         VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
   70         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
   71 };
   72 
   73 int     vttoif_tab[9] = {
   74         0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
   75         S_IFSOCK, S_IFIFO, S_IFMT,
   76 };
   77 
   78 int doforce = 1;                /* 1 => permit forcible unmounting */
   79 int prtactive = 0;              /* 1 => print out reclaim of active vnodes */
   80 int suid_clear = 1;             /* 1 => clear SUID / SGID on owner change */
   81 
   82 /*
   83  * Insq/Remq for the vnode usage lists.
   84  */
   85 #define bufinsvn(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_vnbufs)
   86 #define bufremvn(bp) {                                                  \
   87         LIST_REMOVE(bp, b_vnbufs);                                      \
   88         LIST_NEXT(bp, b_vnbufs) = NOLIST;                               \
   89 }
   90 
   91 struct freelst vnode_hold_list; /* list of vnodes referencing buffers */
   92 struct freelst vnode_free_list; /* vnode free list */
   93 
   94 struct mntlist mountlist;       /* mounted filesystem list */
   95 
   96 void    vclean(struct vnode *, int, struct proc *);
   97 
   98 void insmntque(struct vnode *, struct mount *);
   99 int getdevvp(dev_t, struct vnode **, enum vtype);
  100 
  101 int vfs_hang_addrlist(struct mount *, struct netexport *,
  102                                   struct export_args *);
  103 int vfs_free_netcred(struct radix_node *, void *);
  104 void vfs_free_addrlist(struct netexport *);
  105 void vputonfreelist(struct vnode *);
  106 
  107 int vflush_vnode(struct vnode *, void *);
  108 int maxvnodes;
  109 
  110 #ifdef DEBUG
  111 void printlockedvnodes(void);
  112 #endif
  113 
  114 struct pool vnode_pool;
  115 
  116 /*
  117  * Initialize the vnode management data structures.
  118  */
  119 void
  120 vntblinit(void)
  121 {
  122         /* buffer cache may need a vnode for each buffer */
  123         maxvnodes = desiredvnodes;
  124         pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodes",
  125             &pool_allocator_nointr);
  126         TAILQ_INIT(&vnode_hold_list);
  127         TAILQ_INIT(&vnode_free_list);
  128         CIRCLEQ_INIT(&mountlist);
  129         /*
  130          * Initialize the filesystem syncer.
  131          */
  132         vn_initialize_syncerd();
  133 }
  134 
  135 /*
  136  * Mark a mount point as busy. Used to synchronize access and to delay
  137  * unmounting.
  138  *
  139  * Default behaviour is to attempt getting a READ lock and in case of an
  140  * ongoing unmount, to wait for it to finish and then return failure.
  141  */
  142 int
  143 vfs_busy(struct mount *mp, int flags)
  144 {
  145         int rwflags = 0;
  146 
  147         /* new mountpoints need their lock initialised */
  148         if (mp->mnt_lock.rwl_name == NULL)
  149                 rw_init(&mp->mnt_lock, "vfslock");
  150 
  151         if (flags & VB_WRITE)
  152                 rwflags |= RW_WRITE;
  153         else
  154                 rwflags |= RW_READ;
  155 
  156         if (flags & VB_WAIT)
  157                 rwflags |= RW_SLEEPFAIL;
  158         else
  159                 rwflags |= RW_NOSLEEP;
  160 
  161         if (rw_enter(&mp->mnt_lock, rwflags))
  162                 return (EBUSY);
  163 
  164         return (0);
  165 }
  166 
  167 /*
  168  * Free a busy file system
  169  */
  170 void
  171 vfs_unbusy(struct mount *mp)
  172 {
  173         rw_exit(&mp->mnt_lock);
  174 }
  175 
  176 int
  177 vfs_isbusy(struct mount *mp) 
  178 {
  179         if (RWLOCK_OWNER(&mp->mnt_lock) > 0)
  180                 return (1);
  181         else
  182                 return (0);
  183 }
  184 
  185 /*
  186  * Lookup a filesystem type, and if found allocate and initialize
  187  * a mount structure for it.
  188  *
  189  * Devname is usually updated by mount(8) after booting.
  190  */
  191 int
  192 vfs_rootmountalloc(char *fstypename, char *devname, struct mount **mpp)
  193 {
  194         struct vfsconf *vfsp;
  195         struct mount *mp;
  196 
  197         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
  198                 if (!strcmp(vfsp->vfc_name, fstypename))
  199                         break;
  200         if (vfsp == NULL)
  201                 return (ENODEV);
  202         mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK);
  203         bzero(mp, sizeof(struct mount));
  204         (void)vfs_busy(mp, VB_READ|VB_NOWAIT);
  205         LIST_INIT(&mp->mnt_vnodelist);
  206         mp->mnt_vfc = vfsp;
  207         mp->mnt_op = vfsp->vfc_vfsops;
  208         mp->mnt_flag = MNT_RDONLY;
  209         mp->mnt_vnodecovered = NULLVP;
  210         vfsp->vfc_refcount++;
  211         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
  212         strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  213         mp->mnt_stat.f_mntonname[0] = '/';
  214         (void)copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
  215         *mpp = mp;
  216         return (0);
  217  }
  218 
  219 /*
  220  * Find an appropriate filesystem to use for the root. If a filesystem
  221  * has not been preselected, walk through the list of known filesystems
  222  * trying those that have mountroot routines, and try them until one
  223  * works or we have tried them all.
  224  */
  225 int
  226 vfs_mountroot(void)
  227 {
  228         struct vfsconf *vfsp;
  229         int error;
  230 
  231         if (mountroot != NULL)
  232                 return ((*mountroot)());
  233         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
  234                 if (vfsp->vfc_mountroot == NULL)
  235                         continue;
  236                 if ((error = (*vfsp->vfc_mountroot)()) == 0)
  237                         return (0);
  238                 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
  239         }
  240         return (ENODEV);
  241 }
  242 
  243 /*
  244  * Lookup a mount point by filesystem identifier.
  245  */
  246 struct mount *
  247 vfs_getvfs(fsid_t *fsid)
  248 {
  249         struct mount *mp;
  250 
  251         CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
  252                 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
  253                     mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
  254                         return (mp);
  255                 }
  256         }
  257 
  258         return (NULL);
  259 }
  260 
  261 
  262 /*
  263  * Get a new unique fsid
  264  */
  265 void
  266 vfs_getnewfsid(struct mount *mp)
  267 {
  268         static u_short xxxfs_mntid;
  269 
  270         fsid_t tfsid;
  271         int mtype;
  272 
  273         mtype = mp->mnt_vfc->vfc_typenum;
  274         mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
  275         mp->mnt_stat.f_fsid.val[1] = mtype;
  276         if (xxxfs_mntid == 0)
  277                 ++xxxfs_mntid;
  278         tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
  279         tfsid.val[1] = mtype;
  280         if (!CIRCLEQ_EMPTY(&mountlist)) {
  281                 while (vfs_getvfs(&tfsid)) {
  282                         tfsid.val[0]++;
  283                         xxxfs_mntid++;
  284                 }
  285         }
  286         mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
  287 }
  288 
  289 /*
  290  * Make a 'unique' number from a mount type name.
  291  * Note that this is no longer used for ffs which
  292  * now has an on-disk filesystem id.
  293  */
  294 long
  295 makefstype(char *type)
  296 {
  297         long rv;
  298 
  299         for (rv = 0; *type; type++) {
  300                 rv <<= 2;
  301                 rv ^= *type;
  302         }
  303         return rv;
  304 }
  305 
  306 /*
  307  * Set vnode attributes to VNOVAL
  308  */
  309 void
  310 vattr_null(struct vattr *vap)
  311 {
  312 
  313         vap->va_type = VNON;
  314         /* XXX These next two used to be one line, but for a GCC bug. */
  315         vap->va_size = VNOVAL;
  316         vap->va_bytes = VNOVAL;
  317         vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
  318                 vap->va_fsid = vap->va_fileid =
  319                 vap->va_blocksize = vap->va_rdev =
  320                 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
  321                 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
  322                 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
  323                 vap->va_flags = vap->va_gen = VNOVAL;
  324         vap->va_vaflags = 0;
  325 }
  326 
  327 /*
  328  * Routines having to do with the management of the vnode table.
  329  */
  330 extern int (**dead_vnodeop_p)(void *);
  331 long numvnodes;
  332 
  333 /*
  334  * Return the next vnode from the free list.
  335  */
  336 int
  337 getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
  338     struct vnode **vpp)
  339 {
  340         struct proc *p = curproc;
  341         struct freelst *listhd;
  342         static int toggle;
  343         struct vnode *vp;
  344         int s;
  345 
  346         /*
  347          * We must choose whether to allocate a new vnode or recycle an
  348          * existing one. The criterion for allocating a new one is that
  349          * the total number of vnodes is less than the number desired or
  350          * there are no vnodes on either free list. Generally we only
  351          * want to recycle vnodes that have no buffers associated with
  352          * them, so we look first on the vnode_free_list. If it is empty,
  353          * we next consider vnodes with referencing buffers on the
  354          * vnode_hold_list. The toggle ensures that half the time we
  355          * will use a buffer from the vnode_hold_list, and half the time
  356          * we will allocate a new one unless the list has grown to twice
  357          * the desired size. We are reticent to recycle vnodes from the
  358          * vnode_hold_list because we will lose the identity of all its
  359          * referencing buffers.
  360          */
  361         toggle ^= 1;
  362         if (numvnodes > 2 * maxvnodes)
  363                 toggle = 0;
  364 
  365         s = splbio();
  366         if ((numvnodes < maxvnodes) ||
  367             ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
  368             ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
  369                 splx(s);
  370                 vp = pool_get(&vnode_pool, PR_WAITOK);
  371                 bzero((char *)vp, sizeof *vp);
  372                 numvnodes++;
  373         } else {
  374                 for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
  375                     vp = TAILQ_NEXT(vp, v_freelist)) {
  376                         if (VOP_ISLOCKED(vp) == 0)
  377                                 break;
  378                 }
  379                 /*
  380                  * Unless this is a bad time of the month, at most
  381                  * the first NCPUS items on the free list are
  382                  * locked, so this is close enough to being empty.
  383                  */
  384                 if (vp == NULL) {
  385                         splx(s);
  386                         tablefull("vnode");
  387                         *vpp = 0;
  388                         return (ENFILE);
  389                 }
  390 
  391 #ifdef DIAGNOSTIC
  392                 if (vp->v_usecount) {
  393                         vprint("free vnode", vp);
  394                         panic("free vnode isn't");
  395                 }
  396 #endif
  397 
  398                 TAILQ_REMOVE(listhd, vp, v_freelist);
  399                 vp->v_bioflag &= ~VBIOONFREELIST;
  400                 splx(s);
  401 
  402                 if (vp->v_type != VBAD)
  403                         vgonel(vp, p);
  404 #ifdef DIAGNOSTIC
  405                 if (vp->v_data) {
  406                         vprint("cleaned vnode", vp);
  407                         panic("cleaned vnode isn't");
  408                 }
  409                 s = splbio();
  410                 if (vp->v_numoutput)
  411                         panic("Clean vnode has pending I/O's");
  412                 splx(s);
  413 #endif
  414                 vp->v_flag = 0;
  415                 vp->v_socket = 0;
  416         }
  417         vp->v_type = VNON;
  418         cache_purge(vp);
  419         vp->v_tag = tag;
  420         vp->v_op = vops;
  421         insmntque(vp, mp);
  422         *vpp = vp;
  423         vp->v_usecount = 1;
  424         vp->v_data = 0;
  425         simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
  426         return (0);
  427 }
  428 
  429 /*
  430  * Move a vnode from one mount queue to another.
  431  */
  432 void
  433 insmntque(struct vnode *vp, struct mount *mp)
  434 {
  435         /*
  436          * Delete from old mount point vnode list, if on one.
  437          */
  438         if (vp->v_mount != NULL)
  439                 LIST_REMOVE(vp, v_mntvnodes);
  440         /*
  441          * Insert into list of vnodes for the new mount point, if available.
  442          */
  443         if ((vp->v_mount = mp) != NULL)
  444                 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
  445 }
  446 
  447 /*
  448  * Create a vnode for a block device.
  449  * Used for root filesystem, argdev, and swap areas.
  450  * Also used for memory file system special devices.
  451  */
  452 int
  453 bdevvp(dev_t dev, struct vnode **vpp)
  454 {
  455         return (getdevvp(dev, vpp, VBLK));
  456 }
  457 
  458 /*
  459  * Create a vnode for a character device.
  460  * Used for console handling.
  461  */
  462 int
  463 cdevvp(dev_t dev, struct vnode **vpp)
  464 {
  465         return (getdevvp(dev, vpp, VCHR));
  466 }
  467 
  468 /*
  469  * Create a vnode for a device.
  470  * Used by bdevvp (block device) for root file system etc.,
  471  * and by cdevvp (character device) for console.
  472  */
  473 int
  474 getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
  475 {
  476         struct vnode *vp;
  477         struct vnode *nvp;
  478         int error;
  479 
  480         if (dev == NODEV) {
  481                 *vpp = NULLVP;
  482                 return (0);
  483         }
  484         error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
  485         if (error) {
  486                 *vpp = NULLVP;
  487                 return (error);
  488         }
  489         vp = nvp;
  490         vp->v_type = type;
  491         if ((nvp = checkalias(vp, dev, NULL)) != 0) {
  492                 vput(vp);
  493                 vp = nvp;
  494         }
  495         *vpp = vp;
  496         return (0);
  497 }
  498 
  499 /*
  500  * Check to see if the new vnode represents a special device
  501  * for which we already have a vnode (either because of
  502  * bdevvp() or because of a different vnode representing
  503  * the same block device). If such an alias exists, deallocate
  504  * the existing contents and return the aliased vnode. The
  505  * caller is responsible for filling it with its new contents.
  506  */
  507 struct vnode *
  508 checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
  509 {
  510         struct proc *p = curproc;
  511         struct vnode *vp;
  512         struct vnode **vpp;
  513 
  514         if (nvp->v_type != VBLK && nvp->v_type != VCHR)
  515                 return (NULLVP);
  516 
  517         vpp = &speclisth[SPECHASH(nvp_rdev)];
  518 loop:
  519         for (vp = *vpp; vp; vp = vp->v_specnext) {
  520                 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
  521                         continue;
  522                 }
  523                 /*
  524                  * Alias, but not in use, so flush it out.
  525                  */
  526                 if (vp->v_usecount == 0) {
  527                         vgonel(vp, p);
  528                         goto loop;
  529                 }
  530                 if (vget(vp, LK_EXCLUSIVE, p)) {
  531                         goto loop;
  532                 }
  533                 break;
  534         }
  535 
  536         /*
  537          * Common case is actually in the if statement
  538          */
  539         if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
  540                 MALLOC(nvp->v_specinfo, struct specinfo *,
  541                         sizeof(struct specinfo), M_VNODE, M_WAITOK);
  542                 nvp->v_rdev = nvp_rdev;
  543                 nvp->v_hashchain = vpp;
  544                 nvp->v_specnext = *vpp;
  545                 nvp->v_specmountpoint = NULL;
  546                 nvp->v_speclockf = NULL;
  547                 bzero(nvp->v_specbitmap, sizeof(nvp->v_specbitmap));
  548                 *vpp = nvp;
  549                 if (vp != NULLVP) {
  550                         nvp->v_flag |= VALIASED;
  551                         vp->v_flag |= VALIASED;
  552                         vput(vp);
  553                 }
  554                 return (NULLVP);
  555         }
  556 
  557         /*
  558          * This code is the uncommon case. It is called in case
  559          * we found an alias that was VT_NON && vtype of VBLK
  560          * This means we found a block device that was created
  561          * using bdevvp.
  562          * An example of such a vnode is the root partition device vnode
  563          * created in ffs_mountroot.
  564          *
  565          * The vnodes created by bdevvp should not be aliased (why?).
  566          */
  567 
  568         VOP_UNLOCK(vp, 0, p);
  569         vclean(vp, 0, p);
  570         vp->v_op = nvp->v_op;
  571         vp->v_tag = nvp->v_tag;
  572         nvp->v_type = VNON;
  573         insmntque(vp, mp);
  574         return (vp);
  575 }
  576 
  577 /*
  578  * Grab a particular vnode from the free list, increment its
  579  * reference count and lock it. If the vnode lock bit is set,
  580  * the vnode is being eliminated in vgone. In that case, we
  581  * cannot grab it, so the process is awakened when the
  582  * transition is completed, and an error code is returned to
  583  * indicate that the vnode is no longer usable, possibly
  584  * having been changed to a new file system type.
  585  */
  586 int
  587 vget(struct vnode *vp, int flags, struct proc *p)
  588 {
  589         int error, s, onfreelist;
  590 
  591         /*
  592          * If the vnode is in the process of being cleaned out for
  593          * another use, we wait for the cleaning to finish and then
  594          * return failure. Cleaning is determined by checking that
  595          * the VXLOCK flag is set.
  596          */
  597 
  598         if (vp->v_flag & VXLOCK) {
  599                 if (flags & LK_NOWAIT) {
  600                         return (EBUSY);
  601                 }
  602 
  603                 vp->v_flag |= VXWANT;
  604                 ltsleep(vp, PINOD | PNORELOCK, "vget", 0, NULL);
  605                 return (ENOENT);
  606         }
  607 
  608         onfreelist = vp->v_bioflag & VBIOONFREELIST;
  609         if (vp->v_usecount == 0 && onfreelist) {
  610                 s = splbio();
  611                 if (vp->v_holdcnt > 0)
  612                         TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
  613                 else
  614                         TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
  615                 vp->v_bioflag &= ~VBIOONFREELIST;
  616                 splx(s);
  617         }
  618 
  619         vp->v_usecount++;
  620         if (flags & LK_TYPE_MASK) {
  621                 if ((error = vn_lock(vp, flags, p)) != 0) {
  622                         vp->v_usecount--;
  623                         if (vp->v_usecount == 0 && onfreelist)
  624                                 vputonfreelist(vp);
  625                 }
  626                 return (error);
  627         }
  628 
  629         return (0);
  630 }
  631 
  632 
  633 /* Vnode reference. */
  634 void
  635 vref(struct vnode *vp)
  636 {
  637 #ifdef DIAGNOSTIC
  638         if (vp->v_usecount == 0)
  639                 panic("vref used where vget required");
  640 #endif
  641         vp->v_usecount++;
  642 }
  643 
  644 void
  645 vputonfreelist(struct vnode *vp)
  646 {
  647         int s;
  648         struct freelst *lst;
  649 
  650         s = splbio();
  651 #ifdef DIAGNOSTIC
  652         if (vp->v_usecount != 0)
  653                 panic("Use count is not zero!");
  654 
  655         if (vp->v_bioflag & VBIOONFREELIST) {
  656                 vprint("vnode already on free list: ", vp);
  657                 panic("vnode already on free list");
  658         }
  659 #endif
  660 
  661         vp->v_bioflag |= VBIOONFREELIST;
  662 
  663         if (vp->v_holdcnt > 0)
  664                 lst = &vnode_hold_list;
  665         else
  666                 lst = &vnode_free_list;
  667 
  668         if (vp->v_type == VBAD)
  669                 TAILQ_INSERT_HEAD(lst, vp, v_freelist);
  670         else
  671                 TAILQ_INSERT_TAIL(lst, vp, v_freelist);
  672 
  673         splx(s);
  674 }
  675 
  676 /*
  677  * vput(), just unlock and vrele()
  678  */
  679 void
  680 vput(struct vnode *vp)
  681 {
  682         struct proc *p = curproc;
  683 
  684 #ifdef DIAGNOSTIC
  685         if (vp == NULL)
  686                 panic("vput: null vp");
  687 #endif
  688 
  689 #ifdef DIAGNOSTIC
  690         if (vp->v_usecount == 0) {
  691                 vprint("vput: bad ref count", vp);
  692                 panic("vput: ref cnt");
  693         }
  694 #endif
  695         vp->v_usecount--;
  696         if (vp->v_usecount > 0) {
  697                 VOP_UNLOCK(vp, 0, p);
  698                 return;
  699         }
  700 
  701 #ifdef DIAGNOSTIC
  702         if (vp->v_writecount != 0) {
  703                 vprint("vput: bad writecount", vp);
  704                 panic("vput: v_writecount != 0");
  705         }
  706 #endif
  707 
  708         VOP_INACTIVE(vp, p);
  709 
  710         if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
  711                 vputonfreelist(vp);
  712 }
  713 
  714 /*
  715  * Vnode release - use for active VNODES.
  716  * If count drops to zero, call inactive routine and return to freelist.
  717  */
  718 void
  719 vrele(struct vnode *vp)
  720 {
  721         struct proc *p = curproc;
  722 
  723 #ifdef DIAGNOSTIC
  724         if (vp == NULL)
  725                 panic("vrele: null vp");
  726 #endif
  727 #ifdef DIAGNOSTIC
  728         if (vp->v_usecount == 0) {
  729                 vprint("vrele: bad ref count", vp);
  730                 panic("vrele: ref cnt");
  731         }
  732 #endif
  733         vp->v_usecount--;
  734         if (vp->v_usecount > 0) {
  735                 return;
  736         }
  737 
  738 #ifdef DIAGNOSTIC
  739         if (vp->v_writecount != 0) {
  740                 vprint("vrele: bad writecount", vp);
  741                 panic("vrele: v_writecount != 0");
  742         }
  743 #endif
  744 
  745         if (vn_lock(vp, LK_EXCLUSIVE, p)) {
  746 #ifdef DIAGNOSTIC
  747                 vprint("vrele: cannot lock", vp);
  748 #endif
  749                 return;
  750         }
  751 
  752         VOP_INACTIVE(vp, p);
  753 
  754         if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
  755                 vputonfreelist(vp);
  756 }
  757 
  758 void vhold(struct vnode *vp);
  759 
  760 /*
  761  * Page or buffer structure gets a reference.
  762  */
  763 void
  764 vhold(struct vnode *vp)
  765 {
  766         /*
  767          * If it is on the freelist and the hold count is currently
  768          * zero, move it to the hold list.
  769          */
  770         if ((vp->v_bioflag & VBIOONFREELIST) &&
  771             vp->v_holdcnt == 0 && vp->v_usecount == 0) {
  772                 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
  773                 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
  774         }
  775         vp->v_holdcnt++;
  776 }
  777 
  778 /*
  779  * Remove any vnodes in the vnode table belonging to mount point mp.
  780  *
  781  * If MNT_NOFORCE is specified, there should not be any active ones,
  782  * return error if any are found (nb: this is a user error, not a
  783  * system error). If MNT_FORCE is specified, detach any active vnodes
  784  * that are found.
  785  */
  786 #ifdef DEBUG
  787 int busyprt = 0;        /* print out busy vnodes */
  788 struct ctldebug debug1 = { "busyprt", &busyprt };
  789 #endif
  790 
  791 int
  792 vfs_mount_foreach_vnode(struct mount *mp, 
  793     int (*func)(struct vnode *, void *), void *arg) {
  794         struct vnode *vp, *nvp;
  795         int error = 0;
  796 
  797 loop:
  798         for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
  799                 if (vp->v_mount != mp)
  800                         goto loop;
  801                 nvp = LIST_NEXT(vp, v_mntvnodes);
  802 
  803                 error = func(vp, arg);
  804 
  805                 if (error != 0)
  806                         break;
  807         }
  808 
  809         return (error);
  810 }
  811 
  812 struct vflush_args {
  813         struct vnode *skipvp;
  814         int busy;
  815         int flags;
  816 };
  817 
  818 int
  819 vflush_vnode(struct vnode *vp, void *arg) {
  820         struct vflush_args *va = arg;
  821         struct proc *p = curproc;
  822 
  823         if (vp == va->skipvp) {
  824                 return (0);
  825         }
  826 
  827         if ((va->flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
  828                 return (0);
  829         }
  830 
  831         /*
  832          * If WRITECLOSE is set, only flush out regular file
  833          * vnodes open for writing.
  834          */
  835         if ((va->flags & WRITECLOSE) &&
  836             (vp->v_writecount == 0 || vp->v_type != VREG)) {
  837                 return (0);
  838         }
  839 
  840         /*
  841          * With v_usecount == 0, all we need to do is clear
  842          * out the vnode data structures and we are done.
  843          */
  844         if (vp->v_usecount == 0) {
  845                 vgonel(vp, p);
  846                 return (0);
  847         }
  848                 
  849         /*
  850          * If FORCECLOSE is set, forcibly close the vnode.
  851          * For block or character devices, revert to an
  852          * anonymous device. For all other files, just kill them.
  853          */
  854         if (va->flags & FORCECLOSE) {
  855                 if (vp->v_type != VBLK && vp->v_type != VCHR) {
  856                         vgonel(vp, p);
  857                 } else {
  858                         vclean(vp, 0, p);
  859                         vp->v_op = spec_vnodeop_p;
  860                         insmntque(vp, (struct mount *)0);
  861                 }
  862                 return (0);
  863         }
  864 
  865 #ifdef DEBUG
  866         if (busyprt)
  867                 vprint("vflush: busy vnode", vp);
  868 #endif
  869         va->busy++;
  870         return (0);
  871 }
  872 
  873 int
  874 vflush(struct mount *mp, struct vnode *skipvp, int flags)
  875 {
  876         struct vflush_args va;
  877         va.skipvp = skipvp;
  878         va.busy = 0;
  879         va.flags = flags;
  880 
  881         vfs_mount_foreach_vnode(mp, vflush_vnode, &va);
  882 
  883         if (va.busy)
  884                 return (EBUSY);
  885         return (0);
  886 }
  887 
  888 /*
  889  * Disassociate the underlying file system from a vnode.
  890  */
  891 void
  892 vclean(struct vnode *vp, int flags, struct proc *p)
  893 {
  894         int active;
  895 
  896         /*
  897          * Check to see if the vnode is in use.
  898          * If so we have to reference it before we clean it out
  899          * so that its count cannot fall to zero and generate a
  900          * race against ourselves to recycle it.
  901          */
  902         if ((active = vp->v_usecount) != 0)
  903                 vp->v_usecount++;
  904 
  905         /*
  906          * Prevent the vnode from being recycled or
  907          * brought into use while we clean it out.
  908          */
  909         if (vp->v_flag & VXLOCK)
  910                 panic("vclean: deadlock");
  911         vp->v_flag |= VXLOCK;
  912         /*
  913          * Even if the count is zero, the VOP_INACTIVE routine may still
  914          * have the object locked while it cleans it out. The VOP_LOCK
  915          * ensures that the VOP_INACTIVE routine is done with its work.
  916          * For active vnodes, it ensures that no other activity can
  917          * occur while the underlying object is being cleaned out.
  918          */
  919         VOP_LOCK(vp, LK_DRAIN, p);
  920 
  921         /*
  922          * Clean out any VM data associated with the vnode.
  923          */
  924         uvm_vnp_terminate(vp);
  925         /*
  926          * Clean out any buffers associated with the vnode.
  927          */
  928         if (flags & DOCLOSE)
  929                 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
  930         /*
  931          * If purging an active vnode, it must be closed and
  932          * deactivated before being reclaimed. Note that the
  933          * VOP_INACTIVE will unlock the vnode
  934          */
  935         if (active) {
  936                 if (flags & DOCLOSE)
  937                         VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
  938                 VOP_INACTIVE(vp, p);
  939         } else {
  940                 /*
  941                  * Any other processes trying to obtain this lock must first
  942                  * wait for VXLOCK to clear, then call the new lock operation.
  943                  */
  944                 VOP_UNLOCK(vp, 0, p);
  945         }
  946 
  947         /*
  948          * Reclaim the vnode.
  949          */
  950         if (VOP_RECLAIM(vp, p))
  951                 panic("vclean: cannot reclaim");
  952         if (active) {
  953                 vp->v_usecount--;
  954                 if (vp->v_usecount == 0) {
  955                         if (vp->v_holdcnt > 0)
  956                                 panic("vclean: not clean");
  957                         vputonfreelist(vp);
  958                 }
  959         }
  960         cache_purge(vp);
  961 
  962         /*
  963          * Done with purge, notify sleepers of the grim news.
  964          */
  965         vp->v_op = dead_vnodeop_p;
  966         VN_KNOTE(vp, NOTE_REVOKE);
  967         vp->v_tag = VT_NON;
  968         vp->v_flag &= ~VXLOCK;
  969 #ifdef VFSDEBUG
  970         vp->v_flag &= ~VLOCKSWORK;
  971 #endif
  972         if (vp->v_flag & VXWANT) {
  973                 vp->v_flag &= ~VXWANT;
  974                 wakeup(vp);
  975         }
  976 }
  977 
  978 /*
  979  * Recycle an unused vnode to the front of the free list.
  980  */
  981 int
  982 vrecycle(struct vnode *vp, struct proc *p)
  983 {
  984         if (vp->v_usecount == 0) {
  985                 vgonel(vp, p);
  986                 return (1);
  987         }
  988         return (0);
  989 }
  990 
  991 /*
  992  * Eliminate all activity associated with a vnode
  993  * in preparation for reuse.
  994  */
  995 void
  996 vgone(struct vnode *vp)
  997 {
  998         struct proc *p = curproc;
  999         vgonel(vp, p);
 1000 }
 1001 
 1002 /*
 1003  * vgone, with struct proc.
 1004  */
 1005 void
 1006 vgonel(struct vnode *vp, struct proc *p)
 1007 {
 1008         struct vnode *vq;
 1009         struct vnode *vx;
 1010         struct mount *mp;
 1011         int flags;
 1012 
 1013         /*
 1014          * If a vgone (or vclean) is already in progress,
 1015          * wait until it is done and return.
 1016          */
 1017         if (vp->v_flag & VXLOCK) {
 1018                 vp->v_flag |= VXWANT;
 1019                 ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, NULL);
 1020                 return;
 1021         }
 1022 
 1023         /*
 1024          * Clean out the filesystem specific data.
 1025          */
 1026         vclean(vp, DOCLOSE, p);
 1027         /*
 1028          * Delete from old mount point vnode list, if on one.
 1029          */
 1030         if (vp->v_mount != NULL)
 1031                 insmntque(vp, (struct mount *)0);
 1032         /*
 1033          * If special device, remove it from special device alias list
 1034          * if it is on one.
 1035          */
 1036         if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
 1037                 if (*vp->v_hashchain == vp) {
 1038                         *vp->v_hashchain = vp->v_specnext;
 1039                 } else {
 1040                         for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
 1041                                 if (vq->v_specnext != vp)
 1042                                         continue;
 1043                                 vq->v_specnext = vp->v_specnext;
 1044                                 break;
 1045                         }
 1046                         if (vq == NULL)
 1047                                 panic("missing bdev");
 1048                 }
 1049                 if (vp->v_flag & VALIASED) {
 1050                         vx = NULL;
 1051                         for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
 1052                                 if (vq->v_rdev != vp->v_rdev ||
 1053                                     vq->v_type != vp->v_type)
 1054                                         continue;
 1055                                 if (vx)
 1056                                         break;
 1057                                 vx = vq;
 1058                         }
 1059                         if (vx == NULL)
 1060                                 panic("missing alias");
 1061                         if (vq == NULL)
 1062                                 vx->v_flag &= ~VALIASED;
 1063                         vp->v_flag &= ~VALIASED;
 1064                 }
 1065 
 1066                 /*
 1067                  * If we have a mount point associated with the vnode, we must
 1068                  * flush it out now, as to not leave a dangling zombie mount
 1069                  * point laying around in VFS.
 1070                  */
 1071                 mp = vp->v_specmountpoint;
 1072                 if (mp != NULL) {
 1073                         if (!vfs_busy(mp, VB_WRITE|VB_WAIT)) {
 1074                                 flags = MNT_FORCE | MNT_DOOMED;
 1075                                 dounmount(mp, flags, p, NULL);
 1076                         }
 1077                 }
 1078 
 1079                 FREE(vp->v_specinfo, M_VNODE);
 1080                 vp->v_specinfo = NULL;
 1081         }
 1082         /*
 1083          * If it is on the freelist and not already at the head,
 1084          * move it to the head of the list.
 1085          */
 1086         vp->v_type = VBAD;
 1087 
 1088         /*
 1089          * Move onto the free list, unless we were called from
 1090          * getnewvnode and we're not on any free list
 1091          */
 1092         if (vp->v_usecount == 0 &&
 1093             (vp->v_bioflag & VBIOONFREELIST)) {
 1094                 int s;
 1095 
 1096                 s = splbio();
 1097 
 1098                 if (vp->v_holdcnt > 0)
 1099                         panic("vgonel: not clean");
 1100 
 1101                 if (TAILQ_FIRST(&vnode_free_list) != vp) {
 1102                         TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 1103                         TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
 1104                 }
 1105                 splx(s);
 1106         }
 1107 }
 1108 
 1109 /*
 1110  * Lookup a vnode by device number.
 1111  */
 1112 int
 1113 vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
 1114 {
 1115         struct vnode *vp;
 1116         int rc =0;
 1117 
 1118         for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
 1119                 if (dev != vp->v_rdev || type != vp->v_type)
 1120                         continue;
 1121                 *vpp = vp;
 1122                 rc = 1;
 1123                 break;
 1124         }
 1125         return (rc);
 1126 }
 1127 
 1128 /*
 1129  * Revoke all the vnodes corresponding to the specified minor number
 1130  * range (endpoints inclusive) of the specified major.
 1131  */
 1132 void
 1133 vdevgone(int maj, int minl, int minh, enum vtype type)
 1134 {
 1135         struct vnode *vp;
 1136         int mn;
 1137 
 1138         for (mn = minl; mn <= minh; mn++)
 1139                 if (vfinddev(makedev(maj, mn), type, &vp))
 1140                         VOP_REVOKE(vp, REVOKEALL);
 1141 }
 1142 
 1143 /*
 1144  * Calculate the total number of references to a special device.
 1145  */
 1146 int
 1147 vcount(struct vnode *vp)
 1148 {
 1149         struct vnode *vq, *vnext;
 1150         int count;
 1151 
 1152 loop:
 1153         if ((vp->v_flag & VALIASED) == 0)
 1154                 return (vp->v_usecount);
 1155         for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
 1156                 vnext = vq->v_specnext;
 1157                 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
 1158                         continue;
 1159                 /*
 1160                  * Alias, but not in use, so flush it out.
 1161                  */
 1162                 if (vq->v_usecount == 0 && vq != vp) {
 1163                         vgone(vq);
 1164                         goto loop;
 1165                 }
 1166                 count += vq->v_usecount;
 1167         }
 1168         return (count);
 1169 }
 1170 
 1171 #if defined(DEBUG) || defined(DIAGNOSTIC)
 1172 /*
 1173  * Print out a description of a vnode.
 1174  */
 1175 static char *typename[] =
 1176    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
 1177 
 1178 void
 1179 vprint(char *label, struct vnode *vp)
 1180 {
 1181         char buf[64];
 1182 
 1183         if (label != NULL)
 1184                 printf("%s: ", label);
 1185         printf("%p, type %s, use %u, write %u, hold %u,",
 1186                 vp, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
 1187                 vp->v_holdcnt);
 1188         buf[0] = '\0';
 1189         if (vp->v_flag & VROOT)
 1190                 strlcat(buf, "|VROOT", sizeof buf);
 1191         if (vp->v_flag & VTEXT)
 1192                 strlcat(buf, "|VTEXT", sizeof buf);
 1193         if (vp->v_flag & VSYSTEM)
 1194                 strlcat(buf, "|VSYSTEM", sizeof buf);
 1195         if (vp->v_flag & VXLOCK)
 1196                 strlcat(buf, "|VXLOCK", sizeof buf);
 1197         if (vp->v_flag & VXWANT)
 1198                 strlcat(buf, "|VXWANT", sizeof buf);
 1199         if (vp->v_bioflag & VBIOWAIT)
 1200                 strlcat(buf, "|VBIOWAIT", sizeof buf);
 1201         if (vp->v_bioflag & VBIOONFREELIST)
 1202                 strlcat(buf, "|VBIOONFREELIST", sizeof buf);
 1203         if (vp->v_bioflag & VBIOONSYNCLIST)
 1204                 strlcat(buf, "|VBIOONSYNCLIST", sizeof buf);
 1205         if (vp->v_flag & VALIASED)
 1206                 strlcat(buf, "|VALIASED", sizeof buf);
 1207         if (buf[0] != '\0')
 1208                 printf(" flags (%s)", &buf[1]);
 1209         if (vp->v_data == NULL) {
 1210                 printf("\n");
 1211         } else {
 1212                 printf("\n\t");
 1213                 VOP_PRINT(vp);
 1214         }
 1215 }
 1216 #endif /* DEBUG || DIAGNOSTIC */
 1217 
 1218 #ifdef DEBUG
 1219 /*
 1220  * List all of the locked vnodes in the system.
 1221  * Called when debugging the kernel.
 1222  */
 1223 void
 1224 printlockedvnodes(void)
 1225 {
 1226         struct mount *mp, *nmp;
 1227         struct vnode *vp;
 1228 
 1229         printf("Locked vnodes\n");
 1230 
 1231         for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
 1232             mp = nmp) {
 1233                 if (vfs_busy(mp, VB_READ|VB_NOWAIT)) {
 1234                         nmp = CIRCLEQ_NEXT(mp, mnt_list);
 1235                         continue;
 1236                 }
 1237                 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
 1238                         if (VOP_ISLOCKED(vp))
 1239                                 vprint((char *)0, vp);
 1240                 }
 1241                 nmp = CIRCLEQ_NEXT(mp, mnt_list);
 1242                 vfs_unbusy(mp);
 1243         }
 1244 
 1245 }
 1246 #endif
 1247 
 1248 /*
 1249  * Top level filesystem related information gathering.
 1250  */
 1251 int
 1252 vfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
 1253     size_t newlen, struct proc *p)
 1254 {
 1255         struct vfsconf *vfsp, *tmpvfsp;
 1256         int ret;
 1257 
 1258         /* all sysctl names at this level are at least name and field */
 1259         if (namelen < 2)
 1260                 return (ENOTDIR);               /* overloaded */
 1261 
 1262         if (name[0] != VFS_GENERIC) {
 1263                 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 1264                         if (vfsp->vfc_typenum == name[0])
 1265                                 break;
 1266 
 1267                 if (vfsp == NULL)
 1268                         return (EOPNOTSUPP);
 1269 
 1270                 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
 1271                     oldp, oldlenp, newp, newlen, p));
 1272         }
 1273 
 1274         switch (name[1]) {
 1275         case VFS_MAXTYPENUM:
 1276                 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
 1277 
 1278         case VFS_CONF:
 1279                 if (namelen < 3)
 1280                         return (ENOTDIR);       /* overloaded */
 1281 
 1282                 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 1283                         if (vfsp->vfc_typenum == name[2])
 1284                                 break;
 1285 
 1286                 if (vfsp == NULL)
 1287                         return (EOPNOTSUPP);
 1288 
 1289                 /* Make a copy, clear out kernel pointers */
 1290                 tmpvfsp = malloc(sizeof(*tmpvfsp), M_TEMP, M_WAITOK);
 1291                 bcopy(vfsp, tmpvfsp, sizeof(*tmpvfsp));
 1292                 tmpvfsp->vfc_vfsops = NULL;
 1293                 tmpvfsp->vfc_mountroot = NULL;
 1294                 tmpvfsp->vfc_next = NULL;
 1295 
 1296                 ret = sysctl_rdstruct(oldp, oldlenp, newp, tmpvfsp,
 1297                     sizeof(struct vfsconf));
 1298 
 1299                 free(tmpvfsp, M_TEMP);
 1300                 return (ret);
 1301         }
 1302 
 1303         return (EOPNOTSUPP);
 1304 }
 1305 
 1306 int kinfo_vdebug = 1;
 1307 #define KINFO_VNODESLOP 10
 1308 /*
 1309  * Dump vnode list (via sysctl).
 1310  * Copyout address of vnode followed by vnode.
 1311  */
 1312 /* ARGSUSED */
 1313 int
 1314 sysctl_vnode(char *where, size_t *sizep, struct proc *p)
 1315 {
 1316         struct mount *mp, *nmp;
 1317         struct vnode *vp, *nvp;
 1318         char *bp = where, *savebp;
 1319         char *ewhere;
 1320         int error;
 1321 
 1322         if (where == NULL) {
 1323                 *sizep = (numvnodes + KINFO_VNODESLOP) * sizeof(struct e_vnode);
 1324                 return (0);
 1325         }
 1326         ewhere = where + *sizep;
 1327 
 1328         for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
 1329             mp = nmp) {
 1330                 if (vfs_busy(mp, VB_READ|VB_NOWAIT)) {
 1331                         nmp = CIRCLEQ_NEXT(mp, mnt_list);
 1332                         continue;
 1333                 }
 1334                 savebp = bp;
 1335 again:
 1336                 for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL;
 1337                     vp = nvp) {
 1338                         /*
 1339                          * Check that the vp is still associated with
 1340                          * this filesystem.  RACE: could have been
 1341                          * recycled onto the same filesystem.
 1342                          */
 1343                         if (vp->v_mount != mp) {
 1344                                 if (kinfo_vdebug)
 1345                                         printf("kinfo: vp changed\n");
 1346                                 bp = savebp;
 1347                                 goto again;
 1348                         }
 1349                         nvp = LIST_NEXT(vp, v_mntvnodes);
 1350                         if (bp + sizeof(struct e_vnode) > ewhere) {
 1351                                 *sizep = bp - where;
 1352                                 vfs_unbusy(mp);
 1353                                 return (ENOMEM);
 1354                         }
 1355                         if ((error = copyout(&vp,
 1356                             &((struct e_vnode *)bp)->vptr,
 1357                             sizeof(struct vnode *))) ||
 1358                            (error = copyout(vp,
 1359                             &((struct e_vnode *)bp)->vnode,
 1360                             sizeof(struct vnode)))) {
 1361                                 vfs_unbusy(mp);
 1362                                 return (error);
 1363                         }
 1364                         bp += sizeof(struct e_vnode);
 1365                 }
 1366 
 1367                 nmp = CIRCLEQ_NEXT(mp, mnt_list);
 1368                 vfs_unbusy(mp);
 1369         }
 1370 
 1371         *sizep = bp - where;
 1372 
 1373         return (0);
 1374 }
 1375 
 1376 /*
 1377  * Check to see if a filesystem is mounted on a block device.
 1378  */
 1379 int
 1380 vfs_mountedon(struct vnode *vp)
 1381 {
 1382         struct vnode *vq;
 1383         int error = 0;
 1384 
 1385         if (vp->v_specmountpoint != NULL)
 1386                 return (EBUSY);
 1387         if (vp->v_flag & VALIASED) {
 1388                 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
 1389                         if (vq->v_rdev != vp->v_rdev ||
 1390                             vq->v_type != vp->v_type)
 1391                                 continue;
 1392                         if (vq->v_specmountpoint != NULL) {
 1393                                 error = EBUSY;
 1394                                 break;
 1395                         }
 1396                 }
 1397         }
 1398         return (error);
 1399 }
 1400 
 1401 /*
 1402  * Build hash lists of net addresses and hang them off the mount point.
 1403  * Called by ufs_mount() to set up the lists of export addresses.
 1404  */
 1405 int
 1406 vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
 1407     struct export_args *argp)
 1408 {
 1409         struct netcred *np;
 1410         struct radix_node_head *rnh;
 1411         int i;
 1412         struct radix_node *rn;
 1413         struct sockaddr *saddr, *smask = 0;
 1414         struct domain *dom;
 1415         int error;
 1416 
 1417         if (argp->ex_addrlen == 0) {
 1418                 if (mp->mnt_flag & MNT_DEFEXPORTED)
 1419                         return (EPERM);
 1420                 np = &nep->ne_defexported;
 1421                 np->netc_exflags = argp->ex_flags;
 1422                 np->netc_anon = argp->ex_anon;
 1423                 np->netc_anon.cr_ref = 1;
 1424                 mp->mnt_flag |= MNT_DEFEXPORTED;
 1425                 return (0);
 1426         }
 1427         if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN ||
 1428             argp->ex_addrlen < 0 || argp->ex_masklen < 0)
 1429                 return (EINVAL);
 1430         i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
 1431         np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
 1432         bzero(np, i);
 1433         saddr = (struct sockaddr *)(np + 1);
 1434         error = copyin(argp->ex_addr, saddr, argp->ex_addrlen);
 1435         if (error)
 1436                 goto out;
 1437         if (saddr->sa_len > argp->ex_addrlen)
 1438                 saddr->sa_len = argp->ex_addrlen;
 1439         if (argp->ex_masklen) {
 1440                 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
 1441                 error = copyin(argp->ex_mask, smask, argp->ex_masklen);
 1442                 if (error)
 1443                         goto out;
 1444                 if (smask->sa_len > argp->ex_masklen)
 1445                         smask->sa_len = argp->ex_masklen;
 1446         }
 1447         i = saddr->sa_family;
 1448         if (i < 0 || i > AF_MAX) {
 1449                 error = EINVAL;
 1450                 goto out;
 1451         }
 1452         if ((rnh = nep->ne_rtable[i]) == 0) {
 1453                 /*
 1454                  * Seems silly to initialize every AF when most are not
 1455                  * used, do so on demand here
 1456                  */
 1457                 for (dom = domains; dom; dom = dom->dom_next)
 1458                         if (dom->dom_family == i && dom->dom_rtattach) {
 1459                                 dom->dom_rtattach((void **)&nep->ne_rtable[i],
 1460                                         dom->dom_rtoffset);
 1461                                 break;
 1462                         }
 1463                 if ((rnh = nep->ne_rtable[i]) == 0) {
 1464                         error = ENOBUFS;
 1465                         goto out;
 1466                 }
 1467         }
 1468         rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
 1469                 np->netc_rnodes);
 1470         if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
 1471                 error = EPERM;
 1472                 goto out;
 1473         }
 1474         np->netc_exflags = argp->ex_flags;
 1475         np->netc_anon = argp->ex_anon;
 1476         np->netc_anon.cr_ref = 1;
 1477         return (0);
 1478 out:
 1479         free(np, M_NETADDR);
 1480         return (error);
 1481 }
 1482 
 1483 /* ARGSUSED */
 1484 int
 1485 vfs_free_netcred(struct radix_node *rn, void *w)
 1486 {
 1487         struct radix_node_head *rnh = (struct radix_node_head *)w;
 1488 
 1489         (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh, NULL);
 1490         free(rn, M_NETADDR);
 1491         return (0);
 1492 }
 1493 
 1494 /*
 1495  * Free the net address hash lists that are hanging off the mount points.
 1496  */
 1497 void
 1498 vfs_free_addrlist(struct netexport *nep)
 1499 {
 1500         int i;
 1501         struct radix_node_head *rnh;
 1502 
 1503         for (i = 0; i <= AF_MAX; i++)
 1504                 if ((rnh = nep->ne_rtable[i]) != NULL) {
 1505                         (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
 1506                         free(rnh, M_RTABLE);
 1507                         nep->ne_rtable[i] = 0;
 1508                 }
 1509 }
 1510 
 1511 int
 1512 vfs_export(struct mount *mp, struct netexport *nep, struct export_args *argp)
 1513 {
 1514         int error;
 1515 
 1516         if (argp->ex_flags & MNT_DELEXPORT) {
 1517                 vfs_free_addrlist(nep);
 1518                 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
 1519         }
 1520         if (argp->ex_flags & MNT_EXPORTED) {
 1521                 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
 1522                         return (error);
 1523                 mp->mnt_flag |= MNT_EXPORTED;
 1524         }
 1525         return (0);
 1526 }
 1527 
 1528 struct netcred *
 1529 vfs_export_lookup(struct mount *mp, struct netexport *nep, struct mbuf *nam)
 1530 {
 1531         struct netcred *np;
 1532         struct radix_node_head *rnh;
 1533         struct sockaddr *saddr;
 1534 
 1535         np = NULL;
 1536         if (mp->mnt_flag & MNT_EXPORTED) {
 1537                 /*
 1538                  * Lookup in the export list first.
 1539                  */
 1540                 if (nam != NULL) {
 1541                         saddr = mtod(nam, struct sockaddr *);
 1542                         rnh = nep->ne_rtable[saddr->sa_family];
 1543                         if (rnh != NULL) {
 1544                                 np = (struct netcred *)
 1545                                         (*rnh->rnh_matchaddr)((caddr_t)saddr,
 1546                                             rnh);
 1547                                 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
 1548                                         np = NULL;
 1549                         }
 1550                 }
 1551                 /*
 1552                  * If no address match, use the default if it exists.
 1553                  */
 1554                 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
 1555                         np = &nep->ne_defexported;
 1556         }
 1557         return (np);
 1558 }
 1559 
 1560 /*
 1561  * Do the usual access checking.
 1562  * file_mode, uid and gid are from the vnode in question,
 1563  * while acc_mode and cred are from the VOP_ACCESS parameter list
 1564  */
 1565 int
 1566 vaccess(mode_t file_mode, uid_t uid, gid_t gid, mode_t acc_mode,
 1567     struct ucred *cred)
 1568 {
 1569         mode_t mask;
 1570 
 1571         /* User id 0 always gets access. */
 1572         if (cred->cr_uid == 0)
 1573                 return 0;
 1574 
 1575         mask = 0;
 1576 
 1577         /* Otherwise, check the owner. */
 1578         if (cred->cr_uid == uid) {
 1579                 if (acc_mode & VEXEC)
 1580                         mask |= S_IXUSR;
 1581                 if (acc_mode & VREAD)
 1582                         mask |= S_IRUSR;
 1583                 if (acc_mode & VWRITE)
 1584                         mask |= S_IWUSR;
 1585                 return (file_mode & mask) == mask ? 0 : EACCES;
 1586         }
 1587 
 1588         /* Otherwise, check the groups. */
 1589         if (cred->cr_gid == gid || groupmember(gid, cred)) {
 1590                 if (acc_mode & VEXEC)
 1591                         mask |= S_IXGRP;
 1592                 if (acc_mode & VREAD)
 1593                         mask |= S_IRGRP;
 1594                 if (acc_mode & VWRITE)
 1595                         mask |= S_IWGRP;
 1596                 return (file_mode & mask) == mask ? 0 : EACCES;
 1597         }
 1598 
 1599         /* Otherwise, check everyone else. */
 1600         if (acc_mode & VEXEC)
 1601                 mask |= S_IXOTH;
 1602         if (acc_mode & VREAD)
 1603                 mask |= S_IROTH;
 1604         if (acc_mode & VWRITE)
 1605                 mask |= S_IWOTH;
 1606         return (file_mode & mask) == mask ? 0 : EACCES;
 1607 }
 1608 
 1609 /*
 1610  * Unmount all file systems.
 1611  * We traverse the list in reverse order under the assumption that doing so
 1612  * will avoid needing to worry about dependencies.
 1613  */
 1614 void
 1615 vfs_unmountall(void)
 1616 {
 1617         struct mount *mp, *nmp;
 1618         int allerror, error, again = 1;
 1619 
 1620  retry:
 1621         allerror = 0;
 1622         for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
 1623             mp = nmp) {
 1624                 nmp = CIRCLEQ_PREV(mp, mnt_list);
 1625                 if ((vfs_busy(mp, VB_WRITE|VB_NOWAIT)) != 0)
 1626                         continue;
 1627                 if ((error = dounmount(mp, MNT_FORCE, curproc, NULL)) != 0) {
 1628                         printf("unmount of %s failed with error %d\n",
 1629                             mp->mnt_stat.f_mntonname, error);
 1630                         allerror = 1;
 1631                 }
 1632         }
 1633 
 1634         if (allerror) {
 1635                 printf("WARNING: some file systems would not unmount\n");
 1636                 if (again) {
 1637                         printf("retrying\n");
 1638                         again = 0;
 1639                         goto retry;
 1640                 }
 1641         }
 1642 }
 1643 
 1644 /*
 1645  * Sync and unmount file systems before shutting down.
 1646  */
 1647 void
 1648 vfs_shutdown(void)
 1649 {
 1650 #ifdef ACCOUNTING
 1651         extern void acct_shutdown(void);
 1652 
 1653         acct_shutdown();
 1654 #endif
 1655 
 1656         /* XXX Should suspend scheduling. */
 1657         (void) spl0();
 1658 
 1659         printf("syncing disks... ");
 1660 
 1661         if (panicstr == 0) {
 1662                 /* Sync before unmount, in case we hang on something. */
 1663                 sys_sync(&proc0, (void *)0, (register_t *)0);
 1664 
 1665                 /* Unmount file systems. */
 1666                 vfs_unmountall();
 1667         }
 1668 
 1669         if (vfs_syncwait(1))
 1670                 printf("giving up\n");
 1671         else
 1672                 printf("done\n");
 1673 }
 1674 
 1675 /*
 1676  * perform sync() operation and wait for buffers to flush.
 1677  * assumtions: called w/ scheduler disabled and physical io enabled
 1678  * for now called at spl0() XXX
 1679  */
 1680 int
 1681 vfs_syncwait(int verbose)
 1682 {
 1683         struct buf *bp;
 1684         int iter, nbusy, dcount, s;
 1685         struct proc *p;
 1686 
 1687         p = curproc? curproc : &proc0;
 1688         sys_sync(p, (void *)0, (register_t *)0);
 1689 
 1690         /* Wait for sync to finish. */
 1691         dcount = 10000;
 1692         for (iter = 0; iter < 20; iter++) {
 1693                 nbusy = 0;
 1694                 LIST_FOREACH(bp, &bufhead, b_list) {
 1695                         if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
 1696                                 nbusy++;
 1697                         /*
 1698                          * With soft updates, some buffers that are
 1699                          * written will be remarked as dirty until other
 1700                          * buffers are written.
 1701                          */
 1702                         if (bp->b_flags & B_DELWRI) {
 1703                                 s = splbio();
 1704                                 bremfree(bp);
 1705                                 bp->b_flags |= B_BUSY;
 1706                                 splx(s);
 1707                                 nbusy++;
 1708                                 bawrite(bp);
 1709                                 if (dcount-- <= 0) {
 1710                                         if (verbose)
 1711                                                 printf("softdep ");
 1712                                         return 1;
 1713                                 }
 1714                         }
 1715                 }
 1716                 if (nbusy == 0)
 1717                         break;
 1718                 if (verbose)
 1719                         printf("%d ", nbusy);
 1720                 DELAY(40000 * iter);
 1721         }
 1722 
 1723         return nbusy;
 1724 }
 1725 
 1726 /*
 1727  * posix file system related system variables.
 1728  */
 1729 int
 1730 fs_posix_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
 1731     void *newp, size_t newlen, struct proc *p)
 1732 {
 1733         /* all sysctl names at this level are terminal */
 1734         if (namelen != 1)
 1735                 return (ENOTDIR);
 1736 
 1737         switch (name[0]) {
 1738         case FS_POSIX_SETUID:
 1739                 if (newp && securelevel > 0)
 1740                         return (EPERM);
 1741                 return(sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
 1742         default:
 1743                 return (EOPNOTSUPP);
 1744         }
 1745         /* NOTREACHED */
 1746 }
 1747 
 1748 /*
 1749  * file system related system variables.
 1750  */
 1751 int
 1752 fs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
 1753     size_t newlen, struct proc *p)
 1754 {
 1755         sysctlfn *fn;
 1756 
 1757         switch (name[0]) {
 1758         case FS_POSIX:
 1759                 fn = fs_posix_sysctl;
 1760                 break;
 1761         default:
 1762                 return (EOPNOTSUPP);
 1763         }
 1764         return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
 1765 }
 1766 
 1767 
 1768 /*
 1769  * Routines dealing with vnodes and buffers
 1770  */
 1771 
 1772 /*
 1773  * Wait for all outstanding I/Os to complete
 1774  *
 1775  * Manipulates v_numoutput. Must be called at splbio()
 1776  */
 1777 int
 1778 vwaitforio(struct vnode *vp, int slpflag, char *wmesg, int timeo)
 1779 {
 1780         int error = 0;
 1781 
 1782         splassert(IPL_BIO);
 1783 
 1784         while (vp->v_numoutput) {
 1785                 vp->v_bioflag |= VBIOWAIT;
 1786                 error = tsleep(&vp->v_numoutput,
 1787                     slpflag | (PRIBIO + 1), wmesg, timeo);
 1788                 if (error)
 1789                         break;
 1790         }
 1791 
 1792         return (error);
 1793 }
 1794 
 1795 /*
 1796  * Update outstanding I/O count and do wakeup if requested.
 1797  *
 1798  * Manipulates v_numoutput. Must be called at splbio()
 1799  */
 1800 void
 1801 vwakeup(struct vnode *vp)
 1802 {
 1803         splassert(IPL_BIO);
 1804 
 1805         if (vp != NULL) {
 1806                 if (vp->v_numoutput-- == 0)
 1807                         panic("vwakeup: neg numoutput");
 1808                 if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
 1809                         vp->v_bioflag &= ~VBIOWAIT;
 1810                         wakeup(&vp->v_numoutput);
 1811                 }
 1812         }
 1813 }
 1814 
 1815 /*
 1816  * Flush out and invalidate all buffers associated with a vnode.
 1817  * Called with the underlying object locked.
 1818  */
 1819 int
 1820 vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct proc *p,
 1821     int slpflag, int slptimeo)
 1822 {
 1823         struct buf *bp;
 1824         struct buf *nbp, *blist;
 1825         int s, error;
 1826 
 1827 #ifdef VFSDEBUG
 1828         if ((vp->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp))
 1829                 panic("vinvalbuf(): vp isn't locked");
 1830 #endif
 1831 
 1832         if (flags & V_SAVE) {
 1833                 s = splbio();
 1834                 vwaitforio(vp, 0, "vinvalbuf", 0);
 1835                 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
 1836                         splx(s);
 1837                         if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
 1838                                 return (error);
 1839                         s = splbio();
 1840                         if (vp->v_numoutput > 0 ||
 1841                             !LIST_EMPTY(&vp->v_dirtyblkhd))
 1842                                 panic("vinvalbuf: dirty bufs");
 1843                 }
 1844                 splx(s);
 1845         }
 1846 loop:
 1847         s = splbio();
 1848         for (;;) {
 1849                 if ((blist = LIST_FIRST(&vp->v_cleanblkhd)) &&
 1850                     (flags & V_SAVEMETA))
 1851                         while (blist && blist->b_lblkno < 0)
 1852                                 blist = LIST_NEXT(blist, b_vnbufs);
 1853                 if (blist == NULL &&
 1854                     (blist = LIST_FIRST(&vp->v_dirtyblkhd)) &&
 1855                     (flags & V_SAVEMETA))
 1856                         while (blist && blist->b_lblkno < 0)
 1857                                 blist = LIST_NEXT(blist, b_vnbufs);
 1858                 if (!blist)
 1859                         break;
 1860 
 1861                 for (bp = blist; bp; bp = nbp) {
 1862                         nbp = LIST_NEXT(bp, b_vnbufs);
 1863                         if (flags & V_SAVEMETA && bp->b_lblkno < 0)
 1864                                 continue;
 1865                         if (bp->b_flags & B_BUSY) {
 1866                                 bp->b_flags |= B_WANTED;
 1867                                 error = tsleep(bp, slpflag | (PRIBIO + 1),
 1868                                     "vinvalbuf", slptimeo);
 1869                                 if (error) {
 1870                                         splx(s);
 1871                                         return (error);
 1872                                 }
 1873                                 break;
 1874                         }
 1875                         bremfree(bp);
 1876                         bp->b_flags |= B_BUSY;
 1877                         /*
 1878                          * XXX Since there are no node locks for NFS, I believe
 1879                          * there is a slight chance that a delayed write will
 1880                          * occur while sleeping just above, so check for it.
 1881                          */
 1882                         if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
 1883                                 splx(s);
 1884                                 (void) VOP_BWRITE(bp);
 1885                                 goto loop;
 1886                         }
 1887                         bp->b_flags |= B_INVAL;
 1888                         brelse(bp);
 1889                 }
 1890         }
 1891         if (!(flags & V_SAVEMETA) &&
 1892             (!LIST_EMPTY(&vp->v_dirtyblkhd) || !LIST_EMPTY(&vp->v_cleanblkhd)))
 1893                 panic("vinvalbuf: flush failed");
 1894         splx(s);
 1895         return (0);
 1896 }
 1897 
 1898 void
 1899 vflushbuf(struct vnode *vp, int sync)
 1900 {
 1901         struct buf *bp, *nbp;
 1902         int s;
 1903 
 1904 loop:
 1905         s = splbio();
 1906         for (bp = LIST_FIRST(&vp->v_dirtyblkhd);
 1907             bp != LIST_END(&vp->v_dirtyblkhd); bp = nbp) {
 1908                 nbp = LIST_NEXT(bp, b_vnbufs);
 1909                 if ((bp->b_flags & B_BUSY))
 1910                         continue;
 1911                 if ((bp->b_flags & B_DELWRI) == 0)
 1912                         panic("vflushbuf: not dirty");
 1913                 bremfree(bp);
 1914                 bp->b_flags |= B_BUSY;
 1915                 splx(s);
 1916                 /*
 1917                  * Wait for I/O associated with indirect blocks to complete,
 1918                  * since there is no way to quickly wait for them below.
 1919                  */
 1920                 if (bp->b_vp == vp || sync == 0)
 1921                         (void) bawrite(bp);
 1922                 else
 1923                         (void) bwrite(bp);
 1924                 goto loop;
 1925         }
 1926         if (sync == 0) {
 1927                 splx(s);
 1928                 return;
 1929         }
 1930         vwaitforio(vp, 0, "vflushbuf", 0);
 1931         if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
 1932                 splx(s);
 1933 #ifdef DIAGNOSTIC
 1934                 vprint("vflushbuf: dirty", vp);
 1935 #endif
 1936                 goto loop;
 1937         }
 1938         splx(s);
 1939 }
 1940 
 1941 /*
 1942  * Associate a buffer with a vnode.
 1943  *
 1944  * Manipulates buffer vnode queues. Must be called at splbio().
 1945  */
 1946 void
 1947 bgetvp(struct vnode *vp, struct buf *bp)
 1948 {
 1949         splassert(IPL_BIO);
 1950 
 1951 
 1952         if (bp->b_vp)
 1953                 panic("bgetvp: not free");
 1954         vhold(vp);
 1955         bp->b_vp = vp;
 1956         if (vp->v_type == VBLK || vp->v_type == VCHR)
 1957                 bp->b_dev = vp->v_rdev;
 1958         else
 1959                 bp->b_dev = NODEV;
 1960         /*
 1961          * Insert onto list for new vnode.
 1962          */
 1963         bufinsvn(bp, &vp->v_cleanblkhd);
 1964 }
 1965 
 1966 /*
 1967  * Disassociate a buffer from a vnode.
 1968  *
 1969  * Manipulates vnode buffer queues. Must be called at splbio().
 1970  */
 1971 void
 1972 brelvp(struct buf *bp)
 1973 {
 1974         struct vnode *vp;
 1975 
 1976         splassert(IPL_BIO);
 1977 
 1978         if ((vp = bp->b_vp) == (struct vnode *) 0)
 1979                 panic("brelvp: NULL");
 1980         /*
 1981          * Delete from old vnode list, if on one.
 1982          */
 1983         if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
 1984                 bufremvn(bp);
 1985         if ((vp->v_bioflag & VBIOONSYNCLIST) &&
 1986             LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
 1987                 vp->v_bioflag &= ~VBIOONSYNCLIST;
 1988                 LIST_REMOVE(vp, v_synclist);
 1989         }
 1990         bp->b_vp = (struct vnode *) 0;
 1991 
 1992 #ifdef DIAGNOSTIC
 1993         if (vp->v_holdcnt == 0)
 1994                 panic("brelvp: holdcnt");
 1995 #endif
 1996         vp->v_holdcnt--;
 1997 
 1998         /*
 1999          * If it is on the holdlist and the hold count drops to
 2000          * zero, move it to the free list.
 2001          */
 2002         if ((vp->v_bioflag & VBIOONFREELIST) &&
 2003             vp->v_holdcnt == 0 && vp->v_usecount == 0) {
 2004                 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
 2005                 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 2006         }
 2007 }
 2008 
 2009 /*
 2010  * Replaces the current vnode associated with the buffer, if any,
 2011  * with a new vnode.
 2012  *
 2013  * If an output I/O is pending on the buffer, the old vnode
 2014  * I/O count is adjusted.
 2015  *
 2016  * Ignores vnode buffer queues. Must be called at splbio().
 2017  */
 2018 void
 2019 buf_replacevnode(struct buf *bp, struct vnode *newvp)
 2020 {
 2021         struct vnode *oldvp = bp->b_vp;
 2022 
 2023         splassert(IPL_BIO);
 2024 
 2025         if (oldvp)
 2026                 brelvp(bp);
 2027 
 2028         if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
 2029                 newvp->v_numoutput++;   /* put it on swapdev */
 2030                 vwakeup(oldvp);
 2031         }
 2032 
 2033         bgetvp(newvp, bp);
 2034         bufremvn(bp);
 2035 }
 2036 
 2037 /*
 2038  * Used to assign buffers to the appropriate clean or dirty list on
 2039  * the vnode and to add newly dirty vnodes to the appropriate
 2040  * filesystem syncer list.
 2041  *
 2042  * Manipulates vnode buffer queues. Must be called at splbio().
 2043  */
 2044 void
 2045 reassignbuf(struct buf *bp)
 2046 {
 2047         struct buflists *listheadp;
 2048         int delay;
 2049         struct vnode *vp = bp->b_vp;
 2050 
 2051         splassert(IPL_BIO);
 2052 
 2053         /*
 2054          * Delete from old vnode list, if on one.
 2055          */
 2056         if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
 2057                 bufremvn(bp);
 2058 
 2059         /*
 2060          * If dirty, put on list of dirty buffers;
 2061          * otherwise insert onto list of clean buffers.
 2062          */
 2063         if ((bp->b_flags & B_DELWRI) == 0) {
 2064                 listheadp = &vp->v_cleanblkhd;
 2065                 if ((vp->v_bioflag & VBIOONSYNCLIST) &&
 2066                     LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
 2067                         vp->v_bioflag &= ~VBIOONSYNCLIST;
 2068                         LIST_REMOVE(vp, v_synclist);
 2069                 }
 2070         } else {
 2071                 listheadp = &vp->v_dirtyblkhd;
 2072                 if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
 2073                         switch (vp->v_type) {
 2074                         case VDIR:
 2075                                 delay = syncdelay / 2;
 2076                                 break;
 2077                         case VBLK:
 2078                                 if (vp->v_specmountpoint != NULL) {
 2079                                         delay = syncdelay / 3;
 2080                                         break;
 2081                                 }
 2082                                 /* FALLTHROUGH */
 2083                         default:
 2084                                 delay = syncdelay;
 2085                         }
 2086                         vn_syncer_add_to_worklist(vp, delay);
 2087                 }
 2088         }
 2089         bufinsvn(bp, listheadp);
 2090 }
 2091 
 2092 int
 2093 vfs_register(struct vfsconf *vfs)
 2094 {
 2095         struct vfsconf *vfsp;
 2096         struct vfsconf **vfspp;
 2097 
 2098 #ifdef DIAGNOSTIC
 2099         /* Paranoia? */
 2100         if (vfs->vfc_refcount != 0)
 2101                 printf("vfs_register called with vfc_refcount > 0\n");
 2102 #endif
 2103 
 2104         /* Check if filesystem already known */
 2105         for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
 2106             vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
 2107                 if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
 2108                         return (EEXIST);
 2109 
 2110         if (vfs->vfc_typenum > maxvfsconf)
 2111                 maxvfsconf = vfs->vfc_typenum;
 2112 
 2113         vfs->vfc_next = NULL;
 2114 
 2115         /* Add to the end of the list */
 2116         *vfspp = vfs;
 2117 
 2118         /* Call vfs_init() */
 2119         if (vfs->vfc_vfsops->vfs_init)
 2120                 (*(vfs->vfc_vfsops->vfs_init))(vfs);
 2121 
 2122         return 0;
 2123 }
 2124 
 2125 int
 2126 vfs_unregister(struct vfsconf *vfs)
 2127 {
 2128         struct vfsconf *vfsp;
 2129         struct vfsconf **vfspp;
 2130         int maxtypenum;
 2131 
 2132         /* Find our vfsconf struct */
 2133         for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
 2134             vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
 2135                 if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
 2136                         break;
 2137         }
 2138 
 2139         if (!vfsp)                      /* Not found */
 2140                 return (ENOENT);
 2141 
 2142         if (vfsp->vfc_refcount)         /* In use */
 2143                 return (EBUSY);
 2144 
 2145         /* Remove from list and free */
 2146         *vfspp = vfsp->vfc_next;
 2147 
 2148         maxtypenum = 0;
 2149 
 2150         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 2151                 if (vfsp->vfc_typenum > maxtypenum)
 2152                         maxtypenum = vfsp->vfc_typenum;
 2153 
 2154         maxvfsconf = maxtypenum;
 2155         return 0;
 2156 }
 2157 
 2158 /*
 2159  * Check if vnode represents a disk device
 2160  */
 2161 int
 2162 vn_isdisk(struct vnode *vp, int *errp)
 2163 {
 2164         if (vp->v_type != VBLK && vp->v_type != VCHR)
 2165                 return (0);
 2166 
 2167         return (1);
 2168 }
 2169 
 2170 #ifdef DDB
 2171 #include <machine/db_machdep.h>
 2172 #include <ddb/db_interface.h>
 2173 #include <ddb/db_output.h>
 2174 
 2175 void
 2176 vfs_buf_print(struct buf *bp, int full, int (*pr)(const char *, ...))
 2177 {
 2178 
 2179         (*pr)("  vp %p lblkno 0x%llx blkno 0x%llx dev 0x%x\n"
 2180               "  proc %p error %d flags %b\n",
 2181             bp->b_vp, (int64_t)bp->b_lblkno, (int64_t)bp->b_blkno, bp->b_dev,
 2182             bp->b_proc, bp->b_error, bp->b_flags, B_BITS);
 2183 
 2184         (*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx sync 0x%x\n"
 2185               "  data %p saveaddr %p dep %p iodone %p\n",
 2186             bp->b_bufsize, bp->b_bcount, (long)bp->b_resid, bp->b_synctime,
 2187             bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep), bp->b_iodone);
 2188 
 2189         (*pr)("  dirty {off 0x%x end 0x%x} valid {off 0x%x end 0x%x}\n",
 2190             bp->b_dirtyoff, bp->b_dirtyend, bp->b_validoff, bp->b_validend);
 2191 
 2192 #ifdef FFS_SOFTUPDATES
 2193         if (full)
 2194                 softdep_print(bp, full, pr);
 2195 #endif
 2196 }
 2197 
 2198 const char *vtypes[] = { VTYPE_NAMES };
 2199 const char *vtags[] = { VTAG_NAMES };
 2200 
 2201 void
 2202 vfs_vnode_print(struct vnode *vp, int full, int (*pr)(const char *, ...))
 2203 {
 2204 
 2205 #define NENTS(n)        (sizeof n / sizeof(n[0]))
 2206         (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
 2207               vp->v_tag > NENTS(vtags)? "<unk>":vtags[vp->v_tag], vp->v_tag,
 2208               vp->v_type > NENTS(vtypes)? "<unk>":vtypes[vp->v_type],
 2209               vp->v_type, vp->v_mount, vp->v_mountedhere);
 2210 
 2211         (*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
 2212               vp->v_data, vp->v_usecount, vp->v_writecount,
 2213               vp->v_holdcnt, vp->v_numoutput);
 2214 
 2215         /* uvm_object_printit(&vp->v_uobj, full, pr); */
 2216 
 2217         if (full) {
 2218                 struct buf *bp;
 2219 
 2220                 (*pr)("clean bufs:\n");
 2221                 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
 2222                         (*pr)(" bp %p\n", bp);
 2223                         vfs_buf_print(bp, full, pr);
 2224                 }
 2225 
 2226                 (*pr)("dirty bufs:\n");
 2227                 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
 2228                         (*pr)(" bp %p\n", bp);
 2229                         vfs_buf_print(bp, full, pr);
 2230                 }
 2231         }
 2232 }
 2233 
 2234 void
 2235 vfs_mount_print(struct mount *mp, int full, int (*pr)(const char *, ...))
 2236 {
 2237         struct vfsconf *vfc = mp->mnt_vfc;
 2238         struct vnode *vp;
 2239         int cnt = 0;
 2240 
 2241         (*pr)("flags %b\nvnodecovered %p syncer %p data %p\n",
 2242             mp->mnt_flag, MNT_BITS,
 2243             mp->mnt_vnodecovered, mp->mnt_syncer, mp->mnt_data);
 2244 
 2245         (*pr)("vfsconf: ops %p name \"%s\" num %d ref %d flags 0x%x\n",
 2246             vfc->vfc_vfsops, vfc->vfc_name, vfc->vfc_typenum,
 2247             vfc->vfc_refcount, vfc->vfc_flags);
 2248 
 2249         (*pr)("statvfs cache: bsize %x iosize %x\nblocks %u free %u avail %u\n",
 2250             mp->mnt_stat.f_bsize, mp->mnt_stat.f_iosize, mp->mnt_stat.f_blocks,
 2251             mp->mnt_stat.f_bfree, mp->mnt_stat.f_bavail);
 2252 
 2253         (*pr)("  files %u ffiles %u\n", mp->mnt_stat.f_files,
 2254             mp->mnt_stat.f_ffree);
 2255 
 2256         (*pr)("  f_fsidx {0x%x, 0x%x} owner %u ctime 0x%x\n",
 2257             mp->mnt_stat.f_fsid.val[0], mp->mnt_stat.f_fsid.val[1],
 2258             mp->mnt_stat.f_owner, mp->mnt_stat.f_ctime);
 2259 
 2260         (*pr)("  syncwrites %lu asyncwrites = %lu\n",
 2261             mp->mnt_stat.f_syncwrites, mp->mnt_stat.f_asyncwrites);
 2262 
 2263         (*pr)("  fstype \"%s\" mnton \"%s\" mntfrom \"%s\"\n",
 2264             mp->mnt_stat.f_fstypename, mp->mnt_stat.f_mntonname,
 2265             mp->mnt_stat.f_mntfromname);
 2266 
 2267         (*pr)("locked vnodes:");
 2268         /* XXX would take mountlist lock, except ddb has no context */
 2269         LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes)
 2270                 if (VOP_ISLOCKED(vp)) {
 2271                         if (!LIST_NEXT(vp, v_mntvnodes))
 2272                                 (*pr)(" %p", vp);
 2273                         else if (!(cnt++ % (72 / (sizeof(void *) * 2 + 4))))
 2274                                 (*pr)("\n\t%p", vp);
 2275                         else
 2276                                 (*pr)(", %p", vp);
 2277                 }
 2278         (*pr)("\n");
 2279 
 2280         if (full) {
 2281                 (*pr)("all vnodes:\n\t");
 2282                 /* XXX would take mountlist lock, except ddb has no context */
 2283                 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes)
 2284                         if (!LIST_NEXT(vp, v_mntvnodes))
 2285                                 (*pr)(" %p", vp);
 2286                         else if (!(cnt++ % (72 / (sizeof(void *) * 2 + 4))))
 2287                                 (*pr)(" %p,\n\t", vp);
 2288                         else
 2289                                 (*pr)(" %p,", vp);
 2290                 (*pr)("\n", vp);
 2291         }
 2292 }
 2293 #endif /* DDB */

/* [<][>][^][v][top][bottom][index][help] */