root/dev/vnd.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. vndencrypt
  2. vndattach
  3. vndopen
  4. vndgetdisklabel
  5. vndclose
  6. vndstrategy
  7. vndstart
  8. vndiodone
  9. vndread
  10. vndwrite
  11. vndioctl
  12. vndsetcred
  13. vndshutdown
  14. vndclear
  15. vndsize
  16. vnddump

    1 /*      $OpenBSD: vnd.c,v 1.79 2007/06/20 18:15:46 deraadt Exp $        */
    2 /*      $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $        */
    3 
    4 /*
    5  * Copyright (c) 1988 University of Utah.
    6  * Copyright (c) 1990, 1993
    7  *      The Regents of the University of California.  All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * the Systems Programming Group of the University of Utah Computer
   11  * Science Department.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  * from: Utah $Hdr: vn.c 1.13 94/04/02$
   38  *
   39  *      @(#)vn.c        8.6 (Berkeley) 4/1/94
   40  */
   41 
   42 /*
   43  * Vnode disk driver.
   44  *
   45  * Block/character interface to a vnode.  Allows one to treat a file
   46  * as a disk (e.g. build a filesystem in it, mount it, etc.).
   47  *
   48  * NOTE 1: This uses either the VOP_BMAP/VOP_STRATEGY interface to the
   49  * vnode or simple VOP_READ/VOP_WRITE.  The former is suitable for swapping
   50  * as it doesn't distort the local buffer cache.  The latter is good for
   51  * building disk images as it keeps the cache consistent after the block
   52  * device is closed.
   53  *
   54  * NOTE 2: There is a security issue involved with this driver.
   55  * Once mounted all access to the contents of the "mapped" file via
   56  * the special file is controlled by the permissions on the special
   57  * file, the protection of the mapped file is ignored (effectively,
   58  * by using root credentials in all transactions).
   59  *
   60  * NOTE 3: Doesn't interact with leases, should it?
   61  */
   62 
   63 #include <sys/param.h>
   64 #include <sys/systm.h>
   65 #include <sys/namei.h>
   66 #include <sys/proc.h>
   67 #include <sys/errno.h>
   68 #include <sys/buf.h>
   69 #include <sys/malloc.h>
   70 #include <sys/pool.h>
   71 #include <sys/ioctl.h>
   72 #include <sys/disklabel.h>
   73 #include <sys/device.h>
   74 #include <sys/disk.h>
   75 #include <sys/stat.h>
   76 #include <sys/mount.h>
   77 #include <sys/vnode.h>
   78 #include <sys/file.h>
   79 #include <sys/rwlock.h>
   80 #include <sys/uio.h>
   81 #include <sys/conf.h>
   82 
   83 #include <crypto/blf.h>
   84 
   85 #include <miscfs/specfs/specdev.h>
   86 
   87 #include <dev/vndioctl.h>
   88 
   89 #ifdef VNDDEBUG
      /*
       * Debug build only.  DNPRINTF(f, ...) prints when any bit of f is set in
       * vnddebug; without VNDDEBUG it expands to nothing.
       */
   90 int dovndcluster = 1;           /* when 0, vndstrategy() forces nra to 0 */
   91 int vnddebug = 0x00;            /* mask of VDB_* classes to print */
   92 #define VDB_FOLLOW      0x01    /* used by the entry-point trace messages */
   93 #define VDB_INIT        0x02    /* used by the VNDIOCSET/VNDIOCCLR messages */
   94 #define VDB_IO          0x04    /* used by the I/O path messages */
   95 #define DNPRINTF(f, p...)       do { if ((f) & vnddebug) printf(p); } while (0)
   96 #else
   97 #define DNPRINTF(f, p...)       /* nothing */
   98 #endif  /* VNDDEBUG */
   99 
  100 /*
  101  * vndunit is a bit weird.  have to reconstitute the dev_t for
  102  * DISKUNIT(), but with the minor masked off.
       *
       * Minor bit 0x800 is what vndsimple() tests; vndunit() keeps only the
       * low 11 bits (0x7ff) of the minor before handing it to DISKUNIT(), so
       * both flavors of a device map to the same unit.
  103  */
  104 #define vndunit(x)      DISKUNIT(makedev(major(x), minor(x) & 0x7ff))
  105 #define vndsimple(x)    (minor(x) & 0x800)
  106 
  107 /* same as MAKEDISKDEV, preserving the vndsimple() property */
      /* (builds the RAW_PART device of the same unit; used to read the label) */
  108 #define VNDLABELDEV(dev)        \
  109         makedev(major(dev), DISKMINOR(vndunit(dev), RAW_PART) | \
  110             (vndsimple(dev) ? 0x800 : 0))
  111 
  112 struct vndbuf {
      /*
       * One piece of a request issued by the buffer-cache-bypassing path of
       * vndstrategy().  vb_buf is the first member: vndiodone() casts the
       * struct buf pointer it is handed back to a struct vndbuf pointer.
       */
  113         struct buf      vb_buf;         /* buffer handed to VOP_STRATEGY() */
  114         struct buf      *vb_obp;        /* original request this piece belongs to */
  115 };
  116 
  117 /*
  118  * struct vndbuf allocator
       * (initialized by pool_init() in vndattach())
  119  */
  120 struct pool     vndbufpl;
  121 
  122 #define getvndbuf()     pool_get(&vndbufpl, PR_WAITOK)
  123 #define putvndbuf(vbp)  pool_put(&vndbufpl, vbp);
  124 
  125 struct vnd_softc {
      /* Per-unit driver state; one entry of the vnd_softc[] array per unit. */
  126         struct device    sc_dev;                /* dv_xname is set to "vnd%d" by VNDIOCSET */
  127         struct disk      sc_dk;                 /* open masks and disklabel */
  128 
  129         char             sc_file[VNDNLEN];      /* file we're covering */
  130         int              sc_flags;              /* VNF_* flags, see below */
  131         size_t           sc_size;               /* size of vnd in blocks */
  132         struct vnode    *sc_vp;                 /* vnode of the backing file */
  133         struct ucred    *sc_cred;               /* credentials used for VOP_READ/VOP_WRITE */
  134         struct buf       sc_tab;                /* transfer queue */
  135         blf_ctx         *sc_keyctx;             /* key context, NULL if no key was given */
  136         struct rwlock    sc_rwlock;             /* taken by vndlock()/vndunlock() */
  137 };
  138 
  139 /* sc_flags */
  140 #define VNF_ALIVE       0x0001
  141 #define VNF_INITED      0x0002  /* unit is configured (set by VNDIOCSET) */
  142 #define VNF_LABELLING   0x0100
  143 #define VNF_WLABEL      0x0200
  144 #define VNF_HAVELABEL   0x0400  /* set by vndopen() before it loads the label */
  145 #define VNF_SIMPLE      0x1000  /* first opener used the vndsimple() minor */
  146 #define VNF_READONLY    0x2000  /* read/write vn_open() of the file got EROFS */
  147 
      /* Open mode (FREAD or FREAD|FWRITE) matching VNF_READONLY; passed to vn_close(). */
  148 #define VNDRW(v)        ((v)->sc_flags & VNF_READONLY ? FREAD : FREAD|FWRITE)
  149 
  150 struct vnd_softc *vnd_softc;    /* array of numvnd units, allocated by vndattach() */
  151 int numvnd = 0;                 /* number of units; stays 0 until vndattach() succeeds */
  152 
  153 struct dkdriver vnddkdriver = { vndstrategy };  /* installed as sc_dk.dk_driver by VNDIOCSET */
  154 
  155 /* called by main() at boot time */
  156 void    vndattach(int);
  157 
      /* Driver-internal helpers. */
  158 void    vndclear(struct vnd_softc *);
  159 void    vndstart(struct vnd_softc *);   /* issue the next queued piece */
  160 int     vndsetcred(struct vnd_softc *, struct ucred *);
  161 void    vndiodone(struct buf *);        /* b_iodone callback for struct vndbuf pieces */
  162 void    vndshutdown(void);
  163 void    vndgetdisklabel(dev_t, struct vnd_softc *);
  164 void    vndencrypt(struct vnd_softc *, caddr_t, size_t, daddr64_t, int);
  165 
  166 #define vndlock(sc) rw_enter(&sc->sc_rwlock, RW_WRITE|RW_INTR)
  167 #define vndunlock(sc) rw_exit_write(&sc->sc_rwlock)
  168 
  169 void
  170 vndencrypt(struct vnd_softc *vnd, caddr_t addr, size_t size, daddr64_t off,
  171     int encrypt)
  172 {
  173         int i, bsize;
  174         u_char iv[8];
  175 
  176         bsize = dbtob(1);
  177         for (i = 0; i < size/bsize; i++) {
  178                 bzero(iv, sizeof(iv));
  179                 bcopy((u_char *)&off, iv, sizeof(off));
  180                 blf_ecb_encrypt(vnd->sc_keyctx, iv, sizeof(iv));
  181                 if (encrypt)
  182                         blf_cbc_encrypt(vnd->sc_keyctx, iv, addr, bsize);
  183                 else
  184                         blf_cbc_decrypt(vnd->sc_keyctx, iv, addr, bsize);
  185 
  186                 addr += bsize;
  187                 off++;
  188         }
  189 }
  190 
  191 void
  192 vndattach(int num)
  193 {
  194         char *mem;
  195         u_long size;
  196         int i;
  197 
  198         if (num <= 0)
  199                 return;
  200         size = num * sizeof(struct vnd_softc);
  201         mem = malloc(size, M_DEVBUF, M_NOWAIT);
  202         if (mem == NULL) {
  203                 printf("WARNING: no memory for vnode disks\n");
  204                 return;
  205         }
  206         bzero(mem, size);
  207         vnd_softc = (struct vnd_softc *)mem;
  208         for (i = 0; i < num; i++) {
  209                 rw_init(&vnd_softc[i].sc_rwlock, "vndlock");
  210         }
  211         numvnd = num;
  212 
  213         pool_init(&vndbufpl, sizeof(struct vndbuf), 0, 0, 0, "vndbufpl", NULL);
  214         pool_setlowat(&vndbufpl, 16);
  215         pool_sethiwat(&vndbufpl, 1024);
  216 }
  217 
  218 int
  219 vndopen(dev_t dev, int flags, int mode, struct proc *p)
  220 {
  221         int unit = vndunit(dev);
  222         struct vnd_softc *sc;
  223         int error = 0, part, pmask;
  224 
  225         DNPRINTF(VDB_FOLLOW, "vndopen(%x, %x, %x, %p)\n", dev, flags, mode, p);
  226 
  227         if (unit >= numvnd)
  228                 return (ENXIO);
  229         sc = &vnd_softc[unit];
  230 
  231         if ((error = vndlock(sc)) != 0)
  232                 return (error);
  233 
  234         if ((flags & FWRITE) && (sc->sc_flags & VNF_READONLY)) {
  235                 error = EROFS;
  236                 goto bad;
  237         }
  238 
  239         if ((sc->sc_flags & VNF_INITED) &&
  240             (sc->sc_flags & VNF_HAVELABEL) == 0) {
  241                 sc->sc_flags |= VNF_HAVELABEL;
  242                 vndgetdisklabel(dev, sc);
  243         }
  244 
  245         part = DISKPART(dev);
  246         pmask = 1 << part;
  247 
  248         /*
  249          * If any partition is open, all succeeding openings must be of the
  250          * same type or read-only.
  251          */
  252         if (sc->sc_dk.dk_openmask) {
  253                 if (((sc->sc_flags & VNF_SIMPLE) != 0) !=
  254                     (vndsimple(dev) != 0) && (flags & FWRITE)) {
  255                         error = EBUSY;
  256                         goto bad;
  257                 }
  258         } else if (vndsimple(dev))
  259                 sc->sc_flags |= VNF_SIMPLE;
  260         else
  261                 sc->sc_flags &= ~VNF_SIMPLE;
  262 
  263         /* Check that the partition exists. */
  264         if (part != RAW_PART &&
  265             ((sc->sc_flags & VNF_HAVELABEL) == 0 ||
  266             part >= sc->sc_dk.dk_label->d_npartitions ||
  267             sc->sc_dk.dk_label->d_partitions[part].p_fstype == FS_UNUSED)) {
  268                 error = ENXIO;
  269                 goto bad;
  270         }
  271 
  272         /* Prevent our unit from being unconfigured while open. */
  273         switch (mode) {
  274         case S_IFCHR:
  275                 sc->sc_dk.dk_copenmask |= pmask;
  276                 break;
  277 
  278         case S_IFBLK:
  279                 sc->sc_dk.dk_bopenmask |= pmask;
  280                 break;
  281         }
  282         sc->sc_dk.dk_openmask =
  283             sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
  284 
  285         error = 0;
  286 bad:
  287         vndunlock(sc);
  288         return (error);
  289 }
  290 
  291 /*
  292  * Load the label information on the named device
  293  */
  294 void
  295 vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
  296 {
  297         struct disklabel *lp = sc->sc_dk.dk_label;
  298         char *errstring = NULL;
  299 
  300         bzero(lp, sizeof(struct disklabel));
  301 
  302         lp->d_secsize = 512;
  303         lp->d_ntracks = 1;
  304         lp->d_nsectors = 100;
  305         lp->d_ncylinders = sc->sc_size / 100;
  306         lp->d_secpercyl = 100;          /* lp->d_ntracks * lp->d_nsectors */
  307 
  308         strncpy(lp->d_typename, "vnd device", sizeof(lp->d_typename));
  309         lp->d_type = DTYPE_VND;
  310         strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
  311         DL_SETDSIZE(lp, sc->sc_size);
  312         lp->d_rpm = 3600;
  313         lp->d_interleave = 1;
  314         lp->d_flags = 0;
  315         lp->d_version = 1;
  316 
  317         lp->d_magic = DISKMAGIC;
  318         lp->d_magic2 = DISKMAGIC;
  319         lp->d_checksum = dkcksum(lp);
  320 
  321         /* Call the generic disklabel extraction routine */
  322         errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, 0);
  323         if (errstring) {
  324                 DNPRINTF(VDB_IO, "%s: %s\n", sc->sc_dev.dv_xname,
  325                     errstring);
  326                 return;
  327         }
  328 }
  329 
  330 int
  331 vndclose(dev_t dev, int flags, int mode, struct proc *p)
  332 {
  333         int unit = vndunit(dev);
  334         struct vnd_softc *sc;
  335         int error = 0, part;
  336 
  337         DNPRINTF(VDB_FOLLOW, "vndclose(%x, %x, %x, %p)\n", dev, flags, mode, p);
  338 
  339         if (unit >= numvnd)
  340                 return (ENXIO);
  341         sc = &vnd_softc[unit];
  342 
  343         if ((error = vndlock(sc)) != 0)
  344                 return (error);
  345 
  346         part = DISKPART(dev);
  347 
  348         /* ...that much closer to allowing unconfiguration... */
  349         switch (mode) {
  350         case S_IFCHR:
  351                 sc->sc_dk.dk_copenmask &= ~(1 << part);
  352                 break;
  353 
  354         case S_IFBLK:
  355                 sc->sc_dk.dk_bopenmask &= ~(1 << part);
  356                 break;
  357         }
  358         sc->sc_dk.dk_openmask =
  359             sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
  360 
  361         vndunlock(sc);
  362         return (0);
  363 }
  364 
  365 /*
  366  * Two methods are used, the traditional buffercache bypassing and the
  367  * newer, cache-coherent on unmount, one.
  368  *
  369  * Former method:
  370  * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
  371  * Note that this driver can only be used for swapping over NFS on the hp
  372  * since nfs_strategy on the vax cannot handle u-areas and page tables.
  373  *
  374  * Latter method:
  375  * Repack the buffer into an uio structure and use VOP_READ/VOP_WRITE to
  376  * access the underlying file.
       *
       * The method is chosen per request by vndsimple(bp->b_dev).
       *
       * bp is the request.  Early failures complete it through biodone() with
       * B_ERROR set: ENXIO when the unit is not VNF_INITED, EINVAL for a
       * negative block number, or whatever bounds_check_with_label() left in
       * bp.  In the VOP_READ/VOP_WRITE method the request is completed before
       * returning; in the VOP_STRATEGY method the pieces are completed by
       * vndiodone(), which finishes bp once its b_resid reaches zero.
       *
       * NOTE(review): unlike vndread()/vndwrite(), unit is not checked
       * against numvnd here -- presumably callers only get here after a
       * successful vndopen(); confirm.
  377  */
  378 void
  379 vndstrategy(struct buf *bp)
  380 {
  381         int unit = vndunit(bp->b_dev);
  382         struct vnd_softc *vnd = &vnd_softc[unit];
  383         struct vndbuf *nbp;
  384         int bsize;
  385         off_t bn;
  386         caddr_t addr;
  387         size_t resid;
  388         int sz, flags, error, s;
  389         struct iovec aiov;
  390         struct uio auio;
  391         struct proc *p = curproc;
  392 
  393         DNPRINTF(VDB_FOLLOW, "vndstrategy(%p): unit %d\n", bp, unit);
  394 
              /* Unit not configured: fail the request. */
  395         if ((vnd->sc_flags & VNF_INITED) == 0) {
  396                 bp->b_error = ENXIO;
  397                 bp->b_flags |= B_ERROR;
  398                 s = splbio();
  399                 biodone(bp);
  400                 splx(s);
  401                 return;
  402         }
  403 
  404         bn = bp->b_blkno;
  405         bp->b_resid = bp->b_bcount;
  406 
  407         if (bn < 0) {
  408                 bp->b_error = EINVAL;
  409                 bp->b_flags |= B_ERROR;
  410                 s = splbio();
  411                 biodone(bp);
  412                 splx(s);
  413                 return;
  414         }
  415 
  416         /* If we have a label, do a boundary check. */
  417         if (vnd->sc_flags & VNF_HAVELABEL) {
  418                 if (bounds_check_with_label(bp, vnd->sc_dk.dk_label, 1) <= 0) {
  419                         s = splbio();
  420                         biodone(bp);
  421                         splx(s);
  422                         return;
  423                 }
  424 
  425                 /*
  426                  * bounds_check_with_label() changes bp->b_resid, reset it
  427                  */
  428                 bp->b_resid = bp->b_bcount;
  429         }
  430 
              /*
               * NOTE(review): this value does not appear to be read; the
               * VOP_READ/VOP_WRITE path never uses sz and the loop further
               * down assigns sz before using it.
               */
  431         sz = howmany(bp->b_bcount, DEV_BSIZE);
  432 
  433         /* No bypassing of buffer cache?  */
  434         if (vndsimple(bp->b_dev)) {
  435                 /* Loop until all queued requests are handled.  */
  436                 for (;;) {
  437                         int part = DISKPART(bp->b_dev);
  438                         daddr64_t off = DL_GETPOFFSET(&vnd->sc_dk.dk_label->d_partitions[part]);
  439 
                              /*
                               * Describe the whole request as a single kernel
                               * iovec at the file offset of block
                               * (partition offset + b_blkno).
                               */
  440                         aiov.iov_base = bp->b_data;
  441                         auio.uio_resid = aiov.iov_len = bp->b_bcount;
  442                         auio.uio_iov = &aiov;
  443                         auio.uio_iovcnt = 1;
  444                         auio.uio_offset = dbtob((off_t)(bp->b_blkno + off));
  445                         auio.uio_segflg = UIO_SYSSPACE;
  446                         auio.uio_procp = p;
  447 
                              /*
                               * Note that vndencrypt() is passed bp->b_blkno,
                               * i.e. the block number without the partition
                               * offset added.
                               */
  448                         vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY, p);
  449                         if (bp->b_flags & B_READ) {
  450                                 auio.uio_rw = UIO_READ;
  451                                 bp->b_error = VOP_READ(vnd->sc_vp, &auio, 0,
  452                                     vnd->sc_cred);
  453                                 if (vnd->sc_keyctx)
  454                                         vndencrypt(vnd, bp->b_data,
  455                                            bp->b_bcount, bp->b_blkno, 0);
  456                         } else {
                                      /* Encrypt in place before writing. */
  457                                 if (vnd->sc_keyctx)
  458                                         vndencrypt(vnd, bp->b_data,
  459                                            bp->b_bcount, bp->b_blkno, 1);
  460                                 auio.uio_rw = UIO_WRITE;
  461                                 /*
  462                                  * Upper layer has already checked I/O for
  463                                  * limits, so there is no need to do it again.
  464                                  */
  465                                 bp->b_error = VOP_WRITE(vnd->sc_vp, &auio,
  466                                     IO_NOLIMIT, vnd->sc_cred);
  467                                 /* Data in buffer cache needs to be in clear */
  468                                 if (vnd->sc_keyctx)
  469                                         vndencrypt(vnd, bp->b_data,
  470                                            bp->b_bcount, bp->b_blkno, 0);
  471                         }
  472                         VOP_UNLOCK(vnd->sc_vp, 0, p);
  473                         if (bp->b_error)
  474                                 bp->b_flags |= B_ERROR;
  475                         bp->b_resid = auio.uio_resid;
  476                         s = splbio();
  477                         biodone(bp);
  478                         splx(s);
  479 
  480                         /* If nothing more is queued, we are done.  */
  481                         if (!vnd->sc_tab.b_active)
  482                                 return;
  483 
  484                         /*
  485                          * Dequeue now since lower level strategy
  486                          * routine might queue using same links.
  487                          */
  488                         s = splbio();
  489                         bp = vnd->sc_tab.b_actf;
  490                         vnd->sc_tab.b_actf = bp->b_actf;
  491                         vnd->sc_tab.b_active--;
  492                         splx(s);
  493                 }
  494         }
  495 
  496         /* The old-style buffercache bypassing method.  */
              /*
               * bn becomes a byte offset into the backing file (partition
               * offset included); bsize is the I/O size reported by the
               * backing file's mount.
               */
  497         bn += DL_GETPOFFSET(&vnd->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)]);
  498         bn = dbtob(bn);
  499         bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
  500         addr = bp->b_data;
  501         flags = bp->b_flags | B_CALL;
              /* One struct vndbuf per iteration, each covering sz bytes. */
  502         for (resid = bp->b_resid; resid; resid -= sz) {
  503                 struct vnode *vp;
  504                 daddr64_t nbn;
                      /* NOTE(review): this s shadows the function-level s. */
  505                 int off, s, nra;
  506 
                      /*
                       * Map file block bn / bsize to (vp, nbn).  An nbn of
                       * -1 is turned into EIO.
                       *
                       * NOTE(review): vp is only assigned by VOP_BMAP().  If
                       * VOP_BMAP() can fail without storing to vp, the
                       * vp->v_type test and the b_vp uses below read an
                       * indeterminate pointer -- confirm against the
                       * VOP_BMAP() contract.
                       */
  507                 nra = 0;
  508                 vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p);
  509                 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
  510                 VOP_UNLOCK(vnd->sc_vp, 0, p);
  511                 if (error == 0 && (long)nbn == -1)
  512                         error = EIO;
  513 #ifdef VNDDEBUG
  514                 if (!dovndcluster)
  515                         nra = 0;
  516 #endif
  517 
                      /*
                       * Piece size: up to the end of the current bsize block
                       * when bn is not block aligned, otherwise (1 + nra)
                       * whole blocks; never more than what is left.
                       */
  518                 if ((off = bn % bsize) != 0)
  519                         sz = bsize - off;
  520                 else
  521                         sz = (1 + nra) * bsize;
  522                 if (resid < sz)
  523                         sz = resid;
  524 
  525                 DNPRINTF(VDB_IO, "vndstrategy: vp %p/%p bn %x/%x sz %x\n",
  526                     vnd->sc_vp, vp, bn, nbn, sz);
  527 
  528                 s = splbio();
  529                 nbp = getvndbuf();
  530                 splx(s);
                      /* Fill in the piece from the parent request. */
  531                 nbp->vb_buf.b_flags = flags;
  532                 nbp->vb_buf.b_bcount = sz;
  533                 nbp->vb_buf.b_bufsize = bp->b_bufsize;
  534                 nbp->vb_buf.b_error = 0;
  535                 if (vp->v_type == VBLK || vp->v_type == VCHR)
  536                         nbp->vb_buf.b_dev = vp->v_rdev;
  537                 else
  538                         nbp->vb_buf.b_dev = NODEV;
  539                 nbp->vb_buf.b_data = addr;
  540                 nbp->vb_buf.b_blkno = nbn + btodb(off);
  541                 nbp->vb_buf.b_proc = bp->b_proc;
  542                 nbp->vb_buf.b_iodone = vndiodone;
  543                 nbp->vb_buf.b_vp = vp;
  544                 nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff;
  545                 nbp->vb_buf.b_dirtyend = bp->b_dirtyend;
  546                 nbp->vb_buf.b_validoff = bp->b_validoff;
  547                 nbp->vb_buf.b_validend = bp->b_validend;
  548                 LIST_INIT(&nbp->vb_buf.b_dep);
  549 
  550                 /* save a reference to the old buffer */
  551                 nbp->vb_obp = bp;
  552 
  553                 /*
  554                  * If there was an error or a hole in the file...punt.
  555                  * Note that we deal with this after the nbp allocation.
  556                  * This ensures that we properly clean up any operations
  557                  * that we have already fired off.
  558                  *
  559                  * XXX we could deal with holes here but it would be
  560                  * a hassle (in the write case).
  561                  * We must still however charge for the write even if there
  562                  * was an error.
  563                  */
  564                 if (error) {
  565                         nbp->vb_buf.b_error = error;
  566                         nbp->vb_buf.b_flags |= B_ERROR;
                              /*
                               * Take the bytes that will never be issued
                               * (everything after this piece) off the
                               * parent's residual; vndiodone() subtracts this
                               * piece's own b_bcount.
                               */
  567                         bp->b_resid -= (resid - sz);
  568                         s = splbio();
  569                         /* charge for the write */
  570                         if ((nbp->vb_buf.b_flags & B_READ) == 0)
  571                                 nbp->vb_buf.b_vp->v_numoutput++;
  572                         biodone(&nbp->vb_buf);
  573                         splx(s);
  574                         return;
  575                 }
  576                 /*
  577                  * Just sort by block number
  578                  */
  579                 nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno;
  580                 s = splbio();
  581                 disksort(&vnd->sc_tab, &nbp->vb_buf);
  582                 vnd->sc_tab.b_active++;
  583                 vndstart(vnd);
  584                 splx(s);
  585                 bn += sz;
  586                 addr += sz;
  587         }
  588 }
  589 
  590 /*
  591  * Feed requests sequentially.
  592  * We do it this way to keep from flooding NFS servers if we are connected
  593  * to an NFS file.  This places the burden on the client rather than the
  594  * server.
  595  */
  596 void
  597 vndstart(struct vnd_softc *vnd)
  598 {
  599         struct buf *bp;
  600 
  601         /*
  602          * Dequeue now since lower level strategy routine might
  603          * queue using same links
  604          */
  605         bp = vnd->sc_tab.b_actf;
  606         vnd->sc_tab.b_actf = bp->b_actf;
  607 
  608         DNPRINTF(VDB_IO,
  609             "vndstart(%d): bp %p vp %p blkno %x addr %p cnt %lx\n",
  610             vnd-vnd_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
  611             bp->b_bcount);
  612 
  613         /* Instrumentation. */
  614         disk_busy(&vnd->sc_dk);
  615 
  616         if ((bp->b_flags & B_READ) == 0)
  617                 bp->b_vp->v_numoutput++;
  618         VOP_STRATEGY(bp);
  619 }
  620 
  621 void
  622 vndiodone(struct buf *bp)
  623 {
  624         struct vndbuf *vbp = (struct vndbuf *) bp;
  625         struct buf *pbp = vbp->vb_obp;
  626         struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
  627 
  628         splassert(IPL_BIO);
  629 
  630         DNPRINTF(VDB_IO,
  631             "vndiodone(%d): vbp %p vp %p blkno %x addr %p cnt %lx\n",
  632             vnd-vnd_softc, vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno,
  633             vbp->vb_buf.b_data, vbp->vb_buf.b_bcount);
  634 
  635         if (vbp->vb_buf.b_error) {
  636                 DNPRINTF(VDB_IO, "vndiodone: vbp %p error %d\n", vbp,
  637                     vbp->vb_buf.b_error);
  638 
  639                 pbp->b_flags |= B_ERROR;
  640                 pbp->b_error = biowait(&vbp->vb_buf);
  641         }
  642         pbp->b_resid -= vbp->vb_buf.b_bcount;
  643         putvndbuf(vbp);
  644         if (vnd->sc_tab.b_active) {
  645                 disk_unbusy(&vnd->sc_dk, (pbp->b_bcount - pbp->b_resid),
  646                     (pbp->b_flags & B_READ));
  647                 if (!vnd->sc_tab.b_actf)
  648                         vnd->sc_tab.b_active--;
  649         }
  650         if (pbp->b_resid == 0) {
  651                 DNPRINTF(VDB_IO, "vndiodone: pbp %p iodone\n", pbp);
  652                 biodone(pbp);
  653         }
  654 
  655 }
  656 
  657 /* ARGSUSED */
  658 int
  659 vndread(dev_t dev, struct uio *uio, int flags)
  660 {
  661         int unit = vndunit(dev);
  662         struct vnd_softc *sc;
  663 
  664         DNPRINTF(VDB_FOLLOW, "vndread(%x, %p)\n", dev, uio);
  665 
  666         if (unit >= numvnd)
  667                 return (ENXIO);
  668         sc = &vnd_softc[unit];
  669 
  670         if ((sc->sc_flags & VNF_INITED) == 0)
  671                 return (ENXIO);
  672 
  673         return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
  674 }
  675 
  676 /* ARGSUSED */
  677 int
  678 vndwrite(dev_t dev, struct uio *uio, int flags)
  679 {
  680         int unit = vndunit(dev);
  681         struct vnd_softc *sc;
  682 
  683         DNPRINTF(VDB_FOLLOW, "vndwrite(%x, %p)\n", dev, uio);
  684 
  685         if (unit >= numvnd)
  686                 return (ENXIO);
  687         sc = &vnd_softc[unit];
  688 
  689         if ((sc->sc_flags & VNF_INITED) == 0)
  690                 return (ENXIO);
  691 
  692         return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
  693 }
  694 
  695 /* ARGSUSED */
  696 int
  697 vndioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
  698 {
  699         int unit = vndunit(dev);
  700         struct vnd_softc *vnd;
  701         struct vnd_ioctl *vio;
  702         struct vnd_user *vnu;
  703         struct vattr vattr;
  704         struct nameidata nd;
  705         int error, part, pmask, s;
  706 
  707         DNPRINTF(VDB_FOLLOW, "vndioctl(%x, %lx, %p, %x, %p): unit %d\n",
  708             dev, cmd, addr, flag, p, unit);
  709 
  710         error = suser(p, 0);
  711         if (error)
  712                 return (error);
  713         if (unit >= numvnd)
  714                 return (ENXIO);
  715 
  716         vnd = &vnd_softc[unit];
  717         vio = (struct vnd_ioctl *)addr;
  718         switch (cmd) {
  719 
  720         case VNDIOCSET:
  721                 if (vnd->sc_flags & VNF_INITED)
  722                         return (EBUSY);
  723                 if (!(vnd->sc_flags & VNF_SIMPLE) && vio->vnd_keylen)
  724                         return (EINVAL);
  725 
  726                 if ((error = vndlock(vnd)) != 0)
  727                         return (error);
  728 
  729                 if ((error = copyinstr(vio->vnd_file, vnd->sc_file,
  730                     sizeof(vnd->sc_file), NULL))) {
  731                         vndunlock(vnd);
  732                         return (error);
  733                 }
  734 
  735                 bzero(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname));
  736                 if (snprintf(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname),
  737                     "vnd%d", unit) >= sizeof(vnd->sc_dev.dv_xname)) {
  738                         printf("VNDIOCSET: device name too long\n");
  739                         vndunlock(vnd);
  740                         return(ENXIO);
  741                 }
  742 
  743                 /*
  744                  * Open for read and write first. This lets vn_open() weed out
  745                  * directories, sockets, etc. so we don't have to worry about
  746                  * them.
  747                  */
  748                 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
  749                 vnd->sc_flags &= ~VNF_READONLY; 
  750                 error = vn_open(&nd, FREAD|FWRITE, 0);
  751                 if (error == EROFS) {
  752                         vnd->sc_flags |= VNF_READONLY;
  753                         error = vn_open(&nd, FREAD, 0);
  754                 }
  755                 if (error) {
  756                         vndunlock(vnd);
  757                         return (error);
  758                 }
  759 
  760                 error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
  761                 if (error) {
  762                         VOP_UNLOCK(nd.ni_vp, 0, p);
  763                         (void) vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p);
  764                         vndunlock(vnd);
  765                         return (error);
  766                 }
  767                 VOP_UNLOCK(nd.ni_vp, 0, p);
  768                 vnd->sc_vp = nd.ni_vp;
  769                 vnd->sc_size = btodb(vattr.va_size);    /* note truncation */
  770                 if ((error = vndsetcred(vnd, p->p_ucred)) != 0) {
  771                         (void) vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p);
  772                         vndunlock(vnd);
  773                         return (error);
  774                 }
  775 
  776                 if (vio->vnd_keylen > 0) {
  777                         char key[BLF_MAXUTILIZED];
  778 
  779                         if (vio->vnd_keylen > sizeof(key))
  780                                 vio->vnd_keylen = sizeof(key);
  781 
  782                         if ((error = copyin(vio->vnd_key, key,
  783                             vio->vnd_keylen)) != 0) {
  784                                 (void) vn_close(nd.ni_vp, VNDRW(vnd),
  785                                     p->p_ucred, p);
  786                                 vndunlock(vnd);
  787                                 return (error);
  788                         }
  789 
  790                         vnd->sc_keyctx = malloc(sizeof(*vnd->sc_keyctx), M_DEVBUF,
  791                             M_WAITOK);
  792                         blf_key(vnd->sc_keyctx, key, vio->vnd_keylen);
  793                         bzero(key, vio->vnd_keylen);
  794                 } else
  795                         vnd->sc_keyctx = NULL;
  796 
  797                 vio->vnd_size = dbtob((off_t)vnd->sc_size);
  798                 vnd->sc_flags |= VNF_INITED;
  799 
  800                 DNPRINTF(VDB_INIT, "vndioctl: SET vp %p size %llx\n",
  801                     vnd->sc_vp, (unsigned long long)vnd->sc_size);
  802 
  803                 /* Attach the disk. */
  804                 vnd->sc_dk.dk_driver = &vnddkdriver;
  805                 vnd->sc_dk.dk_name = vnd->sc_dev.dv_xname;
  806                 disk_attach(&vnd->sc_dk);
  807 
  808                 vndunlock(vnd);
  809 
  810                 break;
  811 
  812         case VNDIOCCLR:
  813                 if ((vnd->sc_flags & VNF_INITED) == 0)
  814                         return (ENXIO);
  815 
  816                 if ((error = vndlock(vnd)) != 0)
  817                         return (error);
  818 
  819                 /*
  820                  * Don't unconfigure if any other partitions are open
  821                  * or if both the character and block flavors of this
  822                  * partition are open.
  823                  */
  824                 part = DISKPART(dev);
  825                 pmask = (1 << part);
  826                 if ((vnd->sc_dk.dk_openmask & ~pmask) ||
  827                     ((vnd->sc_dk.dk_bopenmask & pmask) &&
  828                     (vnd->sc_dk.dk_copenmask & pmask))) {
  829                         vndunlock(vnd);
  830                         return (EBUSY);
  831                 }
  832 
  833                 vndclear(vnd);
  834                 DNPRINTF(VDB_INIT, "vndioctl: CLRed\n");
  835 
  836                 /* Free crypto key */
  837                 if (vnd->sc_keyctx) {
  838                         bzero(vnd->sc_keyctx, sizeof(*vnd->sc_keyctx));
  839                         free(vnd->sc_keyctx, M_DEVBUF);
  840                 }
  841 
  842                 /* Detatch the disk. */
  843                 disk_detach(&vnd->sc_dk);
  844 
  845                 /* This must be atomic. */
  846                 s = splhigh();
  847                 vndunlock(vnd);
  848                 bzero(vnd, sizeof(struct vnd_softc));
  849                 splx(s);
  850                 break;
  851 
  852         case VNDIOCGET:
  853                 vnu = (struct vnd_user *)addr;
  854 
  855                 if (vnu->vnu_unit == -1)
  856                         vnu->vnu_unit = unit;
  857                 if (vnu->vnu_unit >= numvnd)
  858                         return (ENXIO);
  859                 if (vnu->vnu_unit < 0)
  860                         return (EINVAL);
  861 
  862                 vnd = &vnd_softc[vnu->vnu_unit];
  863 
  864                 if (vnd->sc_flags & VNF_INITED) {
  865                         error = VOP_GETATTR(vnd->sc_vp, &vattr, p->p_ucred, p);
  866                         if (error)
  867                                 return (error);
  868 
  869                         strlcpy(vnu->vnu_file, vnd->sc_file,
  870                             sizeof(vnu->vnu_file));
  871                         vnu->vnu_dev = vattr.va_fsid;
  872                         vnu->vnu_ino = vattr.va_fileid;
  873                 } else {
  874                         vnu->vnu_dev = 0;
  875                         vnu->vnu_ino = 0;
  876                 }
  877 
  878                 break;
  879 
  880         case DIOCGDINFO:
  881                 if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
  882                         return (ENOTTY);
  883                 *(struct disklabel *)addr = *(vnd->sc_dk.dk_label);
  884                 return (0);
  885 
  886         case DIOCGPART:
  887                 if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
  888                         return (ENOTTY);
  889                 ((struct partinfo *)addr)->disklab = vnd->sc_dk.dk_label;
  890                 ((struct partinfo *)addr)->part =
  891                     &vnd->sc_dk.dk_label->d_partitions[DISKPART(dev)];
  892                 return (0);
  893 
  894         case DIOCWDINFO:
  895         case DIOCSDINFO:
  896                 if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
  897                         return (ENOTTY);
  898                 if ((flag & FWRITE) == 0)
  899                         return (EBADF);
  900 
  901                 if ((error = vndlock(vnd)) != 0)
  902                         return (error);
  903                 vnd->sc_flags |= VNF_LABELLING;
  904 
  905                 error = setdisklabel(vnd->sc_dk.dk_label,
  906                     (struct disklabel *)addr, /*vnd->sc_dk.dk_openmask : */0);
  907                 if (error == 0) {
  908                         if (cmd == DIOCWDINFO)
  909                                 error = writedisklabel(VNDLABELDEV(dev),
  910                                     vndstrategy, vnd->sc_dk.dk_label);
  911                 }
  912 
  913                 vnd->sc_flags &= ~VNF_LABELLING;
  914                 vndunlock(vnd);
  915                 return (error);
  916 
  917         case DIOCWLABEL:
  918                 if ((flag & FWRITE) == 0)
  919                         return (EBADF);
  920                 if (*(int *)addr)
  921                         vnd->sc_flags |= VNF_WLABEL;
  922                 else
  923                         vnd->sc_flags &= ~VNF_WLABEL;
  924                 return (0);
  925 
  926         default:
  927                 return (ENOTTY);
  928         }
  929 
  930         return (0);
  931 }
  932 
  933 /*
  934  * Duplicate the current processes' credentials.  Since we are called only
  935  * as the result of a SET ioctl and only root can do that, any future access
  936  * to this "disk" is essentially as root.  Note that credentials may change
  937  * if some other uid can write directly to the mapped file (NFS).
  938  */
  939 int
  940 vndsetcred(struct vnd_softc *vnd, struct ucred *cred)
  941 {
  942         struct uio auio;
  943         struct iovec aiov;
  944         char *tmpbuf;
  945         int error;
  946         struct proc *p = curproc;
  947 
  948         vnd->sc_cred = crdup(cred);
  949         tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
  950 
  951         /* XXX: Horrible kludge to establish credentials for NFS */
  952         aiov.iov_base = tmpbuf;
  953         aiov.iov_len = MIN(DEV_BSIZE, dbtob((off_t)vnd->sc_size));
  954         auio.uio_iov = &aiov;
  955         auio.uio_iovcnt = 1;
  956         auio.uio_offset = 0;
  957         auio.uio_rw = UIO_READ;
  958         auio.uio_segflg = UIO_SYSSPACE;
  959         auio.uio_resid = aiov.iov_len;
  960         vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p);
  961         error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
  962         VOP_UNLOCK(vnd->sc_vp, 0, p);
  963 
  964         free(tmpbuf, M_TEMP);
  965         return (error);
  966 }
  967 
  968 void
  969 vndshutdown(void)
  970 {
  971         struct vnd_softc *vnd;
  972 
  973         for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
  974                 if (vnd->sc_flags & VNF_INITED)
  975                         vndclear(vnd);
  976 }
  977 
  978 void
  979 vndclear(struct vnd_softc *vnd)
  980 {
  981         struct vnode *vp = vnd->sc_vp;
  982         struct proc *p = curproc;               /* XXX */
  983 
  984         DNPRINTF(VDB_FOLLOW, "vndclear(%p): vp %p\n", vnd, vp);
  985 
  986         vnd->sc_flags &= ~VNF_INITED;
  987         if (vp == NULL)
  988                 panic("vndioctl: null vp");
  989         (void) vn_close(vp, VNDRW(vnd), vnd->sc_cred, p);
  990         crfree(vnd->sc_cred);
  991         vnd->sc_vp = NULL;
  992         vnd->sc_cred = NULL;
  993         vnd->sc_size = 0;
  994 }
  995 
  996 daddr64_t
  997 vndsize(dev_t dev)
  998 {
  999         int unit = vndunit(dev);
 1000         struct vnd_softc *vnd = &vnd_softc[unit];
 1001 
 1002         if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0)
 1003                 return (-1);
 1004         return (vnd->sc_size);
 1005 }
 1006 
 1007 int
 1008 vnddump(dev_t dev, daddr64_t blkno, caddr_t va, size_t size)
 1009 {
 1010 
 1011         /* Not implemented. */
 1012         return (ENXIO);
 1013 }

/* [<][>][^][v][top][bottom][index][help] */