root/nfs/nfs_bio.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions:
  1. nfs_bioread
  2. nfs_write
  3. nfs_getcacheblk
  4. nfs_vinvalbuf
  5. nfs_asyncio
  6. nfs_doio

    1 /*      $OpenBSD: nfs_bio.c,v 1.46 2007/06/01 23:47:57 deraadt Exp $    */
    2 /*      $NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $     */
    3 
    4 /*
    5  * Copyright (c) 1989, 1993
    6  *      The Regents of the University of California.  All rights reserved.
    7  *
    8  * This code is derived from software contributed to Berkeley by
    9  * Rick Macklem at The University of Guelph.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 3. Neither the name of the University nor the names of its contributors
   20  *    may be used to endorse or promote products derived from this software
   21  *    without specific prior written permission.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   33  * SUCH DAMAGE.
   34  *
   35  *      @(#)nfs_bio.c   8.9 (Berkeley) 3/30/95
   36  */
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/resourcevar.h>
   41 #include <sys/signalvar.h>
   42 #include <sys/proc.h>
   43 #include <sys/buf.h>
   44 #include <sys/vnode.h>
   45 #include <sys/mount.h>
   46 #include <sys/kernel.h>
   47 #include <sys/namei.h>
   48 
   49 #include <uvm/uvm_extern.h>
   50 
   51 #include <nfs/rpcv2.h>
   52 #include <nfs/nfsproto.h>
   53 #include <nfs/nfs.h>
   54 #include <nfs/nfsmount.h>
   55 #include <nfs/nfsnode.h>
   56 #include <nfs/nfs_var.h>
   57 
   58 extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
   59 extern int nfs_numasync;
   60 extern struct nfsstats nfsstats;
   61 struct nfs_bufqhead nfs_bufq;
   62 
   63 /*
   64  * Vnode op for read using bio
   65  * Any similarity to readip() is purely coincidental
   66  */
/*
 * nfs_bioread() -- read from an NFS vnode through the buffer cache.
 *
 *   vp     - vnode to read (VREG or VLNK handled; others just warn)
 *   uio    - destination buffer, starting offset and residual count
 *   ioflag - IO_* flags (accepted for the VOP interface)
 *   cred   - credentials; also cached in np->n_rcred for later async RPCs
 *
 * Returns 0 or an errno.  EINTR is returned when a cache block cannot
 * be obtained on an interruptible mount.
 */
int
nfs_bioread(vp, uio, ioflag, cred)
        struct vnode *vp;
        struct uio *uio;
        int ioflag;
        struct ucred *cred;
{
        struct nfsnode *np = VTONFS(vp);
        int biosize, diff;
        struct buf *bp = NULL, *rabp;
        struct vattr vattr;
        struct proc *p;
        struct nfsmount *nmp = VFSTONFS(vp->v_mount);
        daddr64_t lbn, bn, rabn;
        caddr_t baddr;
        int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;
        off_t offdiff;

#ifdef DIAGNOSTIC
        if (uio->uio_rw != UIO_READ)
                panic("nfs_read mode");
#endif
        if (uio->uio_resid == 0)
                return (0);
        if (uio->uio_offset < 0)
                return (EINVAL);
        p = uio->uio_procp;
        /* For NFSv3, fetch fsinfo once so nm_rsize below is up to date. */
        if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
                (void)nfs_fsinfo(nmp, vp, cred, p);
        biosize = nmp->nm_rsize;
        /*
         * For nfs, cache consistency can only be maintained approximately.
         * Although RFC1094 does not specify the criteria, the following is
         * believed to be compatible with the reference port.
         * For nfs:
         * If the file's modify time on the server has changed since the
         * last read rpc or you have written to the file,
         * you may have lost data cache consistency with the
         * server, so flush all of the file's data out of the cache.
         * Then force a getattr rpc to ensure that you have up to date
         * attributes.
         * NB: This implies that cache data can be read when up to
         * NFS_ATTRTIMEO seconds out of date. If you find that you need current
         * attributes this could be forced by setting n_attrstamp to 0 before
         * the VOP_GETATTR() call.
         */
        if (np->n_flag & NMODIFIED) {
                /* Locally modified: force fresh attributes from the server. */
                np->n_attrstamp = 0;
                error = VOP_GETATTR(vp, &vattr, cred, p);
                if (error)
                        return (error);
                np->n_mtime = vattr.va_mtime.tv_sec;
        } else {
                error = VOP_GETATTR(vp, &vattr, cred, p);
                if (error)
                        return (error);
                if (np->n_mtime != vattr.va_mtime.tv_sec) {
                        /* Server copy changed under us: toss cached data. */
                        error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
                        if (error)
                                return (error);
                        np->n_mtime = vattr.va_mtime.tv_sec;
                }
        }

        /*
         * update the cache read creds for this vnode
         */
        if (np->n_rcred)
                crfree(np->n_rcred);
        np->n_rcred = cred;
        crhold(cred);

        /*
         * Main loop: each pass moves at most one cache block's worth of
         * data to the caller.
         */
        do {
            if ((vp->v_flag & VROOT) && vp->v_type == VLNK) {
                    return (nfs_readlinkrpc(vp, uio, cred));
            }
            baddr = (caddr_t)0;
            switch (vp->v_type) {
            case VREG:
                nfsstats.biocache_reads++;
                lbn = uio->uio_offset / biosize;
                on = uio->uio_offset & (biosize - 1);
                bn = lbn * (biosize / DEV_BSIZE);
                not_readin = 1;

                /*
                 * Start the read ahead(s), as required.
                 */
                if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
                    for (nra = 0; nra < nmp->nm_readahead &&
                        (lbn + 1 + nra) * biosize < np->n_size; nra++) {
                        rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
                        if (!incore(vp, rabn)) {
                            rabp = nfs_getcacheblk(vp, rabn, biosize, p);
                            if (!rabp)
                                return (EINTR);
                            if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
                                rabp->b_flags |= (B_READ | B_ASYNC);
                                if (nfs_asyncio(rabp)) {
                                    /* No nfsiod free: drop the speculative read. */
                                    rabp->b_flags |= B_INVAL;
                                    brelse(rabp);
                                }
                            } else
                                brelse(rabp);
                        }
                    }
                }

                /*
                 * If the block is in the cache and has the required data
                 * in a valid region, just copy it out.
                 * Otherwise, get the block and write back/read in,
                 * as required.
                 */
                if ((bp = incore(vp, bn)) &&
                    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
                    (B_BUSY | B_WRITEINPROG))
                        /*
                         * Buffer is busy with a write RPC; borrow its data
                         * in place (got_buf == 0 means we don't brelse it).
                         */
                        got_buf = 0;
                else {
again:
                        bp = nfs_getcacheblk(vp, bn, biosize, p);
                        if (!bp)
                                return (EINTR);
                        got_buf = 1;
                        if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
                                bp->b_flags |= B_READ;
                                not_readin = 0;
                                error = nfs_doio(bp, p);
                                if (error) {
                                    brelse(bp);
                                    return (error);
                                }
                        }
                }
                /* Clamp the copy to this block and to end-of-file. */
                n = min((unsigned)(biosize - on), uio->uio_resid);
                offdiff = np->n_size - uio->uio_offset;
                if (offdiff < (off_t)n)
                        n = (int)offdiff;
                if (not_readin && n > 0) {
                        /*
                         * Block came from the cache but its valid region does
                         * not cover [on, on+n): push any dirty bytes to the
                         * server, then re-read the whole block.
                         */
                        if (on < bp->b_validoff || (on + n) > bp->b_validend) {
                                if (!got_buf) {
                                    bp = nfs_getcacheblk(vp, bn, biosize, p);
                                    if (!bp)
                                        return (EINTR);
                                    got_buf = 1;
                                }
                                bp->b_flags |= B_INVAFTERWRITE;
                                if (bp->b_dirtyend > 0) {
                                    if ((bp->b_flags & B_DELWRI) == 0)
                                        panic("nfsbioread");
                                    if (VOP_BWRITE(bp) == EINTR)
                                        return (EINTR);
                                } else
                                    brelse(bp);
                                goto again;
                        }
                }
                /* Never copy past the buffer's valid data. */
                diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
                if (diff < n)
                        n = diff;
                break;
            case VLNK:
                nfsstats.biocache_readlinks++;
                bp = nfs_getcacheblk(vp, 0, NFS_MAXPATHLEN, p);
                if (!bp)
                        return (EINTR);
                if ((bp->b_flags & B_DONE) == 0) {
                        bp->b_flags |= B_READ;
                        error = nfs_doio(bp, p);
                        if (error) {
                                brelse(bp);
                                return (error);
                        }
                }
                n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
                got_buf = 1;
                on = 0;
                break;
            default:
                printf(" nfsbioread: type %x unexpected\n",vp->v_type);
                break;
            }

            if (n > 0) {
                if (!baddr)
                        baddr = bp->b_data;
                error = uiomove(baddr + on, (int)n, uio);
            }
            switch (vp->v_type) {
            case VREG:
                break;
            case VLNK:
                /* Symlinks are read in one shot; force the loop to end. */
                n = 0;
                break;
            default:
                printf(" nfsbioread: type %x unexpected\n",vp->v_type);
            }
            if (got_buf)
                brelse(bp);
        } while (error == 0 && uio->uio_resid > 0 && n > 0);
        return (error);
}
  269 
  270 /*
  271  * Vnode op for write using bio
  272  */
/*
 * nfs_write() -- vop_write entry point; write through the buffer cache.
 *
 * v is a struct vop_write_args: vnode, uio, ioflag and credentials.
 * Data is copied into cache blocks whose dirty region is tracked via
 * b_dirtyoff/b_dirtyend; full blocks are pushed asynchronously, partial
 * blocks are delayed-written, and IO_SYNC forces a synchronous bwrite.
 * Returns 0 or an errno (EINTR if a cache block could not be obtained
 * on an interruptible mount, EFBIG on RLIMIT_FSIZE violation).
 */
int
nfs_write(v)
        void *v;
{
        struct vop_write_args *ap = v;
        int biosize;
        struct uio *uio = ap->a_uio;
        struct proc *p = uio->uio_procp;
        struct vnode *vp = ap->a_vp;
        struct nfsnode *np = VTONFS(vp);
        struct ucred *cred = ap->a_cred;
        int ioflag = ap->a_ioflag;
        struct buf *bp;
        struct vattr vattr;
        struct nfsmount *nmp = VFSTONFS(vp->v_mount);
        daddr64_t lbn, bn;
        int n, on, error = 0, extended = 0, wrotedta = 0, truncated = 0;

#ifdef DIAGNOSTIC
        if (uio->uio_rw != UIO_WRITE)
                panic("nfs_write mode");
        if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
                panic("nfs_write proc");
#endif
        if (vp->v_type != VREG)
                return (EIO);
        /* Report any error deferred from an earlier asynchronous write. */
        if (np->n_flag & NWRITEERR) {
                np->n_flag &= ~NWRITEERR;
                return (np->n_error);
        }
        /* For NFSv3, fetch fsinfo once so nm_rsize below is up to date. */
        if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
                (void)nfs_fsinfo(nmp, vp, cred, p);
        if (ioflag & (IO_APPEND | IO_SYNC)) {
                /* Flush cached data first so the write lands consistently. */
                if (np->n_flag & NMODIFIED) {
                        np->n_attrstamp = 0;
                        error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
                        if (error)
                                return (error);
                }
                if (ioflag & IO_APPEND) {
                        /* Append: refetch attributes to find the current EOF. */
                        np->n_attrstamp = 0;
                        error = VOP_GETATTR(vp, &vattr, cred, p);
                        if (error)
                                return (error);
                        uio->uio_offset = np->n_size;
                }
        }
        if (uio->uio_offset < 0)
                return (EINVAL);
        if (uio->uio_resid == 0)
                return (0);
        /*
         * Maybe this should be above the vnode op call, but so long as
         * file servers have no limits, i don't think it matters
         */
        if (p && uio->uio_offset + uio->uio_resid >
              p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
                psignal(p, SIGXFSZ);
                return (EFBIG);
        }

        /*
         * update the cache write creds for this node.
         */
        if (np->n_wcred)
                crfree(np->n_wcred);
        np->n_wcred = cred;
        crhold(cred);

        /*
         * I use nm_rsize, not nm_wsize so that all buffer cache blocks
         * will be the same size within a filesystem. nfs_writerpc will
         * still use nm_wsize when sizing the rpc's.
         */
        biosize = nmp->nm_rsize;
        do {

                /*
                 * XXX make sure we aren't cached in the VM page cache
                 */
                uvm_vnp_uncache(vp);

                nfsstats.biocache_writes++;
                lbn = uio->uio_offset / biosize;
                on = uio->uio_offset & (biosize-1);
                n = min((unsigned)(biosize - on), uio->uio_resid);
                bn = lbn * (biosize / DEV_BSIZE);
again:
                bp = nfs_getcacheblk(vp, bn, biosize, p);
                if (!bp)
                        return (EINTR);
                np->n_flag |= NMODIFIED;
                /* Grow the file (and its VM size) if writing past EOF. */
                if (uio->uio_offset + n > np->n_size) {
                        np->n_size = uio->uio_offset + n;
                        uvm_vnp_setsize(vp, (u_long)np->n_size);
                        extended = 1;
                } else if (uio->uio_offset + n < np->n_size)
                        truncated = 1;

                /*
                 * If the new write will leave a contiguous dirty
                 * area, just update the b_dirtyoff and b_dirtyend,
                 * otherwise force a write rpc of the old dirty area.
                 */
                if (bp->b_dirtyend > 0 &&
                    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
                        bp->b_proc = p;
                        if (VOP_BWRITE(bp) == EINTR)
                                return (EINTR);
                        goto again;
                }

                error = uiomove((char *)bp->b_data + on, n, uio);
                if (error) {
                        bp->b_flags |= B_ERROR;
                        brelse(bp);
                        return (error);
                }
                /* Merge [on, on+n) into the buffer's dirty region. */
                if (bp->b_dirtyend > 0) {
                        bp->b_dirtyoff = min(on, bp->b_dirtyoff);
                        bp->b_dirtyend = max((on + n), bp->b_dirtyend);
                } else {
                        bp->b_dirtyoff = on;
                        bp->b_dirtyend = on + n;
                }
                /* Keep the valid region a superset of the dirty region. */
                if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
                    bp->b_validoff > bp->b_dirtyend) {
                        bp->b_validoff = bp->b_dirtyoff;
                        bp->b_validend = bp->b_dirtyend;
                } else {
                        bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
                        bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
                }

                wrotedta = 1;

                /*
                 * Since this block is being modified, it must be written
                 * again and not just committed.
                 */

                if (NFS_ISV3(vp)) {
                        rw_enter_write(&np->n_commitlock);
                        if (bp->b_flags & B_NEEDCOMMIT) {
                                bp->b_flags &= ~B_NEEDCOMMIT;
                                nfs_del_tobecommitted_range(vp, bp);
                        }
                        nfs_del_committed_range(vp, bp);
                        rw_exit_write(&np->n_commitlock);
                } else 
                        bp->b_flags &= ~B_NEEDCOMMIT;

                /*
                 * If the lease is non-cachable or IO_SYNC do bwrite().
                 */
                if (ioflag & IO_SYNC) {
                        bp->b_proc = p;
                        error = VOP_BWRITE(bp);
                        if (error)
                                return (error);
                } else if ((n + on) == biosize) {
                        /* Block completely filled: start the write RPC now. */
                        bp->b_proc = (struct proc *)0;
                        bp->b_flags |= B_ASYNC;
                        (void)nfs_writebp(bp, 0);
                } else {
                        /* Partial block: delay, more data may follow. */
                        bdwrite(bp);
                }
        } while (uio->uio_resid > 0 && n > 0);

        if (wrotedta)
                VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0) |
                    (truncated ? NOTE_TRUNCATE : 0));

        return (0);
}
  448 
  449 /*
  450  * Get an nfs cache block.
  451  * Allocate a new one if the block isn't currently in the cache
  452  * and return the block marked busy. If the calling process is
  453  * interrupted by a signal for an interruptible mount point, return
  454  * NULL.
  455  */
  456 struct buf *
  457 nfs_getcacheblk(vp, bn, size, p)
  458         struct vnode *vp;
  459         daddr64_t bn;
  460         int size;
  461         struct proc *p;
  462 {
  463         struct buf *bp;
  464         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
  465 
  466         if (nmp->nm_flag & NFSMNT_INT) {
  467                 bp = getblk(vp, bn, size, PCATCH, 0);
  468                 while (bp == (struct buf *)0) {
  469                         if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
  470                                 return ((struct buf *)0);
  471                         bp = getblk(vp, bn, size, 0, 2 * hz);
  472                 }
  473         } else
  474                 bp = getblk(vp, bn, size, 0, 0);
  475         return (bp);
  476 }
  477 
  478 /*
  479  * Flush and invalidate all dirty buffers. If another process is already
  480  * doing the flush, just wait for completion.
  481  */
/*
 * nfs_vinvalbuf() -- flush and invalidate all of vp's buffers.
 *
 * Only one flusher runs per node (NFLUSHINPROG); latecomers set
 * NFLUSHWANT and sleep.  When intrflg is set on an NFSMNT_INT mount,
 * sleeps are interruptible and EINTR is returned on signal.
 * On success clears NMODIFIED and returns 0.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
        struct vnode *vp;
        int flags;
        struct ucred *cred;
        struct proc *p;
        int intrflg;
{
        struct nfsnode *np = VTONFS(vp);
        struct nfsmount *nmp = VFSTONFS(vp->v_mount);
        int error = 0, slpflag, slptimeo;

        /* Interruptible waits only make sense on NFSMNT_INT mounts. */
        if ((nmp->nm_flag & NFSMNT_INT) == 0)
                intrflg = 0;
        if (intrflg) {
                slpflag = PCATCH;
                slptimeo = 2 * hz;
        } else {
                slpflag = 0;
                slptimeo = 0;
        }
        /*
         * First wait for any other process doing a flush to complete.
         */
        while (np->n_flag & NFLUSHINPROG) {
                np->n_flag |= NFLUSHWANT;
                error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
                        slptimeo);
                if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
                        return (EINTR);
        }

        /*
         * Now, flush as required.
         */
        np->n_flag |= NFLUSHINPROG;
        error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
        while (error) {
                /* Interrupted: release the flush token and wake waiters. */
                if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
                        np->n_flag &= ~NFLUSHINPROG;
                        if (np->n_flag & NFLUSHWANT) {
                                np->n_flag &= ~NFLUSHWANT;
                                wakeup((caddr_t)&np->n_flag);
                        }
                        return (EINTR);
                }
                /* Retry without PCATCH, but with a timeout so we re-poll. */
                error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
        }
        np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
        if (np->n_flag & NFLUSHWANT) {
                np->n_flag &= ~NFLUSHWANT;
                wakeup((caddr_t)&np->n_flag);
        }
        return (0);
}
  537 
  538 /*
  539  * Initiate asynchronous I/O. Return an error if no nfsiods are available.
  540  * This is mainly to avoid queueing async I/O requests when the nfsiods
  541  * are all hung on a dead server.
  542  */
  543 int
  544 nfs_asyncio(bp)
  545         struct buf *bp;
  546 {
  547         int i,s;
  548 
  549         if (nfs_numasync == 0)
  550                 return (EIO);
  551         for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
  552             if (nfs_iodwant[i]) {
  553                 if ((bp->b_flags & B_READ) == 0) {
  554                         bp->b_flags |= B_WRITEINPROG;
  555                 }
  556         
  557                 TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
  558                 nfs_iodwant[i] = (struct proc *)0;
  559                 wakeup((caddr_t)&nfs_iodwant[i]);
  560                 return (0);
  561             }
  562 
  563         /*
  564          * If it is a read or a write already marked B_WRITEINPROG or B_NOCACHE
  565          * return EIO so the process will call nfs_doio() and do it
  566          * synchronously.
  567          */
  568         if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE))
  569                 return (EIO);
  570 
  571         /*
  572          * Just turn the async write into a delayed write, instead of
  573          * doing in synchronously. Hopefully, at least one of the nfsiods
  574          * is currently doing a write for this file and will pick up the
  575          * delayed writes before going back to sleep.
  576          */
  577         s = splbio();
  578         buf_dirty(bp);
  579         biodone(bp);
  580         splx(s);
  581         return (0);
  582 }
  583 
  584 /*
  585  * Do an I/O operation to/from a cache block. This may be called
  586  * synchronously or from an nfsiod.
  587  */
/*
 * nfs_doio() -- perform the I/O described by bp against the server.
 *
 * Called synchronously by readers/writers or from an nfsiod.  Dispatches
 * on the buffer flags: B_PHYS (physio-style raw transfer), B_READ
 * (read the block, zero-filling any hole past the data returned), or
 * a write of just the buffer's dirty region.  Calls biodone() before
 * returning the RPC's error code (also latched in bp->b_error/B_ERROR).
 */
int
nfs_doio(bp, p)
        struct buf *bp;
        struct proc *p;
{
        struct uio *uiop;
        struct vnode *vp;
        struct nfsnode *np;
        struct nfsmount *nmp;
        int s, error = 0, diff, len, iomode, must_commit = 0;
        struct uio uio;
        struct iovec io;

        vp = bp->b_vp;
        np = VTONFS(vp);
        nmp = VFSTONFS(vp->v_mount);
        /* Build an on-stack uio describing the buffer's data. */
        uiop = &uio;
        uiop->uio_iov = &io;
        uiop->uio_iovcnt = 1;
        uiop->uio_segflg = UIO_SYSSPACE;
        uiop->uio_procp = p;

        /*
         * Historically, paging was done with physio, but no more...
         */
        if (bp->b_flags & B_PHYS) {
            /*
             * ...though reading /dev/drum still gets us here.
             */
            io.iov_len = uiop->uio_resid = bp->b_bcount;
            /* mapping was done by vmapbuf() */
            io.iov_base = bp->b_data;
            uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
            if (bp->b_flags & B_READ) {
                uiop->uio_rw = UIO_READ;
                nfsstats.read_physios++;
                error = nfs_readrpc(vp, uiop);
            } else {
                iomode = NFSV3WRITE_DATASYNC;
                uiop->uio_rw = UIO_WRITE;
                nfsstats.write_physios++;
                error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
            }
            if (error) {
                bp->b_flags |= B_ERROR;
                bp->b_error = error;
            }
        } else if (bp->b_flags & B_READ) {
            /* Read the whole block from the server. */
            io.iov_len = uiop->uio_resid = bp->b_bcount;
            io.iov_base = bp->b_data;
            uiop->uio_rw = UIO_READ;
            switch (vp->v_type) {
            case VREG:
                uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
                nfsstats.read_bios++;
                error = nfs_readrpc(vp, uiop);
                if (!error) {
                    bp->b_validoff = 0;
                    if (uiop->uio_resid) {
                        /*
                         * If len > 0, there is a hole in the file and
                         * no writes after the hole have been pushed to
                         * the server yet.
                         * Just zero fill the rest of the valid area.
                         */
                        diff = bp->b_bcount - uiop->uio_resid;
                        len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT)
                                + diff);
                        if (len > 0) {
                            len = min(len, uiop->uio_resid);
                            bzero((char *)bp->b_data + diff, len);
                            bp->b_validend = diff + len;
                        } else
                            bp->b_validend = diff;
                    } else
                        bp->b_validend = bp->b_bcount;
                }
                /* Running executable whose backing file changed: kill it. */
                if (p && (vp->v_flag & VTEXT) &&
                    (np->n_mtime != np->n_vattr.va_mtime.tv_sec)) {
                        uprintf("Process killed due to text file modification\n");
                        psignal(p, SIGKILL);
                }
                break;
            case VLNK:
                uiop->uio_offset = (off_t)0;
                nfsstats.readlink_bios++;
                error = nfs_readlinkrpc(vp, uiop, curproc->p_ucred);
                break;
            default:
                printf("nfs_doio:  type %x unexpected\n", vp->v_type);
                break;
            };
            if (error) {
                bp->b_flags |= B_ERROR;
                bp->b_error = error;
            }
        } else {
            /* Write: push only the buffer's dirty region to the server. */
            io.iov_len = uiop->uio_resid = bp->b_dirtyend
                - bp->b_dirtyoff;
            uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
                + bp->b_dirtyoff;
            io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
            uiop->uio_rw = UIO_WRITE;
            nfsstats.write_bios++;
            /* Plain async writes may go out UNSTABLE and be committed later. */
            if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
                iomode = NFSV3WRITE_UNSTABLE;
            else
                iomode = NFSV3WRITE_FILESYNC;
            bp->b_flags |= B_WRITEINPROG;
            error = nfs_writerpc(vp, uiop, &iomode, &must_commit);

            /* Record which byte ranges still need a COMMIT rpc. */
            rw_enter_write(&np->n_commitlock);
            if (!error && iomode == NFSV3WRITE_UNSTABLE) {
                bp->b_flags |= B_NEEDCOMMIT;
                nfs_add_tobecommitted_range(vp, bp);
            } else {
                bp->b_flags &= ~B_NEEDCOMMIT;
                nfs_del_committed_range(vp, bp);
            }
            rw_exit_write(&np->n_commitlock);

            bp->b_flags &= ~B_WRITEINPROG;

            /*
             * For an interrupted write, the buffer is still valid and the
             * write hasn't been pushed to the server yet, so we can't set
             * B_ERROR and report the interruption by setting B_EINTR. For
             * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
             * is essentially a noop.
             * For the case of a V3 write rpc not being committed to stable
             * storage, the block is still dirty and requires either a commit
             * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
             * before the block is reused. This is indicated by setting the
             * B_DELWRI and B_NEEDCOMMIT flags.
             */
            if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
                    s = splbio();
                    buf_dirty(bp);
                    splx(s);

                    if (!(bp->b_flags & B_ASYNC) && error)
                            bp->b_flags |= B_EINTR;
            } else {
                if (error) {
                    /* Latch the error on the node for the next nfs_write(). */
                    bp->b_flags |= B_ERROR;
                    bp->b_error = np->n_error = error;
                    np->n_flag |= NWRITEERR;
                }
                bp->b_dirtyoff = bp->b_dirtyend = 0;
            }
        }
        bp->b_resid = uiop->uio_resid;
        /* nfs_writerpc set must_commit: drop cached commit state for mount. */
        if (must_commit)
                nfs_clearcommit(vp->v_mount);
        s = splbio();
        biodone(bp);
        splx(s);
        return (error);
}

/* [<][>][^][v][top][bottom][index][help] */