root/ufs/ffs/ffs_vnops.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. ffs_read
  2. ffs_write
  3. ffs_fsync
  4. ffs_reclaim
  5. ffsfifo_reclaim

    1 /*      $OpenBSD: ffs_vnops.c,v 1.45 2007/06/01 23:47:57 deraadt Exp $  */
    2 /*      $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $    */
    3 
    4 /*
    5  * Copyright (c) 1982, 1986, 1989, 1993
    6  *      The Regents of the University of California.  All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)ffs_vnops.c 8.10 (Berkeley) 8/10/94
   33  */
   34 
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/resourcevar.h>
   38 #include <sys/kernel.h>
   39 #include <sys/file.h>
   40 #include <sys/stat.h>
   41 #include <sys/buf.h>
   42 #include <sys/proc.h>
   43 #include <sys/conf.h>
   44 #include <sys/mount.h>
   45 #include <sys/vnode.h>
   46 #include <sys/malloc.h>
   47 #include <sys/signalvar.h>
   48 #include <sys/pool.h>
   49 #include <sys/event.h>
   50 
   51 #include <uvm/uvm_extern.h>
   52 
   53 #include <miscfs/specfs/specdev.h>
   54 #include <miscfs/fifofs/fifo.h>
   55 
   56 #include <ufs/ufs/quota.h>
   57 #include <ufs/ufs/inode.h>
   58 #include <ufs/ufs/dir.h>
   59 #include <ufs/ufs/ufs_extern.h>
   60 #include <ufs/ufs/ufsmount.h>
   61 
   62 #include <ufs/ffs/fs.h>
   63 #include <ufs/ffs/ffs_extern.h>
   64 
/*
 * Global vfs data structures for ufs.
 *
 * Operations vector for regular FFS vnodes (files, directories,
 * symlinks).  Generic UFS handlers are used for everything except the
 * ops with FFS-specific behavior: read, write, fsync, reclaim and
 * reallocblks.  Unlisted ops fall through to vn_default_error.
 */
int (**ffs_vnodeop_p)(void *);
struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, ufs_lookup },		/* lookup */
	{ &vop_create_desc, ufs_create },		/* create */
	{ &vop_mknod_desc, ufs_mknod },			/* mknod */
	{ &vop_open_desc, ufs_open },			/* open */
	{ &vop_close_desc, ufs_close },			/* close */
	{ &vop_access_desc, ufs_access },		/* access */
	{ &vop_getattr_desc, ufs_getattr },		/* getattr */
	{ &vop_setattr_desc, ufs_setattr },		/* setattr */
	{ &vop_read_desc, ffs_read },			/* read */
	{ &vop_write_desc, ffs_write },			/* write */
	{ &vop_ioctl_desc, ufs_ioctl },			/* ioctl */
	{ &vop_poll_desc, ufs_poll },			/* poll */
	{ &vop_kqfilter_desc, ufs_kqfilter },		/* kqfilter */
	{ &vop_revoke_desc, ufs_revoke },		/* revoke */
	{ &vop_fsync_desc, ffs_fsync },			/* fsync */
	{ &vop_remove_desc, ufs_remove },		/* remove */
	{ &vop_link_desc, ufs_link },			/* link */
	{ &vop_rename_desc, ufs_rename },		/* rename */
	{ &vop_mkdir_desc, ufs_mkdir },			/* mkdir */
	{ &vop_rmdir_desc, ufs_rmdir },			/* rmdir */
	{ &vop_symlink_desc, ufs_symlink },		/* symlink */
	{ &vop_readdir_desc, ufs_readdir },		/* readdir */
	{ &vop_readlink_desc, ufs_readlink },		/* readlink */
	{ &vop_abortop_desc, vop_generic_abortop },	/* abortop */
	{ &vop_inactive_desc, ufs_inactive },		/* inactive */
	{ &vop_reclaim_desc, ffs_reclaim },		/* reclaim */
	{ &vop_lock_desc, ufs_lock },			/* lock */
	{ &vop_unlock_desc, ufs_unlock },		/* unlock */
	{ &vop_bmap_desc, ufs_bmap },			/* bmap */
	{ &vop_strategy_desc, ufs_strategy },		/* strategy */
	{ &vop_print_desc, ufs_print },			/* print */
	{ &vop_islocked_desc, ufs_islocked },		/* islocked */
	{ &vop_pathconf_desc, ufs_pathconf },		/* pathconf */
	{ &vop_advlock_desc, ufs_advlock },		/* advlock */
	{ &vop_reallocblks_desc, ffs_reallocblks },	/* reallocblks */
	{ &vop_bwrite_desc, vop_generic_bwrite },	/* bwrite */
	{ NULL, NULL }
};

struct vnodeopv_desc ffs_vnodeop_opv_desc =
	{ &ffs_vnodeop_p, ffs_vnodeop_entries };
  110 
/*
 * Operations vector for special-device vnodes living on an FFS
 * filesystem.  Most ops are forwarded to the specfs layer via
 * spec_vnoperate; only the attribute/inode-related ops and fsync go
 * through UFS/FFS so the on-disk inode stays consistent.
 */
int (**ffs_specop_p)(void *);
struct vnodeopv_entry_desc ffs_specop_entries[] = {
	{ &vop_default_desc, spec_vnoperate },
	{ &vop_close_desc, ufsspec_close },		/* close */
	{ &vop_access_desc, ufs_access },		/* access */
	{ &vop_getattr_desc, ufs_getattr },		/* getattr */
	{ &vop_setattr_desc, ufs_setattr },		/* setattr */
	{ &vop_read_desc, ufsspec_read },		/* read */
	{ &vop_write_desc, ufsspec_write },		/* write */
	{ &vop_fsync_desc, ffs_fsync },			/* fsync */
	{ &vop_inactive_desc, ufs_inactive },		/* inactive */
	{ &vop_reclaim_desc, ffs_reclaim },		/* reclaim */
	{ &vop_lock_desc, ufs_lock },			/* lock */
	{ &vop_unlock_desc, ufs_unlock },		/* unlock */
	{ &vop_print_desc, ufs_print },			/* print */
	{ &vop_islocked_desc, ufs_islocked },		/* islocked */
	{ NULL, NULL }
};

struct vnodeopv_desc ffs_specop_opv_desc =
	{ &ffs_specop_p, ffs_specop_entries };
  132 
#ifdef FIFO
/*
 * Operations vector for FIFO vnodes living on an FFS filesystem.
 * Parallels the specop table above: fifofs handles the pipe
 * semantics (via fifo_vnoperate), while inode attribute ops, fsync
 * and reclaim stay with UFS/FFS.  Compiled only with "option FIFO".
 */
int (**ffs_fifoop_p)(void *);
struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
	{ &vop_default_desc, fifo_vnoperate },
	{ &vop_close_desc, ufsfifo_close },		/* close */
	{ &vop_access_desc, ufs_access },		/* access */
	{ &vop_getattr_desc, ufs_getattr },		/* getattr */
	{ &vop_setattr_desc, ufs_setattr },		/* setattr */
	{ &vop_read_desc, ufsfifo_read },		/* read */
	{ &vop_write_desc, ufsfifo_write },		/* write */
	{ &vop_fsync_desc, ffs_fsync },			/* fsync */
	{ &vop_inactive_desc, ufs_inactive },		/* inactive */
	{ &vop_reclaim_desc, ffsfifo_reclaim },		/* reclaim */
	{ &vop_lock_desc, ufs_lock },			/* lock */
	{ &vop_unlock_desc, ufs_unlock },		/* unlock */
	{ &vop_print_desc, ufs_print },			/* print */
	{ &vop_islocked_desc, ufs_islocked },		/* islocked */
	{ &vop_bwrite_desc, vop_generic_bwrite },	/* bwrite */
	{ NULL, NULL }
};

struct vnodeopv_desc ffs_fifoop_opv_desc =
	{ &ffs_fifoop_p, ffs_fifoop_entries };
#endif /* FIFO */
  157 
/*
 * Enabling cluster read/write operations.
 * Global knobs (patchable/ddb-settable): non-zero enables clustered
 * I/O.  doclusterwrite is consulted by ffs_write below.
 * NOTE(review): doclusterread is not referenced in the code visible in
 * this file -- confirm whether it still gates anything elsewhere.
 */
int doclusterread = 1;
int doclusterwrite = 1;
  163 
/*
 * Vnode op for reading.
 *
 * Copies data from the file backing ap->a_vp into ap->a_uio, one
 * filesystem block at a time through the buffer cache, until the uio
 * is satisfied or EOF is reached.  When a sequential access pattern is
 * detected (previous read ended at lbn - 1) the block is fetched with
 * bread_cluster() instead of a plain bread().  On return the inode is
 * flagged for an access-time update.  Returns 0 or an errno.
 */
/* ARGSUSED */
int
ffs_read(void *v)
{
	struct vop_read_args *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	daddr64_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	mode_t mode;
	int error;

	vp = ap->a_vp;
	ip = VTOI(vp);
	/* NOTE(review): mode is fetched but never used below. */
	mode = DIP(ip, mode);
	uio = ap->a_uio;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	/*
	 * Short symlinks are stored in the inode itself and must never
	 * reach this path; anything other than VREG/VDIR/VLNK has no
	 * business being read through FFS at all.
	 */
	if (vp->v_type == VLNK) {
		if ((int)DIP(ip, size) < vp->v_mount->mnt_maxsymlinklen ||
		    (vp->v_mount->mnt_maxsymlinklen == 0 &&
		     DIP(ip, blocks) == 0))
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	fs = ip->i_fs;
	if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
		return (EFBIG);

	if (uio->uio_resid == 0)
		return (0);

	/* bp is reset to NULL each iteration so the cleanup below only
	 * releases a buffer left over from a break out of the loop. */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = DIP(ip, size) - uio->uio_offset) <= 0)
			break;		/* at or past EOF */
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = fs->fs_bsize;	/* WAS blksize(fs, ip, lbn); */
		blkoffset = blkoff(fs, uio->uio_offset);
		/* Clamp the transfer to block end, caller's residual,
		 * and the remaining bytes in the file. */
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		/*
		 * Last block of the file: plain read.  Otherwise use a
		 * cluster read when this request directly follows the
		 * previously read block (sequential access).
		 */
		if (lblktosize(fs, nextlbn) >= DIP(ip, size))
			error = bread(vp, lbn, size, NOCRED, &bp);
		else if (lbn - 1 == ip->i_ci.ci_lastr) {
			error = bread_cluster(vp, lbn, size, &bp);
		} else
			error = bread(vp, lbn, size, NOCRED, &bp);

		if (error)
			break;
		ip->i_ci.ci_lastr = lbn;	/* remember for next call */

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize,
				uio);
		if (error)
			break;
		brelse(bp);
	}
	/* Release the buffer still held after an early break. */
	if (bp != NULL)
		brelse(bp);
	ip->i_flag |= IN_ACCESS;	/* schedule atime update */
	return (error);
}
  255 
  256 /*
  257  * Vnode op for writing.
  258  */
  259 int
  260 ffs_write(void *v)
  261 {
  262         struct vop_write_args *ap = v;
  263         struct vnode *vp;
  264         struct uio *uio;
  265         struct inode *ip;
  266         struct fs *fs;
  267         struct buf *bp;
  268         struct proc *p;
  269         daddr_t lbn;
  270         off_t osize;
  271         int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
  272 
  273         extended = 0;
  274         ioflag = ap->a_ioflag;
  275         uio = ap->a_uio;
  276         vp = ap->a_vp;
  277         ip = VTOI(vp);
  278 
  279 #ifdef DIAGNOSTIC
  280         if (uio->uio_rw != UIO_WRITE)
  281                 panic("ffs_write: mode");
  282 #endif
  283 
  284         /*
  285          * If writing 0 bytes, succeed and do not change
  286          * update time or file offset (standards compliance)
  287          */
  288         if (uio->uio_resid == 0)
  289                 return (0);
  290 
  291         switch (vp->v_type) {
  292         case VREG:
  293                 if (ioflag & IO_APPEND)
  294                         uio->uio_offset = DIP(ip, size);
  295                 if ((DIP(ip, flags) & APPEND) && uio->uio_offset != DIP(ip, size))
  296                         return (EPERM);
  297                 /* FALLTHROUGH */
  298         case VLNK:
  299                 break;
  300         case VDIR:
  301                 if ((ioflag & IO_SYNC) == 0)
  302                         panic("ffs_write: nonsync dir write");
  303                 break;
  304         default:
  305                 panic("ffs_write: type");
  306         }
  307 
  308         fs = ip->i_fs;
  309         if (uio->uio_offset < 0 ||
  310             (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
  311                 return (EFBIG);
  312         /*
  313          * Maybe this should be above the vnode op call, but so long as
  314          * file servers have no limits, I don't think it matters.
  315          */
  316         p = uio->uio_procp;
  317         if (vp->v_type == VREG && p && !(ioflag & IO_NOLIMIT) &&
  318             uio->uio_offset + uio->uio_resid >
  319             p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
  320                 psignal(p, SIGXFSZ);
  321                 return (EFBIG);
  322         }
  323 
  324         resid = uio->uio_resid;
  325         osize = DIP(ip, size);
  326         flags = ioflag & IO_SYNC ? B_SYNC : 0;
  327 
  328         for (error = 0; uio->uio_resid > 0;) {
  329                 lbn = lblkno(fs, uio->uio_offset);
  330                 blkoffset = blkoff(fs, uio->uio_offset);
  331                 xfersize = fs->fs_bsize - blkoffset;
  332                 if (uio->uio_resid < xfersize)
  333                         xfersize = uio->uio_resid;
  334                 if (fs->fs_bsize > xfersize)
  335                         flags |= B_CLRBUF;
  336                 else
  337                         flags &= ~B_CLRBUF;
  338 
  339                 if ((error = UFS_BUF_ALLOC(ip, uio->uio_offset, xfersize,
  340                          ap->a_cred, flags, &bp)) != 0)
  341                         break;
  342                 if (uio->uio_offset + xfersize > DIP(ip, size)) {
  343                         DIP_ASSIGN(ip, size, uio->uio_offset + xfersize);
  344                         uvm_vnp_setsize(vp, DIP(ip, size));
  345                         extended = 1;
  346                 }
  347                 (void)uvm_vnp_uncache(vp);
  348 
  349                 size = blksize(fs, ip, lbn) - bp->b_resid;
  350                 if (size < xfersize)
  351                         xfersize = size;
  352 
  353                 error =
  354                     uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
  355 
  356                 if (error != 0)
  357                         bzero((char *)bp->b_data + blkoffset, xfersize);
  358 
  359                 if (ioflag & IO_SYNC)
  360                         (void)bwrite(bp);
  361                 else if (xfersize + blkoffset == fs->fs_bsize) {
  362                         if (doclusterwrite)
  363                                 cluster_write(bp, &ip->i_ci, DIP(ip, size));
  364                         else
  365                                 bawrite(bp);
  366                 } else
  367                         bdwrite(bp);
  368 
  369                 if (error || xfersize == 0)
  370                         break;
  371                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
  372         }
  373         /*
  374          * If we successfully wrote any data, and we are not the superuser
  375          * we clear the setuid and setgid bits as a precaution against
  376          * tampering.
  377          */
  378         if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
  379                 DIP(ip, mode) &= ~(ISUID | ISGID);
  380         if (resid > uio->uio_resid)
  381                 VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
  382         if (error) {
  383                 if (ioflag & IO_UNIT) {
  384                         (void)UFS_TRUNCATE(ip, osize,
  385                             ioflag & IO_SYNC, ap->a_cred);
  386                         uio->uio_offset -= resid - uio->uio_resid;
  387                         uio->uio_resid = resid;
  388                 }
  389         } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
  390                 error = UFS_UPDATE(ip, MNT_WAIT);
  391         }
  392         return (error);
  393 }
  394 
/*
 * Synch an open file.
 *
 * Flushes all dirty buffers of the vnode to disk.  For MNT_WAIT
 * requests data blocks are pushed first (skipmeta pass), then
 * metadata, then softdep metadata is synced and up to NIADDR + 1
 * retry passes are made until the dirty list drains; the final pass
 * writes synchronously so write errors can be reported.  Finishes by
 * updating the on-disk inode.  Buffer-list manipulation is done at
 * splbio().  Returns 0 or an errno.
 */
/* ARGSUSED */
int
ffs_fsync(void *v)
{
	struct vop_fsync_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct buf *bp, *nbp;
	int s, error, passes, skipmeta;

	/* Mounted block device with softdep: let softdep flush it. */
	if (vp->v_type == VBLK &&
	    vp->v_specmountpoint != NULL &&
	    (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP))
		softdep_fsync_mountdev(vp, ap->a_waitfor);

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (ap->a_waitfor == MNT_WAIT)
		skipmeta = 1;	/* first pass: data blocks only */
	s = splbio();
loop:
	/* Clear B_SCANNED so every buffer is considered once per pass. */
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp;
	     bp = LIST_NEXT(bp, b_vnbufs))
		bp->b_flags &= ~B_SCANNED;
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		/* 
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if (bp->b_flags & (B_BUSY | B_SCANNED))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/* Negative lblkno == indirect block, i.e. metadata. */
		if (skipmeta && bp->b_lblkno < 0)
			continue;
		if (ap->a_waitfor != MNT_WAIT &&
		    LIST_FIRST(&bp->b_dep) != NULL &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0, 1)) {
			bp->b_flags |= B_DEFERRED;
			continue;
		}

		bremfree(bp);
		bp->b_flags |= B_BUSY | B_SCANNED;
		splx(s);
		/*
		 * On our final pass through, do all I/O synchronously
		 * so that we can find out if our flush is failing
		 * because of write errors.
		 */
		if (passes > 0 || ap->a_waitfor != MNT_WAIT)
			(void) bawrite(bp);
		else if ((error = bwrite(bp)) != 0)
			return (error);
		s = splbio();
		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		nbp = LIST_FIRST(&vp->v_dirtyblkhd);
	}
	/* Data pass done; go around again including metadata. */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}
	if (ap->a_waitfor == MNT_WAIT) {
		vwaitforio(vp, 0, "ffs_fsync", 0);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(ap)) != 0)
			return (error);
		s = splbio();
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (vp->v_type != VBLK)
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	splx(s);
	/* Finally push the inode itself (waiting if MNT_WAIT). */
	return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT));
}
  504 
/*
 * Reclaim an inode so that it can be used for other purposes.
 *
 * Called when the vnode is being recycled: detach the UFS-level state
 * via ufs_reclaim(), then return the dinode and inode structures to
 * their pools and sever vp->v_data.  Returns 0 or the error from
 * ufs_reclaim().
 */
int
ffs_reclaim(void *v)
{
	struct vop_reclaim_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	int error;

	if ((error = ufs_reclaim(vp, ap->a_p)) != 0)
		return (error);

	/*
	 * NOTE(review): the i_din1 test guards both cases below, which
	 * presumes i_din1/i_din2 overlay each other (a union) so a
	 * non-NULL i_din1 implies a valid i_din2 for UFS2 -- confirm
	 * against struct inode in ufs/ufs/inode.h.
	 */
	if (ip->i_din1 != NULL) {
#ifdef FFS2
		if (ip->i_ump->um_fstype == UM_UFS2)
			pool_put(&ffs_dinode2_pool, ip->i_din2);
		else
#endif
			pool_put(&ffs_dinode1_pool, ip->i_din1);
	}

	pool_put(&ffs_ino_pool, ip);

	vp->v_data = NULL;	/* vnode no longer references the inode */

	return (0);
}
  534 
#ifdef FIFO
/*
 * Reclaim a FIFO vnode: release the fifofs-private state first, then
 * reclaim the underlying FFS inode as usual.
 */
int
ffsfifo_reclaim(void *v)
{
	fifo_reclaim(v);
	return (ffs_reclaim(v));
}
#endif

/* [<][>][^][v][top][bottom][index][help] */