root/kern/sys_generic.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. sys_read
  2. dofileread
  3. sys_readv
  4. dofilereadv
  5. sys_write
  6. dofilewrite
  7. sys_writev
  8. dofilewritev
  9. sys_ioctl
  10. sys_select
  11. selscan
  12. seltrue
  13. selrecord
  14. selwakeup
  15. pollscan
  16. sys_poll

    1 /*      $OpenBSD: sys_generic.c,v 1.57 2007/07/25 23:11:52 art Exp $    */
    2 /*      $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $     */
    3 
    4 /*
    5  * Copyright (c) 1996 Theo de Raadt
    6  * Copyright (c) 1982, 1986, 1989, 1993
    7  *      The Regents of the University of California.  All rights reserved.
    8  * (c) UNIX System Laboratories, Inc.
    9  * All or some portions of this file are derived from material licensed
   10  * to the University of California by American Telephone and Telegraph
   11  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   12  * the permission of UNIX System Laboratories, Inc.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  * 3. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  *      @(#)sys_generic.c       8.5 (Berkeley) 1/21/94
   39  */
   40 
   41 #include <sys/param.h>
   42 #include <sys/systm.h>
   43 #include <sys/filedesc.h>
   44 #include <sys/ioctl.h>
   45 #include <sys/file.h>
   46 #include <sys/proc.h>
   47 #include <sys/resourcevar.h>
   48 #include <sys/socketvar.h>
   49 #include <sys/signalvar.h>
   50 #include <sys/uio.h>
   51 #include <sys/kernel.h>
   52 #include <sys/stat.h>
   53 #include <sys/malloc.h>
   54 #include <sys/poll.h>
   55 #ifdef KTRACE
   56 #include <sys/ktrace.h>
   57 #endif
   58 #include <sys/sched.h>
   59 
   60 #include <sys/mount.h>
   61 #include <sys/syscallargs.h>
   62 
   63 #include <uvm/uvm_extern.h>
   64 
   65 int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *);
   66 int seltrue(dev_t, int, struct proc *);
   67 void pollscan(struct proc *, struct pollfd *, u_int, register_t *);
   68 
   69 /*
   70  * Read system call.
   71  */
   72 /* ARGSUSED */
   73 int
   74 sys_read(struct proc *p, void *v, register_t *retval)
   75 {
   76         struct sys_read_args /* {
   77                 syscallarg(int) fd;
   78                 syscallarg(void *) buf;
   79                 syscallarg(size_t) nbyte;
   80         } */ *uap = v;
   81         int fd = SCARG(uap, fd);
   82         struct file *fp;
   83         struct filedesc *fdp = p->p_fd;
   84 
   85         if ((fp = fd_getfile(fdp, fd)) == NULL)
   86                 return (EBADF);
   87         if ((fp->f_flag & FREAD) == 0)
   88                 return (EBADF);
   89 
   90         FREF(fp);
   91 
   92         /* dofileread() will FRELE the descriptor for us */
   93         return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
   94             &fp->f_offset, retval));
   95 }
   96 
   97 int
   98 dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
   99     off_t *offset, register_t *retval)
  100 {
  101         struct uio auio;
  102         struct iovec aiov;
  103         long cnt, error = 0;
  104 #ifdef KTRACE
  105         struct iovec ktriov;
  106 #endif
  107 
  108         aiov.iov_base = buf;
  109         aiov.iov_len = nbyte;
  110         auio.uio_iov = &aiov;
  111         auio.uio_iovcnt = 1;
  112         auio.uio_resid = nbyte;
  113         auio.uio_rw = UIO_READ;
  114         auio.uio_segflg = UIO_USERSPACE;
  115         auio.uio_procp = p;
  116 
  117         /*
  118          * Reads return ssize_t because -1 is returned on error.  Therefore
  119          * we must restrict the length to SSIZE_MAX to avoid garbage return
  120          * values.
  121          */
  122         if (auio.uio_resid > SSIZE_MAX) {
  123                 error = EINVAL;
  124                 goto out;
  125         }
  126 
  127 #ifdef KTRACE
  128         /*
  129          * if tracing, save a copy of iovec
  130          */
  131         if (KTRPOINT(p, KTR_GENIO))
  132                 ktriov = aiov;
  133 #endif
  134         cnt = auio.uio_resid;
  135         error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
  136         if (error)
  137                 if (auio.uio_resid != cnt && (error == ERESTART ||
  138                     error == EINTR || error == EWOULDBLOCK))
  139                         error = 0;
  140         cnt -= auio.uio_resid;
  141 
  142         fp->f_rxfer++;
  143         fp->f_rbytes += cnt;
  144 #ifdef KTRACE
  145         if (KTRPOINT(p, KTR_GENIO) && error == 0)
  146                 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
  147 #endif
  148         *retval = cnt;
  149  out:
  150         FRELE(fp);
  151         return (error);
  152 }
  153 
  154 /*
  155  * Scatter read system call.
  156  */
  157 int
  158 sys_readv(struct proc *p, void *v, register_t *retval)
  159 {
  160         struct sys_readv_args /* {
  161                 syscallarg(int) fd;
  162                 syscallarg(const struct iovec *) iovp;
  163                 syscallarg(int) iovcnt;
  164         } */ *uap = v;
  165         int fd = SCARG(uap, fd);
  166         struct file *fp;
  167         struct filedesc *fdp = p->p_fd;
  168 
  169         if ((fp = fd_getfile(fdp, fd)) == NULL)
  170                 return (EBADF);
  171         if ((fp->f_flag & FREAD) == 0)
  172                 return (EBADF);
  173 
  174         FREF(fp);
  175 
  176         /* dofilereadv() will FRELE the descriptor for us */
  177         return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
  178             &fp->f_offset, retval));
  179 }
  180 
  181 int
  182 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
  183     int iovcnt, off_t *offset, register_t *retval)
  184 {
  185         struct uio auio;
  186         struct iovec *iov;
  187         struct iovec *needfree;
  188         struct iovec aiov[UIO_SMALLIOV];
  189         long i, cnt, error = 0;
  190         u_int iovlen;
  191 #ifdef KTRACE
  192         struct iovec *ktriov = NULL;
  193 #endif
  194 
  195         /* note: can't use iovlen until iovcnt is validated */
  196         iovlen = iovcnt * sizeof(struct iovec);
  197         if ((u_int)iovcnt > UIO_SMALLIOV) {
  198                 if ((u_int)iovcnt > IOV_MAX) {
  199                         error = EINVAL;
  200                         goto out;
  201                 }
  202                 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
  203         } else if ((u_int)iovcnt > 0) {
  204                 iov = aiov;
  205                 needfree = NULL;
  206         } else {
  207                 error = EINVAL;
  208                 goto out;
  209         }
  210 
  211         auio.uio_iov = iov;
  212         auio.uio_iovcnt = iovcnt;
  213         auio.uio_rw = UIO_READ;
  214         auio.uio_segflg = UIO_USERSPACE;
  215         auio.uio_procp = p;
  216         error = copyin(iovp, iov, iovlen);
  217         if (error)
  218                 goto done;
  219         auio.uio_resid = 0;
  220         for (i = 0; i < iovcnt; i++) {
  221                 auio.uio_resid += iov->iov_len;
  222                 /*
  223                  * Reads return ssize_t because -1 is returned on error.
  224                  * Therefore we must restrict the length to SSIZE_MAX to
  225                  * avoid garbage return values.
  226                  */
  227                 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
  228                         error = EINVAL;
  229                         goto done;
  230                 }
  231                 iov++;
  232         }
  233 #ifdef KTRACE
  234         /*
  235          * if tracing, save a copy of iovec
  236          */
  237         if (KTRPOINT(p, KTR_GENIO))  {
  238                 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
  239                 bcopy(auio.uio_iov, ktriov, iovlen);
  240         }
  241 #endif
  242         cnt = auio.uio_resid;
  243         error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
  244         if (error)
  245                 if (auio.uio_resid != cnt && (error == ERESTART ||
  246                     error == EINTR || error == EWOULDBLOCK))
  247                         error = 0;
  248         cnt -= auio.uio_resid;
  249 
  250         fp->f_rxfer++;
  251         fp->f_rbytes += cnt;
  252 #ifdef KTRACE
  253         if (ktriov != NULL) {
  254                 if (error == 0) 
  255                         ktrgenio(p, fd, UIO_READ, ktriov, cnt,
  256                             error);
  257                 free(ktriov, M_TEMP);
  258         }
  259 #endif
  260         *retval = cnt;
  261  done:
  262         if (needfree)
  263                 free(needfree, M_IOV);
  264  out:
  265         FRELE(fp);
  266         return (error);
  267 }
  268 
  269 /*
  270  * Write system call
  271  */
  272 int
  273 sys_write(struct proc *p, void *v, register_t *retval)
  274 {
  275         struct sys_write_args /* {
  276                 syscallarg(int) fd;
  277                 syscallarg(const void *) buf;
  278                 syscallarg(size_t) nbyte;
  279         } */ *uap = v;
  280         int fd = SCARG(uap, fd);
  281         struct file *fp;
  282         struct filedesc *fdp = p->p_fd;
  283 
  284         if ((fp = fd_getfile(fdp, fd)) == NULL)
  285                 return (EBADF);
  286         if ((fp->f_flag & FWRITE) == 0)
  287                 return (EBADF);
  288 
  289         FREF(fp);
  290 
  291         /* dofilewrite() will FRELE the descriptor for us */
  292         return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
  293             &fp->f_offset, retval));
  294 }
  295 
  296 int
  297 dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
  298     size_t nbyte, off_t *offset, register_t *retval)
  299 {
  300         struct uio auio;
  301         struct iovec aiov;
  302         long cnt, error = 0;
  303 #ifdef KTRACE
  304         struct iovec ktriov;
  305 #endif
  306 
  307         aiov.iov_base = (void *)buf;            /* XXX kills const */
  308         aiov.iov_len = nbyte;
  309         auio.uio_iov = &aiov;
  310         auio.uio_iovcnt = 1;
  311         auio.uio_resid = nbyte;
  312         auio.uio_rw = UIO_WRITE;
  313         auio.uio_segflg = UIO_USERSPACE;
  314         auio.uio_procp = p;
  315 
  316         /*
  317          * Writes return ssize_t because -1 is returned on error.  Therefore
  318          * we must restrict the length to SSIZE_MAX to avoid garbage return
  319          * values.
  320          */
  321         if (auio.uio_resid > SSIZE_MAX) {
  322                 error = EINVAL;
  323                 goto out;
  324         }
  325 
  326 #ifdef KTRACE
  327         /*
  328          * if tracing, save a copy of iovec
  329          */
  330         if (KTRPOINT(p, KTR_GENIO))
  331                 ktriov = aiov;
  332 #endif
  333         cnt = auio.uio_resid;
  334         error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
  335         if (error) {
  336                 if (auio.uio_resid != cnt && (error == ERESTART ||
  337                     error == EINTR || error == EWOULDBLOCK))
  338                         error = 0;
  339                 if (error == EPIPE)
  340                         psignal(p, SIGPIPE);
  341         }
  342         cnt -= auio.uio_resid;
  343 
  344         fp->f_wxfer++;
  345         fp->f_wbytes += cnt;
  346 #ifdef KTRACE
  347         if (KTRPOINT(p, KTR_GENIO) && error == 0)
  348                 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
  349 #endif
  350         *retval = cnt;
  351  out:
  352         FRELE(fp);
  353         return (error);
  354 }
  355 
  356 /*
  357  * Gather write system call
  358  */
  359 int
  360 sys_writev(struct proc *p, void *v, register_t *retval)
  361 {
  362         struct sys_writev_args /* {
  363                 syscallarg(int) fd;
  364                 syscallarg(const struct iovec *) iovp;
  365                 syscallarg(int) iovcnt;
  366         } */ *uap = v;
  367         int fd = SCARG(uap, fd);
  368         struct file *fp;
  369         struct filedesc *fdp = p->p_fd;
  370 
  371         if ((fp = fd_getfile(fdp, fd)) == NULL)
  372                 return (EBADF);
  373         if ((fp->f_flag & FWRITE) == 0)
  374                 return (EBADF);
  375 
  376         FREF(fp);
  377 
  378         /* dofilewritev() will FRELE the descriptor for us */
  379         return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
  380             &fp->f_offset, retval));
  381 }
  382 
  383 int
  384 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
  385     int iovcnt, off_t *offset, register_t *retval)
  386 {
  387         struct uio auio;
  388         struct iovec *iov;
  389         struct iovec *needfree;
  390         struct iovec aiov[UIO_SMALLIOV];
  391         long i, cnt, error = 0;
  392         u_int iovlen;
  393 #ifdef KTRACE
  394         struct iovec *ktriov = NULL;
  395 #endif
  396 
  397         /* note: can't use iovlen until iovcnt is validated */
  398         iovlen = iovcnt * sizeof(struct iovec);
  399         if ((u_int)iovcnt > UIO_SMALLIOV) {
  400                 if ((u_int)iovcnt > IOV_MAX) {
  401                         error = EINVAL;
  402                         goto out;
  403                 }
  404                 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
  405         } else if ((u_int)iovcnt > 0) {
  406                 iov = aiov;
  407                 needfree = NULL;
  408         } else {
  409                 error = EINVAL;
  410                 goto out;
  411         }
  412 
  413         auio.uio_iov = iov;
  414         auio.uio_iovcnt = iovcnt;
  415         auio.uio_rw = UIO_WRITE;
  416         auio.uio_segflg = UIO_USERSPACE;
  417         auio.uio_procp = p;
  418         error = copyin(iovp, iov, iovlen);
  419         if (error)
  420                 goto done;
  421         auio.uio_resid = 0;
  422         for (i = 0; i < iovcnt; i++) {
  423                 auio.uio_resid += iov->iov_len;
  424                 /*
  425                  * Writes return ssize_t because -1 is returned on error.
  426                  * Therefore we must restrict the length to SSIZE_MAX to
  427                  * avoid garbage return values.
  428                  */
  429                 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
  430                         error = EINVAL;
  431                         goto done;
  432                 }
  433                 iov++;
  434         }
  435 #ifdef KTRACE
  436         /*
  437          * if tracing, save a copy of iovec
  438          */
  439         if (KTRPOINT(p, KTR_GENIO))  {
  440                 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
  441                 bcopy(auio.uio_iov, ktriov, iovlen);
  442         }
  443 #endif
  444         cnt = auio.uio_resid;
  445         error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
  446         if (error) {
  447                 if (auio.uio_resid != cnt && (error == ERESTART ||
  448                     error == EINTR || error == EWOULDBLOCK))
  449                         error = 0;
  450                 if (error == EPIPE)
  451                         psignal(p, SIGPIPE);
  452         }
  453         cnt -= auio.uio_resid;
  454 
  455         fp->f_wxfer++;
  456         fp->f_wbytes += cnt;
  457 #ifdef KTRACE
  458         if (ktriov != NULL) {
  459                 if (error == 0) 
  460                         ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
  461                 free(ktriov, M_TEMP);
  462         }
  463 #endif
  464         *retval = cnt;
  465  done:
  466         if (needfree)
  467                 free(needfree, M_IOV);
  468  out:
  469         FRELE(fp);
  470         return (error);
  471 }
  472 
  473 /*
  474  * Ioctl system call
  475  */
  476 /* ARGSUSED */
  477 int
  478 sys_ioctl(struct proc *p, void *v, register_t *retval)
  479 {
  480         struct sys_ioctl_args /* {
  481                 syscallarg(int) fd;
  482                 syscallarg(u_long) com;
  483                 syscallarg(void *) data;
  484         } */ *uap = v;
  485         struct file *fp;
  486         struct filedesc *fdp;
  487         u_long com;
  488         int error;
  489         u_int size;
  490         caddr_t data, memp;
  491         int tmp;
  492 #define STK_PARAMS      128
  493         char stkbuf[STK_PARAMS];
  494 
  495         fdp = p->p_fd;
  496         if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
  497                 return (EBADF);
  498 
  499         if ((fp->f_flag & (FREAD | FWRITE)) == 0)
  500                 return (EBADF);
  501 
  502         switch (com = SCARG(uap, com)) {
  503         case FIONCLEX:
  504                 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
  505                 return (0);
  506         case FIOCLEX:
  507                 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
  508                 return (0);
  509         }
  510 
  511         /*
  512          * Interpret high order word to find amount of data to be
  513          * copied to/from the user's address space.
  514          */
  515         size = IOCPARM_LEN(com);
  516         if (size > IOCPARM_MAX)
  517                 return (ENOTTY);
  518         FREF(fp);
  519         memp = NULL;
  520         if (size > sizeof (stkbuf)) {
  521                 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
  522                 data = memp;
  523         } else
  524                 data = stkbuf;
  525         if (com&IOC_IN) {
  526                 if (size) {
  527                         error = copyin(SCARG(uap, data), data, (u_int)size);
  528                         if (error) {
  529                                 goto out;
  530                         }
  531                 } else
  532                         *(caddr_t *)data = SCARG(uap, data);
  533         } else if ((com&IOC_OUT) && size)
  534                 /*
  535                  * Zero the buffer so the user always
  536                  * gets back something deterministic.
  537                  */
  538                 bzero(data, size);
  539         else if (com&IOC_VOID)
  540                 *(caddr_t *)data = SCARG(uap, data);
  541 
  542         switch (com) {
  543 
  544         case FIONBIO:
  545                 if ((tmp = *(int *)data) != 0)
  546                         fp->f_flag |= FNONBLOCK;
  547                 else
  548                         fp->f_flag &= ~FNONBLOCK;
  549                 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
  550                 break;
  551 
  552         case FIOASYNC:
  553                 if ((tmp = *(int *)data) != 0)
  554                         fp->f_flag |= FASYNC;
  555                 else
  556                         fp->f_flag &= ~FASYNC;
  557                 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
  558                 break;
  559 
  560         case FIOSETOWN:
  561                 tmp = *(int *)data;
  562                 if (fp->f_type == DTYPE_SOCKET) {
  563                         struct socket *so = (struct socket *)fp->f_data;
  564 
  565                         so->so_pgid = tmp;
  566                         so->so_siguid = p->p_cred->p_ruid;
  567                         so->so_sigeuid = p->p_ucred->cr_uid;
  568                         error = 0;
  569                         break;
  570                 }
  571                 if (tmp <= 0) {
  572                         tmp = -tmp;
  573                 } else {
  574                         struct proc *p1 = pfind(tmp);
  575                         if (p1 == 0) {
  576                                 error = ESRCH;
  577                                 break;
  578                         }
  579                         tmp = p1->p_pgrp->pg_id;
  580                 }
  581                 error = (*fp->f_ops->fo_ioctl)
  582                         (fp, TIOCSPGRP, (caddr_t)&tmp, p);
  583                 break;
  584 
  585         case FIOGETOWN:
  586                 if (fp->f_type == DTYPE_SOCKET) {
  587                         error = 0;
  588                         *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
  589                         break;
  590                 }
  591                 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
  592                 *(int *)data = -*(int *)data;
  593                 break;
  594 
  595         default:
  596                 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
  597                 break;
  598         }
  599         /*
  600          * Copy any data to user, size was
  601          * already set and checked above.
  602          */
  603         if (error == 0 && (com&IOC_OUT) && size)
  604                 error = copyout(data, SCARG(uap, data), (u_int)size);
  605 out:
  606         FRELE(fp);
  607         if (memp)
  608                 free(memp, M_IOCTLOPS);
  609         return (error);
  610 }
  611 
  612 int     selwait, nselcoll;
  613 
  614 /*
  615  * Select system call.
  616  */
  617 int
  618 sys_select(struct proc *p, void *v, register_t *retval)
  619 {
  620         struct sys_select_args /* {
  621                 syscallarg(int) nd;
  622                 syscallarg(fd_set *) in;
  623                 syscallarg(fd_set *) ou;
  624                 syscallarg(fd_set *) ex;
  625                 syscallarg(struct timeval *) tv;
  626         } */ *uap = v;
  627         fd_mask bits[6];
  628         fd_set *pibits[3], *pobits[3];
  629         struct timeval atv, rtv, ttv;
  630         int s, ncoll, error = 0, timo;
  631         u_int nd, ni;
  632 
  633         nd = SCARG(uap, nd);
  634         if (nd > p->p_fd->fd_nfiles) {
  635                 /* forgiving; slightly wrong */
  636                 nd = p->p_fd->fd_nfiles;
  637         }
  638         ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
  639         if (nd > sizeof(bits[0])) {
  640                 caddr_t mbits;
  641 
  642                 mbits = malloc(ni * 6, M_TEMP, M_WAITOK);
  643                 bzero(mbits, ni * 6);
  644                 pibits[0] = (fd_set *)&mbits[ni * 0];
  645                 pibits[1] = (fd_set *)&mbits[ni * 1];
  646                 pibits[2] = (fd_set *)&mbits[ni * 2];
  647                 pobits[0] = (fd_set *)&mbits[ni * 3];
  648                 pobits[1] = (fd_set *)&mbits[ni * 4];
  649                 pobits[2] = (fd_set *)&mbits[ni * 5];
  650         } else {
  651                 bzero(bits, sizeof(bits));
  652                 pibits[0] = (fd_set *)&bits[0];
  653                 pibits[1] = (fd_set *)&bits[1];
  654                 pibits[2] = (fd_set *)&bits[2];
  655                 pobits[0] = (fd_set *)&bits[3];
  656                 pobits[1] = (fd_set *)&bits[4];
  657                 pobits[2] = (fd_set *)&bits[5];
  658         }
  659 
  660 #define getbits(name, x) \
  661         if (SCARG(uap, name) && (error = copyin(SCARG(uap, name), \
  662             pibits[x], ni))) \
  663                 goto done;
  664         getbits(in, 0);
  665         getbits(ou, 1);
  666         getbits(ex, 2);
  667 #undef  getbits
  668 
  669         if (SCARG(uap, tv)) {
  670                 error = copyin(SCARG(uap, tv), &atv, sizeof (atv));
  671                 if (error)
  672                         goto done;
  673                 if (itimerfix(&atv)) {
  674                         error = EINVAL;
  675                         goto done;
  676                 }
  677                 getmicrouptime(&rtv);
  678                 timeradd(&atv, &rtv, &atv);
  679         } else {
  680                 atv.tv_sec = 0;
  681                 atv.tv_usec = 0;
  682         }
  683         timo = 0;
  684 
  685 retry:
  686         ncoll = nselcoll;
  687         atomic_setbits_int(&p->p_flag, P_SELECT);
  688         error = selscan(p, pibits[0], pobits[0], nd, ni, retval);
  689         if (error || *retval)
  690                 goto done;
  691         if (SCARG(uap, tv)) {
  692                 getmicrouptime(&rtv);
  693                 if (timercmp(&rtv, &atv, >=))
  694                         goto done;
  695                 ttv = atv;
  696                 timersub(&ttv, &rtv, &ttv);
  697                 timo = ttv.tv_sec > 24 * 60 * 60 ?
  698                         24 * 60 * 60 * hz : tvtohz(&ttv);
  699         }
  700         s = splhigh();
  701         if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
  702                 splx(s);
  703                 goto retry;
  704         }
  705         atomic_clearbits_int(&p->p_flag, P_SELECT);
  706         error = tsleep(&selwait, PSOCK | PCATCH, "select", timo);
  707         splx(s);
  708         if (error == 0)
  709                 goto retry;
  710 done:
  711         atomic_clearbits_int(&p->p_flag, P_SELECT);
  712         /* select is not restarted after signals... */
  713         if (error == ERESTART)
  714                 error = EINTR;
  715         if (error == EWOULDBLOCK)
  716                 error = 0;
  717 #define putbits(name, x) \
  718         if (SCARG(uap, name) && (error2 = copyout(pobits[x], \
  719             SCARG(uap, name), ni))) \
  720                 error = error2;
  721         if (error == 0) {
  722                 int error2;
  723 
  724                 putbits(in, 0);
  725                 putbits(ou, 1);
  726                 putbits(ex, 2);
  727 #undef putbits
  728         }
  729         
  730         if (pibits[0] != (fd_set *)&bits[0])
  731                 free(pibits[0], M_TEMP);
  732         return (error);
  733 }
  734 
  735 int
  736 selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni,
  737     register_t *retval)
  738 {
  739         caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits;
  740         struct filedesc *fdp = p->p_fd;
  741         int msk, i, j, fd;
  742         fd_mask bits;
  743         struct file *fp;
  744         int n = 0;
  745         static const int flag[3] = { POLLIN, POLLOUT, POLLPRI };
  746 
  747         for (msk = 0; msk < 3; msk++) {
  748                 fd_set *pibits = (fd_set *)&cibits[msk*ni];
  749                 fd_set *pobits = (fd_set *)&cobits[msk*ni];
  750 
  751                 for (i = 0; i < nfd; i += NFDBITS) {
  752                         bits = pibits->fds_bits[i/NFDBITS];
  753                         while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
  754                                 bits &= ~(1 << j);
  755                                 if ((fp = fd_getfile(fdp, fd)) == NULL)
  756                                         return (EBADF);
  757                                 FREF(fp);
  758                                 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
  759                                         FD_SET(fd, pobits);
  760                                         n++;
  761                                 }
  762                                 FRELE(fp);
  763                         }
  764                 }
  765         }
  766         *retval = n;
  767         return (0);
  768 }
  769 
  770 /*ARGSUSED*/
  771 int
  772 seltrue(dev_t dev, int events, struct proc *p)
  773 {
  774 
  775         return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
  776 }
  777 
  778 /*
  779  * Record a select request.
  780  */
  781 void
  782 selrecord(struct proc *selector, struct selinfo *sip)
  783 {
  784         struct proc *p;
  785         pid_t mypid;
  786 
  787         mypid = selector->p_pid;
  788         if (sip->si_selpid == mypid)
  789                 return;
  790         if (sip->si_selpid && (p = pfind(sip->si_selpid)) &&
  791             p->p_wchan == (caddr_t)&selwait)
  792                 sip->si_flags |= SI_COLL;
  793         else
  794                 sip->si_selpid = mypid;
  795 }
  796 
  797 /*
  798  * Do a wakeup when a selectable event occurs.
  799  */
  800 void
  801 selwakeup(struct selinfo *sip)
  802 {
  803         struct proc *p;
  804         int s;
  805 
  806         if (sip->si_selpid == 0)
  807                 return;
  808         if (sip->si_flags & SI_COLL) {
  809                 nselcoll++;
  810                 sip->si_flags &= ~SI_COLL;
  811                 wakeup(&selwait);
  812         }
  813         p = pfind(sip->si_selpid);
  814         sip->si_selpid = 0;
  815         if (p != NULL) {
  816                 SCHED_LOCK(s);
  817                 if (p->p_wchan == (caddr_t)&selwait) {
  818                         if (p->p_stat == SSLEEP)
  819                                 setrunnable(p);
  820                         else
  821                                 unsleep(p);
  822                 } else if (p->p_flag & P_SELECT)
  823                         atomic_clearbits_int(&p->p_flag, P_SELECT);
  824                 SCHED_UNLOCK(s);
  825         }
  826 }
  827 
  828 void
  829 pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval)
  830 {
  831         struct filedesc *fdp = p->p_fd;
  832         struct file *fp;
  833         u_int i;
  834         int n = 0;
  835 
  836         for (i = 0; i < nfd; i++, pl++) {
  837                 /* Check the file descriptor. */
  838                 if (pl->fd < 0) {
  839                         pl->revents = 0;
  840                         continue;
  841                 }
  842                 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) {
  843                         pl->revents = POLLNVAL;
  844                         n++;
  845                         continue;
  846                 }
  847                 FREF(fp);
  848                 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p);
  849                 FRELE(fp);
  850                 if (pl->revents != 0)
  851                         n++;
  852         }
  853         *retval = n;
  854 }
  855 
  856 /*
  857  * We are using the same mechanism as select only we encode/decode args
  858  * differently.
  859  */
  860 int
  861 sys_poll(struct proc *p, void *v, register_t *retval)
  862 {
  863         struct sys_poll_args /* {
  864                 syscallarg(struct pollfd *) fds;
  865                 syscallarg(u_int) nfds;
  866                 syscallarg(int) timeout;
  867         } */ *uap = v;
  868         size_t sz;
  869         struct pollfd pfds[4], *pl = pfds;
  870         int msec = SCARG(uap, timeout);
  871         struct timeval atv, rtv, ttv;
  872         int timo, ncoll, i, s, error;
  873         extern int nselcoll, selwait;
  874         u_int nfds = SCARG(uap, nfds);
  875 
  876         /* Standards say no more than MAX_OPEN; this is possibly better. */
  877         if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles))
  878                 return (EINVAL);
  879 
  880         sz = sizeof(struct pollfd) * nfds;
  881         
  882         /* optimize for the default case, of a small nfds value */
  883         if (sz > sizeof(pfds))
  884                 pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK);
  885 
  886         if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0)
  887                 goto bad;
  888 
  889         for (i = 0; i < nfds; i++)
  890                 pl[i].revents = 0;
  891 
  892         if (msec != INFTIM) {
  893                 atv.tv_sec = msec / 1000;
  894                 atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000;
  895 
  896                 if (itimerfix(&atv)) {
  897                         error = EINVAL;
  898                         goto done;
  899                 }
  900                 getmicrouptime(&rtv);
  901                 timeradd(&atv, &rtv, &atv);
  902         } else {
  903                 atv.tv_sec = 0;
  904                 atv.tv_usec = 0;
  905         }
  906         timo = 0;
  907 
  908 retry:
  909         ncoll = nselcoll;
  910         atomic_setbits_int(&p->p_flag, P_SELECT);
  911         pollscan(p, pl, nfds, retval);
  912         if (*retval)
  913                 goto done;
  914         if (msec != INFTIM) {
  915                 getmicrouptime(&rtv);
  916                 if (timercmp(&rtv, &atv, >=))
  917                         goto done;
  918                 ttv = atv;
  919                 timersub(&ttv, &rtv, &ttv);
  920                 timo = ttv.tv_sec > 24 * 60 * 60 ?
  921                         24 * 60 * 60 * hz : tvtohz(&ttv);
  922         }
  923         s = splhigh();
  924         if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
  925                 splx(s);
  926                 goto retry;
  927         }
  928         atomic_clearbits_int(&p->p_flag, P_SELECT);
  929         error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo);
  930         splx(s);
  931         if (error == 0)
  932                 goto retry;
  933 
  934 done:
  935         atomic_clearbits_int(&p->p_flag, P_SELECT);
  936         /*
  937          * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is
  938          *       ignored (since the whole point is to see what would block).
  939          */
  940         switch (error) {
  941         case ERESTART:
  942                 error = copyout(pl, SCARG(uap, fds), sz);
  943                 if (error == 0)
  944                         error = EINTR;
  945                 break;
  946         case EWOULDBLOCK:
  947         case 0:
  948                 error = copyout(pl, SCARG(uap, fds), sz);
  949                 break;
  950         }
  951 bad:
  952         if (pl != pfds)
  953                 free(pl, M_TEMP);
  954         return (error);
  955 }

/* [<][>][^][v][top][bottom][index][help] */