root/nfs/nfs_socket.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. nfs_connect
  2. nfs_reconnect
  3. nfs_disconnect
  4. nfs_send
  5. nfs_receive
  6. nfs_reply
  7. nfs_request
  8. nfs_rephead
  9. nfs_timer
  10. nfs_sigintr
  11. nfs_sndlock
  12. nfs_sndunlock
  13. nfs_rcvlock
  14. nfs_rcvunlock
  15. nfs_realign_fixup
  16. nfs_realign
  17. nfs_getreq
  18. nfs_msg
  19. nfsrv_rcv
  20. nfsrv_getstream
  21. nfsrv_dorec
  22. nfsrv_wakenfsd

    1 /*      $OpenBSD: nfs_socket.c,v 1.49 2007/06/25 20:40:00 thib Exp $    */
    2 /*      $NetBSD: nfs_socket.c,v 1.27 1996/04/15 20:20:00 thorpej Exp $  */
    3 
    4 /*
    5  * Copyright (c) 1989, 1991, 1993, 1995
    6  *      The Regents of the University of California.  All rights reserved.
    7  *
    8  * This code is derived from software contributed to Berkeley by
    9  * Rick Macklem at The University of Guelph.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 3. Neither the name of the University nor the names of its contributors
   20  *    may be used to endorse or promote products derived from this software
   21  *    without specific prior written permission.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   33  * SUCH DAMAGE.
   34  *
   35  *      @(#)nfs_socket.c        8.5 (Berkeley) 3/30/95
   36  */
   37 
   38 /*
   39  * Socket operations for use by nfs
   40  */
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/proc.h>
   45 #include <sys/mount.h>
   46 #include <sys/kernel.h>
   47 #include <sys/mbuf.h>
   48 #include <sys/vnode.h>
   49 #include <sys/domain.h>
   50 #include <sys/protosw.h>
   51 #include <sys/socket.h>
   52 #include <sys/socketvar.h>
   53 #include <sys/syslog.h>
   54 #include <sys/tprintf.h>
   55 #include <sys/namei.h>
   56 
   57 #include <netinet/in.h>
   58 #include <netinet/tcp.h>
   59 
   60 #include <nfs/rpcv2.h>
   61 #include <nfs/nfsproto.h>
   62 #include <nfs/nfs.h>
   63 #include <nfs/xdr_subs.h>
   64 #include <nfs/nfsm_subs.h>
   65 #include <nfs/nfsmount.h>
   66 #include <nfs/nfsnode.h>
   67 #include <nfs/nfsrtt.h>
   68 #include <nfs/nfs_var.h>
   69 
   70 #define TRUE    1
   71 #define FALSE   0
   72 
   73 /*
   74  * Estimate rto for an nfs rpc sent via. an unreliable datagram.
   75  * Use the mean and mean deviation of rtt for the appropriate type of rpc
   76  * for the frequent rpcs and a default for the others.
   77  * The justification for doing "other" this way is that these rpcs
   78  * happen so infrequently that timer est. would probably be stale.
   79  * Also, since many of these rpcs are
   80  * non-idempotent, a conservative timeout is desired.
   81  * getattr, lookup - A+2D
   82  * read, write     - A+4D
   83  * other           - nm_timeo
   84  */
   85 #define NFS_RTO(n, t) \
   86         ((t) == 0 ? (n)->nm_timeo : \
   87          ((t) < 3 ? \
   88           (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
   89           ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
   90 #define NFS_SRTT(r)     (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
   91 #define NFS_SDRTT(r)    (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
   92 /*
   93  * External data, mostly RPC constants in XDR form
   94  */
   95 extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers,
   96         rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr,
   97         rpc_auth_kerb;
   98 extern u_int32_t nfs_prog;
   99 extern struct nfsstats nfsstats;
  100 extern int nfsv3_procid[NFS_NPROCS];
  101 extern int nfs_ticks;
  102 
  103 /*
  104  * Defines which timer to use for the procnum.
  105  * 0 - default
  106  * 1 - getattr
  107  * 2 - lookup
  108  * 3 - read
  109  * 4 - write
  110  */
  111 static int proct[NFS_NPROCS] = {
  112         0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
  113         0, 0, 0,
  114 };
  115 
  116 /*
  117  * There is a congestion window for outstanding rpcs maintained per mount
  118  * point. The cwnd size is adjusted in roughly the way that:
  119  * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
  120  * SIGCOMM '88". ACM, August 1988.
  121  * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
  122  * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
  123  * of rpcs is in progress.
  124  * (The sent count and cwnd are scaled for integer arith.)
  125  * Variants of "slow start" were tried and were found to be too much of a
  126  * performance hit (ave. rtt 3 times larger),
  127  * I suspect due to the large rtt that nfs rpcs have.
  128  */
  129 #define NFS_CWNDSCALE   256
  130 #define NFS_MAXCWND     (NFS_CWNDSCALE * 32)
  131 static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
  132 int nfsrtton = 0;
  133 struct nfsrtt nfsrtt;
  134 
  135 void nfs_realign(struct mbuf **, int);
  136 void nfs_realign_fixup(struct mbuf *, struct mbuf *, unsigned int *);
  137 unsigned int nfs_realign_test = 0;
  138 unsigned int nfs_realign_count = 0;
  139 
  140 struct nfsreqhead nfs_reqq;
  141 
  142 /*
  143  * Initialize sockets and congestion for a new NFS connection.
  144  * We do not free the sockaddr if error.
  145  */
  146 int
  147 nfs_connect(nmp, rep)
  148         struct nfsmount *nmp;
  149         struct nfsreq *rep;
  150 {
  151         struct socket *so;
  152         int s, error, rcvreserve, sndreserve;
  153         struct sockaddr *saddr;
  154         struct sockaddr_in *sin;
  155         struct mbuf *m;
  156 
  157         nmp->nm_so = (struct socket *)0;
  158         saddr = mtod(nmp->nm_nam, struct sockaddr *);
  159         error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype, 
  160                 nmp->nm_soproto);
  161         if (error)
  162                 goto bad;
  163         so = nmp->nm_so;
  164         nmp->nm_soflags = so->so_proto->pr_flags;
  165 
  166         /*
  167          * Some servers require that the client port be a reserved port number.
  168          * We always allocate a reserved port, as this prevents filehandle
  169          * disclosure through UDP port capture.
  170          */
  171         if (saddr->sa_family == AF_INET) {
  172                 struct mbuf *mopt;
  173                 int *ip;
  174 
  175                 MGET(mopt, M_WAIT, MT_SOOPTS);
  176                 mopt->m_len = sizeof(int);
  177                 ip = mtod(mopt, int *);
  178                 *ip = IP_PORTRANGE_LOW;
  179                 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
  180                 if (error)
  181                         goto bad;
  182 
  183                 MGET(m, M_WAIT, MT_SONAME);
  184                 sin = mtod(m, struct sockaddr_in *);
  185                 sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
  186                 sin->sin_family = AF_INET;
  187                 sin->sin_addr.s_addr = INADDR_ANY;
  188                 sin->sin_port = htons(0);
  189                 error = sobind(so, m);
  190                 m_freem(m);
  191                 if (error)
  192                         goto bad;
  193 
  194                 MGET(mopt, M_WAIT, MT_SOOPTS);
  195                 mopt->m_len = sizeof(int);
  196                 ip = mtod(mopt, int *);
  197                 *ip = IP_PORTRANGE_DEFAULT;
  198                 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
  199                 if (error)
  200                         goto bad;
  201         }
  202 
  203         /*
  204          * Protocols that do not require connections may be optionally left
  205          * unconnected for servers that reply from a port other than NFS_PORT.
  206          */
  207         if (nmp->nm_flag & NFSMNT_NOCONN) {
  208                 if (nmp->nm_soflags & PR_CONNREQUIRED) {
  209                         error = ENOTCONN;
  210                         goto bad;
  211                 }
  212         } else {
  213                 error = soconnect(so, nmp->nm_nam);
  214                 if (error)
  215                         goto bad;
  216 
  217                 /*
  218                  * Wait for the connection to complete. Cribbed from the
  219                  * connect system call but with the wait timing out so
  220                  * that interruptible mounts don't hang here for a long time.
  221                  */
  222                 s = splsoftnet();
  223                 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
  224                         (void) tsleep((caddr_t)&so->so_timeo, PSOCK,
  225                                 "nfscon", 2 * hz);
  226                         if ((so->so_state & SS_ISCONNECTING) &&
  227                             so->so_error == 0 && rep &&
  228                             (error = nfs_sigintr(nmp, rep, rep->r_procp)) != 0){
  229                                 so->so_state &= ~SS_ISCONNECTING;
  230                                 splx(s);
  231                                 goto bad;
  232                         }
  233                 }
  234                 if (so->so_error) {
  235                         error = so->so_error;
  236                         so->so_error = 0;
  237                         splx(s);
  238                         goto bad;
  239                 }
  240                 splx(s);
  241         }
  242         /*
  243          * Always set receive timeout to detect server crash and reconnect.
  244          * Otherwise, we can get stuck in soreceive forever.
  245          */
  246         so->so_rcv.sb_timeo = (5 * hz);
  247         if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT))
  248                 so->so_snd.sb_timeo = (5 * hz);
  249         else
  250                 so->so_snd.sb_timeo = 0;
  251         if (nmp->nm_sotype == SOCK_DGRAM) {
  252                 sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
  253                 rcvreserve = max(nmp->nm_rsize, nmp->nm_readdirsize) +
  254                     NFS_MAXPKTHDR;
  255         } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
  256                 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
  257                 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
  258                     NFS_MAXPKTHDR) * 2;
  259         } else {
  260                 if (nmp->nm_sotype != SOCK_STREAM)
  261                         panic("nfscon sotype");
  262                 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
  263                         MGET(m, M_WAIT, MT_SOOPTS);
  264                         *mtod(m, int32_t *) = 1;
  265                         m->m_len = sizeof(int32_t);
  266                         sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
  267                 }
  268                 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
  269                         MGET(m, M_WAIT, MT_SOOPTS);
  270                         *mtod(m, int32_t *) = 1;
  271                         m->m_len = sizeof(int32_t);
  272                         sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
  273                 }
  274                 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
  275                     sizeof (u_int32_t)) * 2;
  276                 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
  277                     sizeof (u_int32_t)) * 2;
  278         }
  279         error = soreserve(so, sndreserve, rcvreserve);
  280         if (error)
  281                 goto bad;
  282         so->so_rcv.sb_flags |= SB_NOINTR;
  283         so->so_snd.sb_flags |= SB_NOINTR;
  284 
  285         /* Initialize other non-zero congestion variables */
  286         nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
  287             nmp->nm_srtt[3] = (NFS_TIMEO << 3);
  288         nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
  289             nmp->nm_sdrtt[3] = 0;
  290         nmp->nm_cwnd = NFS_MAXCWND / 2;     /* Initial send window */
  291         nmp->nm_sent = 0;
  292         nmp->nm_timeouts = 0;
  293         return (0);
  294 
  295 bad:
  296         nfs_disconnect(nmp);
  297         return (error);
  298 }
  299 
  300 /*
  301  * Reconnect routine:
  302  * Called when a connection is broken on a reliable protocol.
  303  * - clean up the old socket
  304  * - nfs_connect() again
  305  * - set R_MUSTRESEND for all outstanding requests on mount point
  306  * If this fails the mount point is DEAD!
  307  * nb: Must be called with the nfs_sndlock() set on the mount point.
  308  */
  309 int
  310 nfs_reconnect(rep)
  311         struct nfsreq *rep;
  312 {
  313         struct nfsreq *rp;
  314         struct nfsmount *nmp = rep->r_nmp;
  315         int error;
  316 
  317         nfs_disconnect(nmp);
  318         while ((error = nfs_connect(nmp, rep)) != 0) {
  319                 if (error == EINTR || error == ERESTART)
  320                         return (EINTR);
  321                 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
  322         }
  323 
  324         /*
  325          * Loop through outstanding request list and fix up all requests
  326          * on old socket.
  327          */
  328         TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
  329                 if (rp->r_nmp == nmp) {
  330                         rp->r_flags |= R_MUSTRESEND;
  331                         rp->r_rexmit = 0;
  332                 }
  333         }
  334         return (0);
  335 }
  336 
  337 /*
  338  * NFS disconnect. Clean up and unlink.
  339  */
  340 void
  341 nfs_disconnect(nmp)
  342         struct nfsmount *nmp;
  343 {
  344         struct socket *so;
  345 
  346         if (nmp->nm_so) {
  347                 so = nmp->nm_so;
  348                 nmp->nm_so = (struct socket *)0;
  349                 soshutdown(so, SHUT_RDWR);
  350                 soclose(so);
  351         }
  352 }
  353 
  354 /*
  355  * This is the nfs send routine. For connection based socket types, it
  356  * must be called with an nfs_sndlock() on the socket.
  357  * "rep == NULL" indicates that it has been called from a server.
  358  * For the client side:
  359  * - return EINTR if the RPC is terminated, 0 otherwise
  360  * - set R_MUSTRESEND if the send fails for any reason
  361  * - do any cleanup required by recoverable socket errors (???)
  362  * For the server side:
  363  * - return EINTR or ERESTART if interrupted by a signal
  364  * - return EPIPE if a connection is lost for connection based sockets (TCP...)
  365  * - do any cleanup required by recoverable socket errors (???)
  366  */
  367 int
  368 nfs_send(so, nam, top, rep)
  369         struct socket *so;
  370         struct mbuf *nam;
  371         struct mbuf *top;
  372         struct nfsreq *rep;
  373 {
  374         struct mbuf *sendnam;
  375         int error, soflags, flags;
  376 
  377         if (rep) {
  378                 if (rep->r_flags & R_SOFTTERM) {
  379                         m_freem(top);
  380                         return (EINTR);
  381                 }
  382                 if ((so = rep->r_nmp->nm_so) == NULL) {
  383                         rep->r_flags |= R_MUSTRESEND;
  384                         m_freem(top);
  385                         return (0);
  386                 }
  387                 rep->r_flags &= ~R_MUSTRESEND;
  388                 soflags = rep->r_nmp->nm_soflags;
  389         } else
  390                 soflags = so->so_proto->pr_flags;
  391         if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
  392                 sendnam = (struct mbuf *)0;
  393         else
  394                 sendnam = nam;
  395         if (so->so_type == SOCK_SEQPACKET)
  396                 flags = MSG_EOR;
  397         else
  398                 flags = 0;
  399 
  400         error = sosend(so, sendnam, (struct uio *)0, top,
  401                 (struct mbuf *)0, flags);
  402         if (error) {
  403                 if (rep) {
  404                         /*
  405                          * Deal with errors for the client side.
  406                          */
  407                         if (rep->r_flags & R_SOFTTERM)
  408                                 error = EINTR;
  409                         else
  410                                 rep->r_flags |= R_MUSTRESEND;
  411                 }
  412 
  413                 /*
  414                  * Handle any recoverable (soft) socket errors here. (???)
  415                  */
  416                 if (error != EINTR && error != ERESTART &&
  417                         error != EWOULDBLOCK && error != EPIPE)
  418                         error = 0;
  419         }
  420         return (error);
  421 }
  422 
  423 #ifdef NFSCLIENT
  424 /*
  425  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
  426  * done by soreceive(), but for SOCK_STREAM we must deal with the Record
  427  * Mark and consolidate the data into a new mbuf list.
  428  * nb: Sometimes TCP passes the data up to soreceive() in long lists of
  429  *     small mbufs.
  430  * For SOCK_STREAM we must be very careful to read an entire record once
  431  * we have read any of it, even if the system call has been interrupted.
  432  */
  433 int
  434 nfs_receive(rep, aname, mp)
  435         struct nfsreq *rep;
  436         struct mbuf **aname;
  437         struct mbuf **mp;
  438 {
  439         struct socket *so;
  440         struct uio auio;
  441         struct iovec aio;
  442         struct mbuf *m;
  443         struct mbuf *control;
  444         u_int32_t len;
  445         struct mbuf **getnam;
  446         int error, sotype, rcvflg;
  447         struct proc *p = curproc;       /* XXX */
  448 
  449         /*
  450          * Set up arguments for soreceive()
  451          */
  452         *mp = (struct mbuf *)0;
  453         *aname = (struct mbuf *)0;
  454         sotype = rep->r_nmp->nm_sotype;
  455 
  456         /*
  457          * For reliable protocols, lock against other senders/receivers
  458          * in case a reconnect is necessary.
  459          * For SOCK_STREAM, first get the Record Mark to find out how much
  460          * more there is to get.
  461          * We must lock the socket against other receivers
  462          * until we have an entire rpc request/reply.
  463          */
  464         if (sotype != SOCK_DGRAM) {
  465                 error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
  466                 if (error)
  467                         return (error);
  468 tryagain:
  469                 /*
  470                  * Check for fatal errors and resending request.
  471                  */
  472                 /*
  473                  * Ugh: If a reconnect attempt just happened, nm_so
  474                  * would have changed. NULL indicates a failed
  475                  * attempt that has essentially shut down this
  476                  * mount point.
  477                  */
  478                 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
  479                         nfs_sndunlock(&rep->r_nmp->nm_flag);
  480                         return (EINTR);
  481                 }
  482                 so = rep->r_nmp->nm_so;
  483                 if (!so) {
  484                         error = nfs_reconnect(rep); 
  485                         if (error) {
  486                                 nfs_sndunlock(&rep->r_nmp->nm_flag);
  487                                 return (error);
  488                         }
  489                         goto tryagain;
  490                 }
  491                 while (rep->r_flags & R_MUSTRESEND) {
  492                         m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
  493                         nfsstats.rpcretries++;
  494                         rep->r_rtt = 0;
  495                         rep->r_flags &= ~R_TIMING;
  496                         error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
  497                         if (error) {
  498                                 if (error == EINTR || error == ERESTART ||
  499                                     (error = nfs_reconnect(rep)) != 0) {
  500                                         nfs_sndunlock(&rep->r_nmp->nm_flag);
  501                                         return (error);
  502                                 }
  503                                 goto tryagain;
  504                         }
  505                 }
  506                 nfs_sndunlock(&rep->r_nmp->nm_flag);
  507                 if (sotype == SOCK_STREAM) {
  508                         aio.iov_base = (caddr_t) &len;
  509                         aio.iov_len = sizeof(u_int32_t);
  510                         auio.uio_iov = &aio;
  511                         auio.uio_iovcnt = 1;
  512                         auio.uio_segflg = UIO_SYSSPACE;
  513                         auio.uio_rw = UIO_READ;
  514                         auio.uio_offset = 0;
  515                         auio.uio_resid = sizeof(u_int32_t);
  516                         auio.uio_procp = p;
  517                         do {
  518                            rcvflg = MSG_WAITALL;
  519                            error = soreceive(so, (struct mbuf **)0, &auio,
  520                                 (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
  521                            if (error == EWOULDBLOCK && rep) {
  522                                 if (rep->r_flags & R_SOFTTERM)
  523                                         return (EINTR);
  524                                 /*
  525                                  * looks like the server died after it
  526                                  * received the request, make sure
  527                                  * that we will retransmit and we
  528                                  * don't get stuck here forever.
  529                                  */
  530                                 if (rep->r_rexmit >= rep->r_nmp->nm_retry) {
  531                                         nfsstats.rpctimeouts++;
  532                                         error = EPIPE;
  533                                 }
  534                            }
  535                         } while (error == EWOULDBLOCK);
  536                         if (!error && auio.uio_resid > 0) {
  537                             log(LOG_INFO,
  538                                  "short receive (%d/%d) from nfs server %s\n",
  539                                  sizeof(u_int32_t) - auio.uio_resid,
  540                                  sizeof(u_int32_t),
  541                                  rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  542                             error = EPIPE;
  543                         }
  544                         if (error)
  545                                 goto errout;
  546 
  547                         len = ntohl(len) & ~0x80000000;
  548                         /*
  549                          * This is SERIOUS! We are out of sync with the sender
  550                          * and forcing a disconnect/reconnect is all I can do.
  551                          */
  552                         if (len > NFS_MAXPACKET) {
  553                             log(LOG_ERR, "%s (%d) from nfs server %s\n",
  554                                 "impossible packet length",
  555                                 len,
  556                                 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  557                             error = EFBIG;
  558                             goto errout;
  559                         }
  560                         auio.uio_resid = len;
  561                         do {
  562                             rcvflg = MSG_WAITALL;
  563                             error =  soreceive(so, (struct mbuf **)0,
  564                                 &auio, mp, (struct mbuf **)0, &rcvflg);
  565                         } while (error == EWOULDBLOCK || error == EINTR ||
  566                                  error == ERESTART);
  567                         if (!error && auio.uio_resid > 0) {
  568                             log(LOG_INFO,
  569                                 "short receive (%d/%d) from nfs server %s\n",
  570                                 len - auio.uio_resid, len,
  571                                 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  572                             error = EPIPE;
  573                         }
  574                 } else {
  575                         /*
  576                          * NB: Since uio_resid is big, MSG_WAITALL is ignored
  577                          * and soreceive() will return when it has either a
  578                          * control msg or a data msg.
  579                          * We have no use for control msg., but must grab them
  580                          * and then throw them away so we know what is going
  581                          * on.
  582                          */
  583                         auio.uio_resid = len = 100000000; /* Anything Big */
  584                         auio.uio_procp = p;
  585                         do {
  586                             rcvflg = 0;
  587                             error =  soreceive(so, (struct mbuf **)0,
  588                                 &auio, mp, &control, &rcvflg);
  589                             if (control)
  590                                 m_freem(control);
  591                             if (error == EWOULDBLOCK && rep) {
  592                                 if (rep->r_flags & R_SOFTTERM)
  593                                         return (EINTR);
  594                             }
  595                         } while (error == EWOULDBLOCK ||
  596                                  (!error && *mp == NULL && control));
  597                         if ((rcvflg & MSG_EOR) == 0)
  598                                 printf("Egad!!\n");
  599                         if (!error && *mp == NULL)
  600                                 error = EPIPE;
  601                         len -= auio.uio_resid;
  602                 }
  603 errout:
  604                 if (error && error != EINTR && error != ERESTART) {
  605                         m_freem(*mp);
  606                         *mp = (struct mbuf *)0;
  607                         if (error != EPIPE)
  608                                 log(LOG_INFO,
  609                                     "receive error %d from nfs server %s\n",
  610                                     error,
  611                                  rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  612                         error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
  613                         if (!error) {
  614                                 error = nfs_reconnect(rep);
  615                                 if (!error)
  616                                         goto tryagain;
  617                                 nfs_sndunlock(&rep->r_nmp->nm_flag);
  618                         }
  619                 }
  620         } else {
  621                 if ((so = rep->r_nmp->nm_so) == NULL)
  622                         return (EACCES);
  623                 if (so->so_state & SS_ISCONNECTED)
  624                         getnam = (struct mbuf **)0;
  625                 else
  626                         getnam = aname;
  627                 auio.uio_resid = len = 1000000;
  628                 auio.uio_procp = p;
  629                 do {
  630                         rcvflg = 0;
  631                         error =  soreceive(so, getnam, &auio, mp,
  632                                 (struct mbuf **)0, &rcvflg);
  633                         if (error == EWOULDBLOCK &&
  634                             (rep->r_flags & R_SOFTTERM))
  635                                 return (EINTR);
  636                 } while (error == EWOULDBLOCK);
  637                 len -= auio.uio_resid;
  638         }
  639         if (error) {
  640                 m_freem(*mp);
  641                 *mp = (struct mbuf *)0;
  642         }
  643         /*
  644          * Search for any mbufs that are not a multiple of 4 bytes long
  645          * or with m_data not longword aligned.
  646          * These could cause pointer alignment problems, so copy them to
  647          * well aligned mbufs.
  648          */
  649         nfs_realign(mp, 5 * NFSX_UNSIGNED);
  650         return (error);
  651 }
  652 
  653 /*
  654  * Implement receipt of reply on a socket.
  655  * We must search through the list of received datagrams matching them
  656  * with outstanding requests using the xid, until ours is found.
  657  */
  658 /* ARGSUSED */
      /*
       * Wait until the reply to myrep has been received.
       *
       * Each pass takes the receive lock, pulls one RPC message off the
       * socket with nfs_receive(), and hands it to whichever request on
       * nfs_reqq has a matching xid and no reply yet.  The matched request
       * gets the reply (r_mrep/r_md/r_dpos), and the mount's congestion
       * window and round-trip estimates are updated.
       *
       * Returns 0 once myrep has its reply (possibly delivered by another
       * receiver), or after a single pass when R_GETONEREP is set in
       * myrep->r_flags.  Otherwise returns the error from nfs_rcvlock() or
       * nfs_receive(); errors accepted by NFSIGNORE_SOERROR() are cleared
       * on the socket and the receive is retried.
       */
  659 int
  660 nfs_reply(myrep)
  661         struct nfsreq *myrep;
  662 {
  663         struct nfsreq *rep;
  664         struct nfsmount *nmp = myrep->r_nmp;
  665         int32_t t1;
  666         struct mbuf *mrep, *nam, *md;
  667         u_int32_t rxid, *tl;
              /*
               * cp2 is never referenced directly below; presumably it is
               * scratch space for the nfsm_dissect() macro -- confirm against
               * the macro definition.
               */
  668         caddr_t dpos, cp2;
  669         int error;
  670 
  671         /*
  672          * Loop around until we get our own reply
  673          */
  674         for (;;) {
  675                 /*
  676                  * Lock against other receivers so that I don't get stuck in
  677                  * sbwait() after someone else has received my reply for me.
  678                  * Also necessary for connection based protocols to avoid
  679                  * race conditions during a reconnect.
  680                  */
  681                 error = nfs_rcvlock(myrep);
  682                 if (error)
  683                         return (error);
  684                 /* Already received, bye bye */
  685                 if (myrep->r_mrep != NULL) {
  686                         nfs_rcvunlock(&nmp->nm_flag);
  687                         return (0);
  688                 }
  689                 /*
  690                  * Get the next Rpc reply off the socket
  691                  */
  692                 error = nfs_receive(myrep, &nam, &mrep);
  693                 nfs_rcvunlock(&nmp->nm_flag);
  694                 if (error) {
  695 
  696                         /*
  697                          * Ignore routing errors on connectionless protocols??
  698                          */
  699                         if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
  700                                 if (nmp->nm_so)
  701                                         nmp->nm_so->so_error = 0;
  702                                 if (myrep->r_flags & R_GETONEREP)
  703                                         return (0);
  704                                 continue;
  705                         }
  706                         return (error);
  707                 }
                      /* The sender's address is not needed here. */
  708                 if (nam)
  709                         m_freem(nam);
  710         
  711                 /*
  712                  * Get the xid and check that it is an rpc reply
  713                  */
  714                 md = mrep;
  715                 dpos = mtod(md, caddr_t);
  716                 nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
  717                 rxid = *tl++;
  718                 if (*tl != rpc_reply) {
  719                         nfsstats.rpcinvalid++;
  720                         m_freem(mrep);
                              /*
                               * No explicit goto targets this label in the
                               * function; presumably nfsm_dissect() jumps here
                               * when the message cannot be parsed -- confirm.
                               */
  721 nfsmout:
  722                         if (myrep->r_flags & R_GETONEREP)
  723                                 return (0);
  724                         continue;
  725                 }
  726 
  727                 /*
  728                  * Loop through the request list to match up the reply
  729                  * Iff no match, just drop the datagram
  730                  */
  731                 for (rep = TAILQ_FIRST(&nfs_reqq); rep != NULL;
  732                     rep = TAILQ_NEXT(rep, r_chain)) {
  733                         if (rep->r_mrep == NULL && rxid == rep->r_xid) {
  734                                 /* Found it.. */
  735                                 rep->r_mrep = mrep;
  736                                 rep->r_md = md;
  737                                 rep->r_dpos = dpos;
                                      /*
                                       * Optional round-trip logging into the
                                       * circular nfsrtt buffer.
                                       * NOTE(review): nfs_request() only sets
                                       * R_TIMING when proct[procnum] > 0, so for
                                       * other procedures the two subscripts
                                       * below look like they evaluate to -1 --
                                       * confirm against proct[] and nm_srtt[].
                                       */
  738                                 if (nfsrtton) {
  739                                         struct rttl *rt;
  740 
  741                                         rt = &nfsrtt.rttl[nfsrtt.pos];
  742                                         rt->proc = rep->r_procnum;
  743                                         rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
  744                                         rt->sent = nmp->nm_sent;
  745                                         rt->cwnd = nmp->nm_cwnd;
  746                                         rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
  747                                         rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
  748                                         rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
  749                                         getmicrotime(&rt->tstamp);
  750                                         if (rep->r_flags & R_TIMING)
  751                                                 rt->rtt = rep->r_rtt;
  752                                         else
                                                      /* fixed value logged for untimed requests */
  753                                                 rt->rtt = 1000000;
  754                                         nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
  755                                 }
  756                                 /*
  757                                  * Update congestion window.
  758                                  * Do the additive increase of
  759                                  * one rpc/rtt.
  760                                  */
  761                                 if (nmp->nm_cwnd <= nmp->nm_sent) {
  762                                         nmp->nm_cwnd +=
  763                                            (NFS_CWNDSCALE * NFS_CWNDSCALE +
  764                                            (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
  765                                         if (nmp->nm_cwnd > NFS_MAXCWND)
  766                                                 nmp->nm_cwnd = NFS_MAXCWND;
  767                                 }
                                      /* The request no longer counts as in flight. */
  768                                 rep->r_flags &= ~R_SENT;
  769                                 nmp->nm_sent -= NFS_CWNDSCALE;
  770                                 /*
  771                                  * Update rtt using a gain of 0.125 on the mean
  772                                  * and a gain of 0.25 on the deviation.
  773                                  */
  774                                 if (rep->r_flags & R_TIMING) {
  775                                         /*
  776                                          * Since the timer resolution of
  777                                          * NFS_HZ is so coarse, it can often
  778                                          * result in r_rtt == 0. Since
  779                                          * r_rtt == N means that the actual
  780                                          * rtt is between N+dt and N+2-dt ticks,
  781                                          * add 1.
  782                                          */
  783                                         t1 = rep->r_rtt + 1;
  784                                         t1 -= (NFS_SRTT(rep) >> 3);
  785                                         NFS_SRTT(rep) += t1;
  786                                         if (t1 < 0)
  787                                                 t1 = -t1;
  788                                         t1 -= (NFS_SDRTT(rep) >> 2);
  789                                         NFS_SDRTT(rep) += t1;
  790                                 }
                                      /* A reply arrived: reset the timeout count. */
  791                                 nmp->nm_timeouts = 0;
  792                                 break;
  793                         }
  794                 }
  795                 /*
  796                  * If not matched to a request, drop it.
  797                  * If it's mine, get out.
  798                  */
  799                 if (rep == 0) {
  800                         nfsstats.rpcunexpected++;
  801                         m_freem(mrep);
  802                 } else if (rep == myrep) {
  803                         if (rep->r_mrep == NULL)
  804                                 panic("nfsreply nil");
  805                         return (0);
  806                 }
                      /* Caller asked for at most one message to be processed. */
  807                 if (myrep->r_flags & R_GETONEREP)
  808                         return (0);
  809         }
  810 }
  811 
  812 /*
  813  * nfs_request - goes something like this
  814  *      - fill in request struct
  815  *      - links it into list
  816  *      - calls nfs_send() for first transmit
  817  *      - calls nfs_reply() to wait for the reply
  818  *      - break down rpc header and return with nfs reply pointed to
  819  *        by mrep or error
  820  * nb: always frees up mreq mbuf list
       *
       * vp       vnode the call is for; its mount selects the nfsmount
       * mrest    mbuf chain holding the procedure arguments
       * procnum  RPC procedure number
       * procp    process recorded in the request (r_procp)
       * cred     credentials used to build the RPC authenticator
       * mrp, mdp, dposp
       *          out: reply chain, current mbuf and parse position; set when
       *          0 or an error with NFSERR_RETERR or'ed in is returned
       *
       * Returns 0 on success, otherwise an error.  For NFSv3 mounts, and for
       * ESTALE on any mount, a server-reported error is returned with
       * NFSERR_RETERR set and the reply is still handed back to the caller.
  821  */
  822 int
  823 nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
  824         struct vnode *vp;
  825         struct mbuf *mrest;
  826         int procnum;
  827         struct proc *procp;
  828         struct ucred *cred;
  829         struct mbuf **mrp;
  830         struct mbuf **mdp;
  831         caddr_t *dposp;
  832 {
  833         struct mbuf *m, *mrep;
  834         struct nfsreq *rep;
  835         u_int32_t *tl;
  836         int i;
  837         struct nfsmount *nmp;
  838         struct mbuf *md, *mheadend;
  839         char nickv[RPCX_NICKVERF];
  840         time_t reqtime, waituntil;
  841         caddr_t dpos, cp2;
  842         int t1, s, error = 0, mrest_len, auth_len, auth_type;
  843         int trylater_delay, failed_auth = 0;
              /*
               * verf_len is only assigned on the Kerberos path but is always
               * passed to nfsm_rpchead(); start it at 0 so an indeterminate
               * value is never read.
               */
  844         int verf_len = 0, verf_type;
  845         u_int32_t xid;
  846         char *auth_str, *verf_str;
  847         NFSKERBKEY_T key;               /* save session key */
  848 
  849         trylater_delay = NFS_MINTIMEO;
  850 
  851         nmp = VFSTONFS(vp->v_mount);
  852         rep = pool_get(&nfsreqpl, PR_WAITOK);
  853         rep->r_nmp = nmp;
  854         rep->r_vp = vp;
  855         rep->r_procp = procp;
  856         rep->r_procnum = procnum;
              /* Total length of the argument chain, needed for the RPC header. */
  857         i = 0;
  858         m = mrest;
  859         while (m) {
  860                 i += m->m_len;
  861                 m = m->m_next;
  862         }
  863         mrest_len = i;
  864 
  865         /*
  866          * Get the RPC header with authorization.
  867          */
  868 kerbauth:
  869         verf_str = auth_str = (char *)0;
  870         if (nmp->nm_flag & NFSMNT_KERB) {
  871                 verf_str = nickv;
  872                 verf_len = sizeof (nickv);
  873                 auth_type = RPCAUTH_KERB4;
  874                 bzero((caddr_t)key, sizeof (key));
                      /*
                       * Try the cached nickname first; fall back to a full
                       * authenticator if that fails or the server has already
                       * rejected us once.
                       */
  875                 if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
  876                         &auth_len, verf_str, verf_len)) {
  877                         error = nfs_getauth(nmp, rep, cred, &auth_str,
  878                                 &auth_len, verf_str, &verf_len, key);
  879                         if (error) {
  880                                 pool_put(&nfsreqpl, rep);
  881                                 m_freem(mrest);
  882                                 return (error);
  883                         }
  884                 }
  885         } else {
  886                 auth_type = RPCAUTH_UNIX;
                      /* 4 bytes per group (capped at nm_numgrps) plus 5 words. */
  887                 auth_len = (((cred->cr_ngroups > nmp->nm_numgrps) ?
  888                         nmp->nm_numgrps : cred->cr_ngroups) << 2) +
  889                         5 * NFSX_UNSIGNED;
  890         }
  891         m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
  892              auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
  893         if (auth_str)
  894                 free(auth_str, M_TEMP);
  895 
  896         /*
  897          * For stream protocols, insert a Sun RPC Record Mark.
  898          */
  899         if (nmp->nm_sotype == SOCK_STREAM) {
  900                 M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
  901                 *mtod(m, u_int32_t *) = htonl(0x80000000 |
  902                          (m->m_pkthdr.len - NFSX_UNSIGNED));
  903         }
  904         rep->r_mreq = m;
  905         rep->r_xid = xid;
  906 tryagain:
  907         if (nmp->nm_flag & NFSMNT_SOFT)
  908                 rep->r_retry = nmp->nm_retry;
  909         else
  910                 rep->r_retry = NFS_MAXREXMIT + 1;       /* past clip limit */
  911         rep->r_rtt = rep->r_rexmit = 0;
              /* Only procedures with a timer class (proct[] > 0) are timed. */
  912         if (proct[procnum] > 0)
  913                 rep->r_flags = R_TIMING;
  914         else
  915                 rep->r_flags = 0;
  916         rep->r_mrep = NULL;
  917 
  918         /*
  919          * Do the client side RPC.
  920          */
  921         nfsstats.rpcrequests++;
  922         /*
  923          * Chain request into list of outstanding requests. Be sure
  924          * to put it LAST so timer finds oldest requests first.
  925          */
  926         s = splsoftnet();
  927         TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
  928 
  929         /* Get send time for nqnfs */
  930         reqtime = time_second;
  931 
  932         /*
  933          * If backing off another request or avoiding congestion, don't
  934          * send this one now but let timer do it. If not timing a request,
  935          * do it now.
  936          */
  937         if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
  938                 (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
  939                 nmp->nm_sent < nmp->nm_cwnd)) {
  940                 splx(s);
  941                 if (nmp->nm_soflags & PR_CONNREQUIRED)
  942                         error = nfs_sndlock(&nmp->nm_flag, rep);
  943                 if (!error) {
                              /* Send a copy; r_mreq is kept for retransmission. */
  944                         error = nfs_send(nmp->nm_so, nmp->nm_nam,
  945                                         m_copym(m, 0, M_COPYALL, M_WAIT),
  946                                         rep);
  947                         if (nmp->nm_soflags & PR_CONNREQUIRED)
  948                                 nfs_sndunlock(&nmp->nm_flag);
  949                 }
  950                 if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
  951                         nmp->nm_sent += NFS_CWNDSCALE;
  952                         rep->r_flags |= R_SENT;
  953                 }
  954         } else {
  955                 splx(s);
                      /* Negative r_rtt: not sent yet, nfs_timer() will send it. */
  956                 rep->r_rtt = -1;
  957         }
  958 
  959         /*
  960          * Wait for the reply from our send or the timer's.
  961          */
  962         if (!error || error == EPIPE)
  963                 error = nfs_reply(rep);
  964 
  965         /*
  966          * RPC done, unlink the request.
  967          */
  968         s = splsoftnet();
  969         TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
  970         splx(s);
  971 
  972         /*
  973          * Decrement the outstanding request count.
  974          */
  975         if (rep->r_flags & R_SENT) {
  976                 rep->r_flags &= ~R_SENT;        /* paranoia */
  977                 nmp->nm_sent -= NFS_CWNDSCALE;
  978         }
  979 
  980         /*
  981          * If there was a successful reply and a tprintf msg.
  982          * tprintf a response.
  983          */
  984         if (!error && (rep->r_flags & R_TPRINTFMSG))
  985                 nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
  986                     "is alive again");
  987         mrep = rep->r_mrep;
  988         md = rep->r_md;
  989         dpos = rep->r_dpos;
  990         if (error) {
                      /*
                       * nfs_reply() can fail (e.g. EINTR out of nfs_rcvlock())
                       * after another receiver has already attached our reply
                       * to the request.  Release it so the chain is not leaked.
                       */
                      if (mrep != NULL)
                              m_freem(mrep);
  991                 m_freem(rep->r_mreq);
  992                 pool_put(&nfsreqpl, rep);
  993                 return (error);
  994         }
  995 
  996         /*
  997          * break down the rpc header and check if ok
  998          */
  999         nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 1000         if (*tl++ == rpc_msgdenied) {
 1001                 if (*tl == rpc_mismatch)
 1002                         error = EOPNOTSUPP;
 1003                 else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
 1004                         if (!failed_auth) {
                                      /*
                                       * First rejection: detach the argument
                                       * chain from the old header, drop the old
                                       * header and the reply, and rebuild with
                                       * a full authenticator.
                                       */
 1005                                 failed_auth++;
 1006                                 mheadend->m_next = (struct mbuf *)0;
 1007                                 m_freem(mrep);
 1008                                 m_freem(rep->r_mreq);
 1009                                 goto kerbauth;
 1010                         } else
 1011                                 error = EAUTH;
 1012                 } else
 1013                         error = EACCES;
 1014                 m_freem(mrep);
 1015                 m_freem(rep->r_mreq);
 1016                 pool_put(&nfsreqpl, rep);
 1017                 return (error);
 1018         }
 1019 
 1020         /*
 1021          * Grab any Kerberos verifier, otherwise just throw it away.
 1022          */
 1023         verf_type = fxdr_unsigned(int, *tl++);
 1024         i = fxdr_unsigned(int32_t, *tl);
 1025         if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
 1026                 error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
 1027                 if (error)
 1028                         goto nfsmout;
 1029         } else if (i > 0)
 1030                 nfsm_adv(nfsm_rndup(i));
 1031         nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 1032         /* 0 == ok */
 1033         if (*tl == 0) {
 1034                 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 1035                 if (*tl != 0) {
 1036                         error = fxdr_unsigned(int, *tl);
 1037                         if ((nmp->nm_flag & NFSMNT_NFSV3) &&
 1038                                 error == NFSERR_TRYLATER) {
                                      /*
                                       * Server asked us to retry: sleep until
                                       * time_second has advanced by
                                       * trylater_delay, grow the delay (capped
                                       * at NFS_MAXTIMEO) and resend the same
                                       * request.
                                       */
 1039                                 m_freem(mrep);
 1040                                 error = 0;
 1041                                 waituntil = time_second + trylater_delay;
 1042                                 while (time_second < waituntil)
 1043                                         (void) tsleep((caddr_t)&lbolt,
 1044                                                 PSOCK, "nqnfstry", 0);
 1045                                 trylater_delay *= NFS_TIMEOUTMUL;
 1046                                 if (trylater_delay > NFS_MAXTIMEO)
 1047                                         trylater_delay = NFS_MAXTIMEO;
 1048 
 1049                                 goto tryagain;
 1050                         }
 1051 
 1052                         /*
 1053                          * If the File Handle was stale, invalidate the
 1054                          * lookup cache, just in case.
 1055                          */
 1056                         if (error == ESTALE)
 1057                                 cache_purge(vp);
 1058 
 1059                         if (nmp->nm_flag & NFSMNT_NFSV3 || error == ESTALE) {
 1060                                 *mrp = mrep;
 1061                                 *mdp = md;
 1062                                 *dposp = dpos;
 1063                                 error |= NFSERR_RETERR;
 1064                         } else
 1065                                 m_freem(mrep);
 1066                         m_freem(rep->r_mreq);
 1067                         pool_put(&nfsreqpl, rep);
 1068                         return (error);
 1069                 }
 1070 
 1071                 *mrp = mrep;
 1072                 *mdp = md;
 1073                 *dposp = dpos;
 1074                 m_freem(rep->r_mreq);
 1075                 pool_put(&nfsreqpl, rep);
 1076                 return (0);
 1077         }
              /* Accepted, but with a non-zero accept status. */
 1078         m_freem(mrep);
 1079         error = EPROTONOSUPPORT;
 1080 nfsmout:
 1081         m_freem(rep->r_mreq);
 1082         pool_put(&nfsreqpl, rep);
 1083         return (error);
 1084 }
 1085 #endif /* NFSCLIENT */
 1086 
 1087 /*
 1088  * Generate the rpc reply header
 1089  * siz arg. is used to decide if adding a cluster is worthwhile
       *
       * nd     request descriptor; supplies the xid to echo (nd_retxid) and,
       *        for ND_KERBFULL requests, the credential and address used to
       *        look up the nickname entry
       * slp    server socket whose uid hash is searched for that entry
       * err    0, NFSERR_RETVOID, or an error selecting the form of the reply
       * frev   not referenced by this function
       * mrq    out: head of the new reply mbuf chain
       * mbp    out (may be NULL): value of mb, the mbuf being built into
       *        (presumably advanced by nfsm_build() -- confirm)
       * bposp  out: build position within that mbuf
       *
       * Always returns 0.
 1090  */
 1091 int
 1092 nfs_rephead(siz, nd, slp, err, frev, mrq, mbp, bposp)
 1093         int siz;
 1094         struct nfsrv_descript *nd;
 1095         struct nfssvc_sock *slp;
 1096         int err;
 1097         u_quad_t *frev;
 1098         struct mbuf **mrq;
 1099         struct mbuf **mbp;
 1100         caddr_t *bposp;
 1101 {
 1102         u_int32_t *tl;
 1103         struct mbuf *mreq;
 1104         caddr_t bpos;
 1105         struct mbuf *mb, *mb2;
 1106 
 1107         MGETHDR(mreq, M_WAIT, MT_DATA);
 1108         mb = mreq;
 1109         /*
 1110          * If this is a big reply, use a cluster else
 1111          * try and leave leading space for the lower level headers.
 1112          */
 1113         siz += RPC_REPLYSIZ;
 1114         if (siz >= max_datalen) {
 1115                 MCLGET(mreq, M_WAIT);
 1116         } else
 1117                 mreq->m_data += max_hdr;
 1118         tl = mtod(mreq, u_int32_t *);
              /* Six words are written below in the common case. */
 1119         mreq->m_len = 6 * NFSX_UNSIGNED;
 1120         bpos = ((caddr_t)tl) + mreq->m_len;
 1121         *tl++ = txdr_unsigned(nd->nd_retxid);
 1122         *tl++ = rpc_reply;
 1123         if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
 1124                 *tl++ = rpc_msgdenied;
 1125                 if (err & NFSERR_AUTHERR) {
 1126                         *tl++ = rpc_autherr;
 1127                         *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
                              /* This form is only five words long. */
 1128                         mreq->m_len -= NFSX_UNSIGNED;
 1129                         bpos -= NFSX_UNSIGNED;
 1130                 } else {
 1131                         *tl++ = rpc_mismatch;
 1132                         *tl++ = txdr_unsigned(RPC_VER2);
 1133                         *tl = txdr_unsigned(RPC_VER2);
 1134                 }
 1135         } else {
 1136                 *tl++ = rpc_msgaccepted;
 1137 
 1138                 /*
 1139                  * For Kerberos authentication, we must send the nickname
 1140                  * verifier back, otherwise just RPCAUTH_NULL.
 1141                  */
 1142                 if (nd->nd_flag & ND_KERBFULL) {
 1143                     struct nfsuid *nuidp;
 1144                     struct timeval ktvin, ktvout;
 1145 
 1146                     LIST_FOREACH(nuidp, NUIDHASH(slp, nd->nd_cr.cr_uid),
 1147                      nu_hash) {
 1148                         if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
 1149                             (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
 1150                              &nuidp->nu_haddr, nd->nd_nam2)))
 1151                             break;
 1152                     }
 1153                     if (nuidp) {
 1154                         ktvin.tv_sec =
 1155                             txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
 1156                         ktvin.tv_usec =
 1157                             txdr_unsigned(nuidp->nu_timestamp.tv_usec);
 1158 
                              /*
                               * XXX ktvout was never assigned before being
                               * copied into the reply, so uninitialized stack
                               * contents were sent.  Presumably it was meant
                               * to hold a transformed (encrypted) ktvin, but no
                               * such step exists in this function; pass the
                               * value through so the reply is well defined.
                               */
                              ktvout = ktvin;

 1159                         *tl++ = rpc_auth_kerb;
 1160                         *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
 1161                         *tl = ktvout.tv_sec;
 1162                         nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 1163                         *tl++ = ktvout.tv_usec;
 1164                         *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
 1165                     } else {
 1166                         *tl++ = 0;
 1167                         *tl++ = 0;
 1168                     }
 1169                 } else {
 1170                         *tl++ = 0;
 1171                         *tl++ = 0;
 1172                 }
                      /* Accept status, followed by any status-specific data. */
 1173                 switch (err) {
 1174                 case EPROGUNAVAIL:
 1175                         *tl = txdr_unsigned(RPC_PROGUNAVAIL);
 1176                         break;
 1177                 case EPROGMISMATCH:
 1178                         *tl = txdr_unsigned(RPC_PROGMISMATCH);
 1179                         nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 1180                         *tl++ = txdr_unsigned(2);
 1181                         *tl = txdr_unsigned(3);
 1182                         break;
 1183                 case EPROCUNAVAIL:
 1184                         *tl = txdr_unsigned(RPC_PROCUNAVAIL);
 1185                         break;
 1186                 case EBADRPC:
 1187                         *tl = txdr_unsigned(RPC_GARBAGE);
 1188                         break;
 1189                 default:
 1190                         *tl = 0;
                              /* Non-void procedures get an NFS status word. */
 1191                         if (err != NFSERR_RETVOID) {
 1192                                 nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
 1193                                 if (err)
 1194                                     *tl = txdr_unsigned(nfsrv_errmap(nd, err));
 1195                                 else
 1196                                     *tl = 0;
 1197                         }
 1198                         break;
 1199                 }
 1200         }
 1201 
 1202         *mrq = mreq;
 1203         if (mbp != NULL)
 1204                 *mbp = mb;
 1205         *bposp = bpos;
 1206         if (err != 0 && err != NFSERR_RETVOID)
 1207                 nfsstats.srvrpc_errs++;
 1208         return (0);
 1209 }
 1210 
 1211 /*
 1212  * Nfs timer routine
 1213  * Scan the nfsreq list and retransmit any requests that have timed out
 1214  * To avoid retransmission attempts on STREAM sockets (in the future) make
 1215  * sure to set the r_retry field to 0 (implies nm_retry == 0).
       *
       * arg is the struct timeout driving this routine; it is re-armed for
       * nfs_ticks before returning.  Both scans run at splsoftnet.
 1216  */
 1217 void
 1218 nfs_timer(arg)
 1219         void *arg;
 1220 {
 1221         struct timeout *to = (struct timeout *)arg;
 1222         struct nfsreq *rep;
 1223         struct mbuf *m;
 1224         struct socket *so;
 1225         struct nfsmount *nmp;
 1226         int timeo;
 1227         int s, error;
 1228 #ifdef NFSSERVER
 1229         struct nfssvc_sock *slp;
 1230         struct timeval tv;
 1231         u_quad_t cur_usec;
 1232 #endif
 1233 
 1234         s = splsoftnet();
 1235         for (rep = TAILQ_FIRST(&nfs_reqq); rep != NULL;
 1236             rep = TAILQ_NEXT(rep, r_chain)) {
 1237                 nmp = rep->r_nmp;
                      /* Nothing to do for answered or already terminated requests. */
 1238                 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
 1239                         continue;
 1240                 if (nfs_sigintr(nmp, rep, rep->r_procp)) {
 1241                         rep->r_flags |= R_SOFTTERM;
 1242                         continue;
 1243                 }
                      /*
                       * r_rtt >= 0: the request is on the wire and being aged.
                       * Count this tick and skip the request unless its timeout
                       * (scaled by the mount's backoff) has expired.  A negative
                       * r_rtt means a send is still pending, so fall through.
                       */
 1244                 if (rep->r_rtt >= 0) {
 1245                         rep->r_rtt++;
 1246                         if (nmp->nm_flag & NFSMNT_DUMBTIMR)
 1247                                 timeo = nmp->nm_timeo;
 1248                         else
 1249                                 timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
 1250                         if (nmp->nm_timeouts > 0)
 1251                                 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
 1252                         if (rep->r_rtt <= timeo)
 1253                                 continue;
                              /* Capped at 8 because it indexes nfs_backoff[] above. */
 1254                         if (nmp->nm_timeouts < 8)
 1255                                 nmp->nm_timeouts++;
 1256                 }
 1257                 /*
 1258                  * Check for server not responding
 1259                  */
 1260                 if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
 1261                      rep->r_rexmit > nmp->nm_deadthresh) {
 1262                         nfs_msg(rep->r_procp,
 1263                             nmp->nm_mountp->mnt_stat.f_mntfromname,
 1264                             "not responding");
 1265                         rep->r_flags |= R_TPRINTFMSG;
 1266                 }
 1267                 if (rep->r_rexmit >= rep->r_retry) {    /* too many */
 1268                         nfsstats.rpctimeouts++;
 1269                         rep->r_flags |= R_SOFTTERM;
 1270                         continue;
 1271                 }
                      /* Non-datagram sockets: only count the timeout, never resend here. */
 1272                 if (nmp->nm_sotype != SOCK_DGRAM) {
 1273                         if (++rep->r_rexmit > NFS_MAXREXMIT)
 1274                                 rep->r_rexmit = NFS_MAXREXMIT;
 1275                         continue;
 1276                 }
 1277                 if ((so = nmp->nm_so) == NULL)
 1278                         continue;
 1279 
 1280                 /*
 1281                  * If there is enough space and the window allows..
 1282                  *      Resend it
 1283                  * Set r_rtt to -1 in case we fail to send it now.
 1284                  */
 1285                 rep->r_rtt = -1;
 1286                 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
 1287                    ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
 1288                     (rep->r_flags & R_SENT) ||
 1289                     nmp->nm_sent < nmp->nm_cwnd) &&
 1290                    (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
                              /* Pass the peer address only for NFSMNT_NOCONN mounts. */
 1291                         if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
 1292                             error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
 1293                             (struct mbuf *)0, (struct mbuf *)0);
 1294                         else
 1295                             error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
 1296                             nmp->nm_nam, (struct mbuf *)0);
 1297                         if (error) {
 1298                                 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
 1299                                         so->so_error = 0;
 1300                         } else {
 1301                                 /*
 1302                                  * Iff first send, start timing
 1303                                  * else turn timing off, backoff timer
 1304                                  * and divide congestion window by 2.
 1305                                  */
 1306                                 if (rep->r_flags & R_SENT) {
 1307                                         rep->r_flags &= ~R_TIMING;
 1308                                         if (++rep->r_rexmit > NFS_MAXREXMIT)
 1309                                                 rep->r_rexmit = NFS_MAXREXMIT;
 1310                                         nmp->nm_cwnd >>= 1;
 1311                                         if (nmp->nm_cwnd < NFS_CWNDSCALE)
 1312                                                 nmp->nm_cwnd = NFS_CWNDSCALE;
 1313                                         nfsstats.rpcretries++;
 1314                                 } else {
 1315                                         rep->r_flags |= R_SENT;
 1316                                         nmp->nm_sent += NFS_CWNDSCALE;
 1317                                 }
                                      /* Sent: start ageing the request again. */
 1318                                 rep->r_rtt = 0;
 1319                         }
 1320                 }
 1321         }
 1322 
 1323 #ifdef NFSSERVER
 1324         /*
 1325          * Scan the write gathering queues for writes that need to be
 1326          * completed now.
 1327          */
 1328         getmicrotime(&tv);
 1329         cur_usec = (u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec;
 1330         for (slp = TAILQ_FIRST(&nfssvc_sockhead); slp != NULL;
 1331             slp = TAILQ_NEXT(slp, ns_chain)) {
                  /* Only the first queued entry's nd_time is compared. */
 1332             if (LIST_FIRST(&slp->ns_tq) &&
 1333                 LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec)
 1334                 nfsrv_wakenfsd(slp);
 1335         }
 1336 #endif /* NFSSERVER */
 1337         splx(s);
 1338         timeout_add(to, nfs_ticks);
 1339 }
 1340 
 1341 /*
 1342  * Report whether the RPC tracked by rep (may be NULL) should be given up.
 1343  * Yields EINTR for a soft-terminated request or, on NFSMNT_INT mounts,
 1344  * when p has an unmasked, unignored NFSINT_SIGMASK signal; otherwise 0.
 1345  */
 1346 int
 1347 nfs_sigintr(nmp, rep, p)
 1348         struct nfsmount *nmp;
 1349         struct nfsreq *rep;
 1350         struct proc *p;
 1351 {
 1352         int interrupted;
 1353 
 1354         if (rep != NULL && (rep->r_flags & R_SOFTTERM) != 0)
 1355                 return (EINTR);
 1356         interrupted = (nmp->nm_flag & NFSMNT_INT) != 0 &&
 1357             p != NULL && p->p_siglist != 0 &&
 1358             (p->p_siglist & ~p->p_sigmask & ~p->p_sigignore &
 1359             NFSINT_SIGMASK) != 0;
 1360         return (interrupted ? EINTR : 0);
 1361 }
 1362 
 1363 /*
 1364  * Lock a socket against others.
 1365  * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 1366  * and also to avoid race conditions between the processes with nfs requests
 1367  * in progress when a reconnect is necessary.
       *
       * flagp points at the flag word holding NFSMNT_SNDLOCK/NFSMNT_WANTSND.
       * rep may be NULL, in which case no interruption check is made and the
       * sleep never uses PCATCH.  Returns 0 with the lock held, or EINTR if
       * nfs_sigintr() reports that the request should be abandoned.
 1368  */
 1369 int
 1370 nfs_sndlock(flagp, rep)
 1371         int *flagp;
 1372         struct nfsreq *rep;
 1373 {
 1374         struct proc *p;
 1375         int slpflag = 0, slptimeo = 0;
 1376 
 1377         if (rep) {
 1378                 p = rep->r_procp;
 1379                 if (rep->r_nmp->nm_flag & NFSMNT_INT)
 1380                         slpflag = PCATCH;
 1381         } else
 1382                 p = (struct proc *)0;
 1383         while (*flagp & NFSMNT_SNDLOCK) {
                      /* rep is optional: never dereference it when it is NULL. */
 1384                 if (rep && nfs_sigintr(rep->r_nmp, rep, p))
 1385                         return (EINTR);
 1386                 *flagp |= NFSMNT_WANTSND;
 1387                 (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
 1388                         slptimeo);
                      /*
                       * Only the first sleep uses PCATCH; later sleeps use a
                       * 2 * hz timeout so the interruption check is repeated.
                       */
 1389                 if (slpflag == PCATCH) {
 1390                         slpflag = 0;
 1391                         slptimeo = 2 * hz;
 1392                 }
 1393         }
 1394         *flagp |= NFSMNT_SNDLOCK;
 1395         return (0);
 1396 }
 1397 
 1398 /*
 1399  * Release the send lock held in *flagp and wake anyone who asked for
 1400  * it (NFSMNT_WANTSND).  Panics if the lock bit is not currently set.
 1401  */
 1402 void
 1403 nfs_sndunlock(flagp)
 1404         int *flagp;
 1405 {
 1406         if (!(*flagp & NFSMNT_SNDLOCK))
 1407                 panic("nfs sndunlock");
 1408         *flagp &= ~NFSMNT_SNDLOCK;
 1409         if (!(*flagp & NFSMNT_WANTSND))
 1410                 return;
 1411         *flagp &= ~NFSMNT_WANTSND;
 1412         wakeup((caddr_t)flagp);
 1413 }
 1414 
 1415 /*
 1416  * Take the receive lock of rep's mount, sleeping while it is held.
 1417  * Returns 0 once locked, or EINTR if nfs_sigintr() says to give up.
 1418  */
 1419 int
 1420 nfs_rcvlock(rep)
 1421         struct nfsreq *rep;
 1422 {
 1423         int *flagp = &rep->r_nmp->nm_flag;
 1424         int catchflag = (*flagp & NFSMNT_INT) ? PCATCH : 0, timo = 0;
 1425 
 1426         while ((*flagp & NFSMNT_RCVLOCK) != 0) {
 1427                 if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp) != 0)
 1428                         return (EINTR);
 1429                 *flagp |= NFSMNT_WANTRCV;
 1430                 (void) tsleep((caddr_t)flagp, catchflag | (PZERO - 1),
 1431                     "nfsrcvlk", timo);
 1432                 if (catchflag == PCATCH) {
 1433                         catchflag = 0;
 1434                         timo = 2 * hz;
 1435                 }
 1436         }
 1437         *flagp |= NFSMNT_RCVLOCK;
 1438         return (0);
 1439 }
 1440 
 1441 /*
 1442  * Unlock the stream socket for others.
 1443  */
 1444 void
 1445 nfs_rcvunlock(flagp)
 1446         int *flagp;
 1447 {
 1448 
 1449         if ((*flagp & NFSMNT_RCVLOCK) == 0)
 1450                 panic("nfs rcvunlock");
 1451         *flagp &= ~NFSMNT_RCVLOCK;
 1452         if (*flagp & NFSMNT_WANTRCV) {
 1453                 *flagp &= ~NFSMNT_WANTRCV;
 1454                 wakeup((caddr_t)flagp);
 1455         }
 1456 }
 1457 
 1458 /*
 1459  * Auxiliary routine to align the length of mbuf copies made with m_copyback().
 1460  */
 1461 void
 1462 nfs_realign_fixup(struct mbuf *m, struct mbuf *n, unsigned int *off)
 1463 {
 1464         size_t padding;
 1465 
 1466         /*
 1467          * The maximum number of bytes that m_copyback() places in a mbuf is
 1468          * always an aligned quantity, so realign happens at the chain's tail.
 1469          */
 1470         while (n->m_next != NULL)
 1471                 n = n->m_next;
 1472 
 1473         /*
 1474          * Pad from the next elements in the source chain. Loop until the
 1475          * destination chain is aligned, or the end of the source is reached.
 1476          */
 1477         do {
 1478                 m = m->m_next;
 1479                 if (m == NULL)
 1480                         return;
 1481 
 1482                 padding = min(ALIGN(n->m_len) - n->m_len, m->m_len);
 1483                 if (padding > M_TRAILINGSPACE(n))
 1484                         panic("nfs_realign_fixup: no memory to pad to");
 1485 
 1486                 bcopy(mtod(m, void *), mtod(n, char *) + n->m_len, padding);
 1487 
 1488                 n->m_len += padding;
 1489                 m_adj(m, padding);
 1490                 *off += padding;
 1491 
 1492         } while (!ALIGNED_POINTER(n->m_len, void *));
 1493 }
 1494 
 1495 /*
 1496  * The NFS RPC parsing code uses the data address and the length of mbuf
 1497  * structures to calculate on-memory addresses. This function makes sure these
 1498  * parameters are correctly aligned.
 1499  */
 1500 void
 1501 nfs_realign(struct mbuf **pm, int hsiz)
 1502 {
 1503         struct mbuf *m;
 1504         struct mbuf *n = NULL;
 1505         unsigned int off = 0;
 1506 
 1507         ++nfs_realign_test;
 1508         while ((m = *pm) != NULL) {
 1509                 if (!ALIGNED_POINTER(m->m_data, void *) ||
 1510                     !ALIGNED_POINTER(m->m_len,  void *)) {
 1511                         MGET(n, M_WAIT, MT_DATA);
 1512                         if (ALIGN(m->m_len) >= MINCLSIZE) {
 1513                                 MCLGET(n, M_WAIT);
 1514                         }
 1515                         n->m_len = 0;
 1516                         break;
 1517                 }
 1518                 pm = &m->m_next;
 1519         }
 1520         /*
 1521          * If n is non-NULL, loop on m copying data, then replace the
 1522          * portion of the chain that had to be realigned.
 1523          */
 1524         if (n != NULL) {
 1525                 ++nfs_realign_count;
 1526                 while (m) {
 1527                         m_copyback(n, off, m->m_len, mtod(m, caddr_t));
 1528 
 1529                         /*
 1530                          * If an unaligned amount of memory was copied, fix up
 1531                          * the last mbuf created by m_copyback().
 1532                          */
 1533                         if (!ALIGNED_POINTER(m->m_len, void *))
 1534                                 nfs_realign_fixup(m, n, &off);
 1535 
 1536                         off += m->m_len;
 1537                         m = m->m_next;
 1538                 }
 1539                 m_freem(*pm);
 1540                 *pm = n;
 1541         }
 1542 }
 1543 
 1544 
 1545 /*
 1546  * Parse an RPC request
 1547  * - verify it
 1548  * - fill in the cred struct.
 1549  */
 1550 int
 1551 nfs_getreq(nd, nfsd, has_header)
 1552         struct nfsrv_descript *nd;
 1553         struct nfsd *nfsd;
 1554         int has_header;
 1555 {
 1556         int len, i;
 1557         u_int32_t *tl;
 1558         int32_t t1;
 1559         struct uio uio;
 1560         struct iovec iov;
 1561         caddr_t dpos, cp2, cp;
 1562         u_int32_t nfsvers, auth_type;
 1563         uid_t nickuid;
 1564         int error = 0, ticklen;
 1565         struct mbuf *mrep, *md;
 1566         struct nfsuid *nuidp;
 1567         struct timeval tvin, tvout;
 1568 
 1569         mrep = nd->nd_mrep;
 1570         md = nd->nd_md;
 1571         dpos = nd->nd_dpos;
 1572         if (has_header) {
 1573                 nfsm_dissect(tl, u_int32_t *, 10 * NFSX_UNSIGNED);
 1574                 nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
 1575                 if (*tl++ != rpc_call) {
 1576                         m_freem(mrep);
 1577                         return (EBADRPC);
 1578                 }
 1579         } else
 1580                 nfsm_dissect(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
 1581         nd->nd_repstat = 0;
 1582         nd->nd_flag = 0;
 1583         if (*tl++ != rpc_vers) {
 1584                 nd->nd_repstat = ERPCMISMATCH;
 1585                 nd->nd_procnum = NFSPROC_NOOP;
 1586                 return (0);
 1587         }
 1588         if (*tl != nfs_prog) {
 1589                 nd->nd_repstat = EPROGUNAVAIL;
 1590                 nd->nd_procnum = NFSPROC_NOOP;
 1591                 return (0);
 1592         }
 1593         tl++;
 1594         nfsvers = fxdr_unsigned(u_int32_t, *tl++);
 1595         if (nfsvers != NFS_VER2 && nfsvers != NFS_VER3) {
 1596                 nd->nd_repstat = EPROGMISMATCH;
 1597                 nd->nd_procnum = NFSPROC_NOOP;
 1598                 return (0);
 1599         }
 1600         if (nfsvers == NFS_VER3)
 1601                 nd->nd_flag = ND_NFSV3;
 1602         nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
 1603         if (nd->nd_procnum == NFSPROC_NULL)
 1604                 return (0);
 1605         if (nd->nd_procnum >= NFS_NPROCS ||
 1606                 (nd->nd_procnum > NFSPROC_COMMIT) ||
 1607                 (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
 1608                 nd->nd_repstat = EPROCUNAVAIL;
 1609                 nd->nd_procnum = NFSPROC_NOOP;
 1610                 return (0);
 1611         }
 1612         if ((nd->nd_flag & ND_NFSV3) == 0)
 1613                 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
 1614         auth_type = *tl++;
 1615         len = fxdr_unsigned(int, *tl++);
 1616         if (len < 0 || len > RPCAUTH_MAXSIZ) {
 1617                 m_freem(mrep);
 1618                 return (EBADRPC);
 1619         }
 1620 
 1621         nd->nd_flag &= ~ND_KERBAUTH;
 1622         /*
 1623          * Handle auth_unix or auth_kerb.
 1624          */
 1625         if (auth_type == rpc_auth_unix) {
 1626                 len = fxdr_unsigned(int, *++tl);
 1627                 if (len < 0 || len > NFS_MAXNAMLEN) {
 1628                         m_freem(mrep);
 1629                         return (EBADRPC);
 1630                 }
 1631                 nfsm_adv(nfsm_rndup(len));
 1632                 nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 1633                 bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
 1634                 nd->nd_cr.cr_ref = 1;
 1635                 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
 1636                 nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
 1637                 len = fxdr_unsigned(int, *tl);
 1638                 if (len < 0 || len > RPCAUTH_UNIXGIDS) {
 1639                         m_freem(mrep);
 1640                         return (EBADRPC);
 1641                 }
 1642                 nfsm_dissect(tl, u_int32_t *, (len + 2) * NFSX_UNSIGNED);
 1643                 for (i = 0; i < len; i++)
 1644                     if (i < NGROUPS)
 1645                         nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
 1646                     else
 1647                         tl++;
 1648                 nd->nd_cr.cr_ngroups = (len > NGROUPS) ? NGROUPS : len;
 1649                 if (nd->nd_cr.cr_ngroups > 1)
 1650                     nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
 1651                 len = fxdr_unsigned(int, *++tl);
 1652                 if (len < 0 || len > RPCAUTH_MAXSIZ) {
 1653                         m_freem(mrep);
 1654                         return (EBADRPC);
 1655                 }
 1656                 if (len > 0)
 1657                         nfsm_adv(nfsm_rndup(len));
 1658         } else if (auth_type == rpc_auth_kerb) {
 1659                 switch (fxdr_unsigned(int, *tl++)) {
 1660                 case RPCAKN_FULLNAME:
 1661                         ticklen = fxdr_unsigned(int, *tl);
 1662                         *((u_int32_t *)nfsd->nfsd_authstr) = *tl;
 1663                         uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
 1664                         nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
 1665                         if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
 1666                                 m_freem(mrep);
 1667                                 return (EBADRPC);
 1668                         }
 1669                         uio.uio_offset = 0;
 1670                         uio.uio_iov = &iov;
 1671                         uio.uio_iovcnt = 1;
 1672                         uio.uio_segflg = UIO_SYSSPACE;
 1673                         iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
 1674                         iov.iov_len = RPCAUTH_MAXSIZ - 4;
 1675                         nfsm_mtouio(&uio, uio.uio_resid);
 1676                         nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 1677                         if (*tl++ != rpc_auth_kerb ||
 1678                                 fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
 1679                                 printf("Bad kerb verifier\n");
 1680                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
 1681                                 nd->nd_procnum = NFSPROC_NOOP;
 1682                                 return (0);
 1683                         }
 1684                         nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
 1685                         tl = (u_int32_t *)cp;
 1686                         if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
 1687                                 printf("Not fullname kerb verifier\n");
 1688                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
 1689                                 nd->nd_procnum = NFSPROC_NOOP;
 1690                                 return (0);
 1691                         }
 1692                         cp += NFSX_UNSIGNED;
 1693                         bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
 1694                         nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
 1695                         nd->nd_flag |= ND_KERBFULL;
 1696                         nfsd->nfsd_flag |= NFSD_NEEDAUTH;
 1697                         break;
 1698                 case RPCAKN_NICKNAME:
 1699                         if (len != 2 * NFSX_UNSIGNED) {
 1700                                 printf("Kerb nickname short\n");
 1701                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
 1702                                 nd->nd_procnum = NFSPROC_NOOP;
 1703                                 return (0);
 1704                         }
 1705                         nickuid = fxdr_unsigned(uid_t, *tl);
 1706                         nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 1707                         if (*tl++ != rpc_auth_kerb ||
 1708                                 fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
 1709                                 printf("Kerb nick verifier bad\n");
 1710                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
 1711                                 nd->nd_procnum = NFSPROC_NOOP;
 1712                                 return (0);
 1713                         }
 1714                         nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 1715                         tvin.tv_sec = *tl++;
 1716                         tvin.tv_usec = *tl;
 1717 
 1718                         LIST_FOREACH(nuidp, NUIDHASH(nfsd->nfsd_slp, nickuid),
 1719                             nu_hash) {
 1720                                 if (nuidp->nu_cr.cr_uid == nickuid &&
 1721                                     (!nd->nd_nam2 ||
 1722                                      netaddr_match(NU_NETFAM(nuidp),
 1723                                       &nuidp->nu_haddr, nd->nd_nam2)))
 1724                                         break;
 1725                         }
 1726                         if (!nuidp) {
 1727                                 nd->nd_repstat =
 1728                                         (NFSERR_AUTHERR|AUTH_REJECTCRED);
 1729                                 nd->nd_procnum = NFSPROC_NOOP;
 1730                                 return (0);
 1731                         }
 1732 
 1733                         tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
 1734                         tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
 1735                         if (nuidp->nu_expire < time_second ||
 1736                             nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
 1737                             (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
 1738                              nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
 1739                                 nuidp->nu_expire = 0;
 1740                                 nd->nd_repstat =
 1741                                     (NFSERR_AUTHERR|AUTH_REJECTVERF);
 1742                                 nd->nd_procnum = NFSPROC_NOOP;
 1743                                 return (0);
 1744                         }
 1745                         nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
 1746                         nd->nd_flag |= ND_KERBNICK;
 1747                 };
 1748         } else {
 1749                 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
 1750                 nd->nd_procnum = NFSPROC_NOOP;
 1751                 return (0);
 1752         }
 1753 
 1754         nd->nd_md = md;
 1755         nd->nd_dpos = dpos;
 1756         return (0);
 1757 nfsmout:
 1758         return (error);
 1759 }
 1760 
 1761 int
 1762 nfs_msg(p, server, msg)
 1763         struct proc *p;
 1764         char *server, *msg;
 1765 {
 1766         tpr_t tpr;
 1767 
 1768         if (p)
 1769                 tpr = tprintf_open(p);
 1770         else
 1771                 tpr = NULL;
 1772         tprintf(tpr, "nfs server %s: %s\n", server, msg);
 1773         tprintf_close(tpr);
 1774         return (0);
 1775 }
 1776 
 1777 #ifdef NFSSERVER
/*
 * Table of server procedure handlers, NFS_NPROCS entries long.  Every
 * handler takes (struct nfsrv_descript *, struct nfssvc_sock *,
 * struct proc *, struct mbuf **) and returns int.
 * NOTE(review): presumably indexed by nd_procnum -- the lookup is not in
 * this part of the file, confirm against the caller.  The numbers in the
 * comments below are simply the array slots.
 */
int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *,
                                    struct nfssvc_sock *, struct proc *,
                                    struct mbuf **) = {
        nfsrv_null,             /*  0 */
        nfsrv_getattr,          /*  1 */
        nfsrv_setattr,          /*  2 */
        nfsrv_lookup,           /*  3 */
        nfsrv3_access,          /*  4 */
        nfsrv_readlink,         /*  5 */
        nfsrv_read,             /*  6 */
        nfsrv_write,            /*  7 */
        nfsrv_create,           /*  8 */
        nfsrv_mkdir,            /*  9 */
        nfsrv_symlink,          /* 10 */
        nfsrv_mknod,            /* 11 */
        nfsrv_remove,           /* 12 */
        nfsrv_rmdir,            /* 13 */
        nfsrv_rename,           /* 14 */
        nfsrv_link,             /* 15 */
        nfsrv_readdir,          /* 16 */
        nfsrv_readdirplus,      /* 17 */
        nfsrv_statfs,           /* 18 */
        nfsrv_fsinfo,           /* 19 */
        nfsrv_pathconf,         /* 20 */
        nfsrv_commit,           /* 21 */
        nfsrv_noop,             /* 22 */
        nfsrv_noop,             /* 23 */
        nfsrv_noop,             /* 24 */
        nfsrv_noop              /* 25 */
};
 1808 
 1809 /*
 1810  * Socket upcall routine for the nfsd sockets.
 1811  * The caddr_t arg is a pointer to the "struct nfssvc_sock".
 1812  * Essentially do as much as possible non-blocking, else punt and it will
 1813  * be called with M_WAIT from an nfsd.
 1814  */
 1815 void
 1816 nfsrv_rcv(so, arg, waitflag)
 1817         struct socket *so;
 1818         caddr_t arg;
 1819         int waitflag;
 1820 {
 1821         struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
 1822         struct mbuf *m;
 1823         struct mbuf *mp, *nam;
 1824         struct uio auio;
 1825         int flags, error;
 1826 
 1827         if ((slp->ns_flag & SLP_VALID) == 0)
 1828                 return;
 1829 #ifdef notdef
 1830         /*
 1831          * Define this to test for nfsds handling this under heavy load.
 1832          */
 1833         if (waitflag == M_DONTWAIT) {
 1834                 slp->ns_flag |= SLP_NEEDQ; goto dorecs;
 1835         }
 1836 #endif
 1837         auio.uio_procp = NULL;
 1838         if (so->so_type == SOCK_STREAM) {
 1839                 /*
 1840                  * If there are already records on the queue, defer soreceive()
 1841                  * to an nfsd so that there is feedback to the TCP layer that
 1842                  * the nfs servers are heavily loaded.
 1843                  */
 1844                 if (slp->ns_rec && waitflag == M_DONTWAIT) {
 1845                         slp->ns_flag |= SLP_NEEDQ;
 1846                         goto dorecs;
 1847                 }
 1848 
 1849                 /*
 1850                  * Do soreceive().
 1851                  */
 1852                 auio.uio_resid = 1000000000;
 1853                 flags = MSG_DONTWAIT;
 1854                 error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
 1855                 if (error || mp == (struct mbuf *)0) {
 1856                         if (error == EWOULDBLOCK)
 1857                                 slp->ns_flag |= SLP_NEEDQ;
 1858                         else
 1859                                 slp->ns_flag |= SLP_DISCONN;
 1860                         goto dorecs;
 1861                 }
 1862                 m = mp;
 1863                 if (slp->ns_rawend) {
 1864                         slp->ns_rawend->m_next = m;
 1865                         slp->ns_cc += 1000000000 - auio.uio_resid;
 1866                 } else {
 1867                         slp->ns_raw = m;
 1868                         slp->ns_cc = 1000000000 - auio.uio_resid;
 1869                 }
 1870                 while (m->m_next)
 1871                         m = m->m_next;
 1872                 slp->ns_rawend = m;
 1873 
 1874                 /*
 1875                  * Now try and parse record(s) out of the raw stream data.
 1876                  */
 1877                 error = nfsrv_getstream(slp, waitflag);
 1878                 if (error) {
 1879                         if (error == EPERM)
 1880                                 slp->ns_flag |= SLP_DISCONN;
 1881                         else
 1882                                 slp->ns_flag |= SLP_NEEDQ;
 1883                 }
 1884         } else {
 1885                 do {
 1886                         auio.uio_resid = 1000000000;
 1887                         flags = MSG_DONTWAIT;
 1888                         error = soreceive(so, &nam, &auio, &mp,
 1889                                                 (struct mbuf **)0, &flags);
 1890                         if (mp) {
 1891                                 if (nam) {
 1892                                         m = nam;
 1893                                         m->m_next = mp;
 1894                                 } else
 1895                                         m = mp;
 1896                                 if (slp->ns_recend)
 1897                                         slp->ns_recend->m_nextpkt = m;
 1898                                 else
 1899                                         slp->ns_rec = m;
 1900                                 slp->ns_recend = m;
 1901                                 m->m_nextpkt = (struct mbuf *)0;
 1902                         }
 1903                         if (error) {
 1904                                 if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
 1905                                         && error != EWOULDBLOCK) {
 1906                                         slp->ns_flag |= SLP_DISCONN;
 1907                                         goto dorecs;
 1908                                 }
 1909                         }
 1910                 } while (mp);
 1911         }
 1912 
 1913         /*
 1914          * Now try and process the request records, non-blocking.
 1915          */
 1916 dorecs:
 1917         if (waitflag == M_DONTWAIT &&
 1918                 (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
 1919                 nfsrv_wakenfsd(slp);
 1920 }
 1921 
 1922 /*
 1923  * Try and extract an RPC request from the mbuf data list received on a
 1924  * stream socket. The "waitflag" argument indicates whether or not it
 1925  * can sleep.
 1926  */
 1927 int
 1928 nfsrv_getstream(slp, waitflag)
 1929         struct nfssvc_sock *slp;
 1930         int waitflag;
 1931 {
 1932         struct mbuf *m, **mpp;
 1933         char *cp1, *cp2;
 1934         int len;
 1935         struct mbuf *om, *m2, *recm;
 1936         u_int32_t recmark;
 1937 
 1938         if (slp->ns_flag & SLP_GETSTREAM)
 1939                 panic("nfs getstream");
 1940         slp->ns_flag |= SLP_GETSTREAM;
 1941         for (;;) {
 1942             if (slp->ns_reclen == 0) {
 1943                 if (slp->ns_cc < NFSX_UNSIGNED) {
 1944                         slp->ns_flag &= ~SLP_GETSTREAM;
 1945                         return (0);
 1946                 }
 1947                 m = slp->ns_raw;
 1948                 if (m->m_len >= NFSX_UNSIGNED) {
 1949                         bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
 1950                         m->m_data += NFSX_UNSIGNED;
 1951                         m->m_len -= NFSX_UNSIGNED;
 1952                 } else {
 1953                         cp1 = (caddr_t)&recmark;
 1954                         cp2 = mtod(m, caddr_t);
 1955                         while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
 1956                                 while (m->m_len == 0) {
 1957                                         m = m->m_next;
 1958                                         cp2 = mtod(m, caddr_t);
 1959                                 }
 1960                                 *cp1++ = *cp2++;
 1961                                 m->m_data++;
 1962                                 m->m_len--;
 1963                         }
 1964                 }
 1965                 slp->ns_cc -= NFSX_UNSIGNED;
 1966                 recmark = ntohl(recmark);
 1967                 slp->ns_reclen = recmark & ~0x80000000;
 1968                 if (recmark & 0x80000000)
 1969                         slp->ns_flag |= SLP_LASTFRAG;
 1970                 else
 1971                         slp->ns_flag &= ~SLP_LASTFRAG;
 1972                 if (slp->ns_reclen > NFS_MAXPACKET) {
 1973                         slp->ns_flag &= ~SLP_GETSTREAM;
 1974                         return (EPERM);
 1975                 }
 1976             }
 1977 
 1978             /*
 1979              * Now get the record part.
 1980              */
 1981             recm = NULL;
 1982             if (slp->ns_cc == slp->ns_reclen) {
 1983                 recm = slp->ns_raw;
 1984                 slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
 1985                 slp->ns_cc = slp->ns_reclen = 0;
 1986             } else if (slp->ns_cc > slp->ns_reclen) {
 1987                 len = 0;
 1988                 m = slp->ns_raw;
 1989                 om = (struct mbuf *)0;
 1990                 while (len < slp->ns_reclen) {
 1991                         if ((len + m->m_len) > slp->ns_reclen) {
 1992                                 m2 = m_copym(m, 0, slp->ns_reclen - len,
 1993                                         waitflag);
 1994                                 if (m2) {
 1995                                         if (om) {
 1996                                                 om->m_next = m2;
 1997                                                 recm = slp->ns_raw;
 1998                                         } else
 1999                                                 recm = m2;
 2000                                         m->m_data += slp->ns_reclen - len;
 2001                                         m->m_len -= slp->ns_reclen - len;
 2002                                         len = slp->ns_reclen;
 2003                                 } else {
 2004                                         slp->ns_flag &= ~SLP_GETSTREAM;
 2005                                         return (EWOULDBLOCK);
 2006                                 }
 2007                         } else if ((len + m->m_len) == slp->ns_reclen) {
 2008                                 om = m;
 2009                                 len += m->m_len;
 2010                                 m = m->m_next;
 2011                                 recm = slp->ns_raw;
 2012                                 om->m_next = (struct mbuf *)0;
 2013                         } else {
 2014                                 om = m;
 2015                                 len += m->m_len;
 2016                                 m = m->m_next;
 2017                         }
 2018                 }
 2019                 slp->ns_raw = m;
 2020                 slp->ns_cc -= len;
 2021                 slp->ns_reclen = 0;
 2022             } else {
 2023                 slp->ns_flag &= ~SLP_GETSTREAM;
 2024                 return (0);
 2025             }
 2026 
 2027             /*
 2028              * Accumulate the fragments into a record.
 2029              */
 2030             mpp = &slp->ns_frag;
 2031             while (*mpp)
 2032                 mpp = &((*mpp)->m_next);
 2033             *mpp = recm;
 2034             if (slp->ns_flag & SLP_LASTFRAG) {
 2035                 if (slp->ns_recend)
 2036                     slp->ns_recend->m_nextpkt = slp->ns_frag;
 2037                 else
 2038                     slp->ns_rec = slp->ns_frag;
 2039                 slp->ns_recend = slp->ns_frag;
 2040                 slp->ns_frag = (struct mbuf *)0;
 2041             }
 2042         }
 2043 }
 2044 
 2045 /*
 2046  * Parse an RPC header.
 2047  */
 2048 int
 2049 nfsrv_dorec(slp, nfsd, ndp)
 2050         struct nfssvc_sock *slp;
 2051         struct nfsd *nfsd;
 2052         struct nfsrv_descript **ndp;
 2053 {
 2054         struct mbuf *m, *nam;
 2055         struct nfsrv_descript *nd;
 2056         int error;
 2057 
 2058         *ndp = NULL;
 2059         if ((slp->ns_flag & SLP_VALID) == 0 ||
 2060             (m = slp->ns_rec) == (struct mbuf *)0)
 2061                 return (ENOBUFS);
 2062         slp->ns_rec = m->m_nextpkt;
 2063         if (slp->ns_rec)
 2064                 m->m_nextpkt = (struct mbuf *)0;
 2065         else
 2066                 slp->ns_recend = (struct mbuf *)0;
 2067         if (m->m_type == MT_SONAME) {
 2068                 nam = m;
 2069                 m = m->m_next;
 2070                 nam->m_next = NULL;
 2071         } else
 2072                 nam = NULL;
 2073         MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript),
 2074                 M_NFSRVDESC, M_WAITOK);
 2075         nfs_realign(&m, 10 * NFSX_UNSIGNED);
 2076         nd->nd_md = nd->nd_mrep = m;
 2077         nd->nd_nam2 = nam;
 2078         nd->nd_dpos = mtod(m, caddr_t);
 2079         error = nfs_getreq(nd, nfsd, TRUE);
 2080         if (error) {
 2081                 m_freem(nam);
 2082                 free((caddr_t)nd, M_NFSRVDESC);
 2083                 return (error);
 2084         }
 2085         *ndp = nd;
 2086         nfsd->nfsd_nd = nd;
 2087         return (0);
 2088 }
 2089 
 2090 
 2091 /*
 2092  * Search for a sleeping nfsd and wake it up.
 2093  * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
 2094  * running nfsds will go look for the work in the nfssvc_sock list.
 2095  */
 2096 void
 2097 nfsrv_wakenfsd(slp)
 2098         struct nfssvc_sock *slp;
 2099 {
 2100         struct nfsd *nd;
 2101 
 2102         if ((slp->ns_flag & SLP_VALID) == 0)
 2103                 return;
 2104         for (nd = TAILQ_FIRST(&nfsd_head); nd != NULL;
 2105             nd = TAILQ_NEXT(nd, nfsd_chain)) {
 2106                 if (nd->nfsd_flag & NFSD_WAITING) {
 2107                         nd->nfsd_flag &= ~NFSD_WAITING;
 2108                         if (nd->nfsd_slp)
 2109                                 panic("nfsd wakeup");
 2110                         slp->ns_sref++;
 2111                         nd->nfsd_slp = slp;
 2112                         wakeup((caddr_t)nd);
 2113                         return;
 2114                 }
 2115         }
 2116         slp->ns_flag |= SLP_DOREC;
 2117         nfsd_head_flag |= NFSD_CHECKSLP;
 2118 }
 2119 #endif /* NFSSERVER */

/* [<][>][^][v][top][bottom][index][help] */