root/netinet/tcp_timer.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. tcp_timer_init
  2. tcp_delack
  3. tcp_slowtimo
  4. tcp_canceltimers
  5. tcp_timer_freesack
  6. tcp_timer_rexmt
  7. tcp_timer_persist
  8. tcp_timer_keep
  9. tcp_timer_2msl

    1 /*      $OpenBSD: tcp_timer.c,v 1.39 2007/06/15 18:23:07 markus Exp $   */
    2 /*      $NetBSD: tcp_timer.c,v 1.14 1996/02/13 23:44:09 christos Exp $  */
    3 
    4 /*
    5  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    6  *      The Regents of the University of California.  All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93
   33  */
   34 
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/mbuf.h>
   38 #include <sys/socket.h>
   39 #include <sys/socketvar.h>
   40 #include <sys/protosw.h>
   41 #include <sys/kernel.h>
   42 
   43 #include <net/route.h>
   44 
   45 #include <netinet/in.h>
   46 #include <netinet/in_systm.h>
   47 #include <netinet/ip.h>
   48 #include <netinet/in_pcb.h>
   49 #include <netinet/ip_var.h>
   50 #include <netinet/tcp.h>
   51 #include <netinet/tcp_fsm.h>
   52 #include <netinet/tcp_timer.h>
   53 #include <netinet/tcp_var.h>
   54 #include <netinet/ip_icmp.h>
   55 #include <netinet/tcp_seq.h>
   56 
/*
 * Keepalive and persist tunables.  tcp_keepidle, tcp_keepintvl and
 * tcp_maxpersistidle are given their compiled-in defaults by
 * tcp_timer_init() when they are still zero; tcp_maxidle is recomputed
 * from tcp_keepintvl on every tcp_slowtimo() call.
 */
int     tcp_keepidle;
int     tcp_keepintvl;
int     tcp_maxpersistidle;     /* max idle time in persist */
int     tcp_maxidle;

/*
 * Time to delay the ACK.  This is given its default value by
 * tcp_timer_init() (called from tcp_init()), unless it has been
 * patched to a non-zero value beforehand.
 */
int     tcp_delack_ticks;
   67 
   68 void    tcp_timer_rexmt(void *);
   69 void    tcp_timer_persist(void *);
   70 void    tcp_timer_keep(void *);
   71 void    tcp_timer_2msl(void *);
   72 
   73 const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
   74         tcp_timer_rexmt,
   75         tcp_timer_persist,
   76         tcp_timer_keep,
   77         tcp_timer_2msl,
   78 };
   79 
   80 /*
   81  * Timer state initialization, called from tcp_init().
   82  */
   83 void
   84 tcp_timer_init(void)
   85 {
   86 
   87         if (tcp_keepidle == 0)
   88                 tcp_keepidle = TCPTV_KEEP_IDLE;
   89 
   90         if (tcp_keepintvl == 0)
   91                 tcp_keepintvl = TCPTV_KEEPINTVL;
   92 
   93         if (tcp_maxpersistidle == 0)
   94                 tcp_maxpersistidle = TCPTV_KEEP_IDLE;
   95 
   96         if (tcp_delack_ticks == 0)
   97                 tcp_delack_ticks = TCP_DELACK_TICKS;
   98 }
   99 
  100 /*
  101  * Callout to process delayed ACKs for a TCPCB.
  102  */
  103 void
  104 tcp_delack(void *arg)
  105 {
  106         struct tcpcb *tp = arg;
  107         int s;
  108 
  109         /*
  110          * If tcp_output() wasn't able to transmit the ACK
  111          * for whatever reason, it will restart the delayed
  112          * ACK callout.
  113          */
  114 
  115         s = splsoftnet();
  116         if (tp->t_flags & TF_DEAD) {
  117                 splx(s);
  118                 return;
  119         }
  120         tp->t_flags |= TF_ACKNOW;
  121         (void) tcp_output(tp);
  122         splx(s);
  123 }
  124 
  125 /*
  126  * Tcp protocol timeout routine called every 500 ms.
  127  * Updates the timers in all active tcb's and
  128  * causes finite state machine actions if timers expire.
  129  */
  130 void
  131 tcp_slowtimo()
  132 {
  133         int s;
  134 
  135         s = splsoftnet();
  136         tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl;
  137 #ifdef TCP_COMPAT_42
  138         tcp_iss += TCP_ISSINCR/PR_SLOWHZ;               /* increment iss */
  139         if ((int)tcp_iss < 0)
  140                 tcp_iss = 0;                            /* XXX */
  141 #else
  142         tcp_iss += TCP_ISSINCR2/PR_SLOWHZ;              /* increment iss */
  143 #endif /* TCP_COMPAT_42 */
  144         tcp_now++;                                      /* for timestamps */
  145         splx(s);
  146 }
  147 
  148 /*
  149  * Cancel all timers for TCP tp.
  150  */
  151 void
  152 tcp_canceltimers(tp)
  153         struct tcpcb *tp;
  154 {
  155         int i;
  156 
  157         for (i = 0; i < TCPT_NTIMERS; i++)
  158                 TCP_TIMER_DISARM(tp, i);
  159 }
  160 
  161 int     tcp_backoff[TCP_MAXRXTSHIFT + 1] =
  162     { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
  163 
  164 int tcp_totbackoff = 511;       /* sum of tcp_backoff[] */
  165 
  166 /*
  167  * TCP timer processing.
  168  */
  169 
  170 #ifdef TCP_SACK
  171 void    tcp_timer_freesack(struct tcpcb *);
  172 
  173 void
  174 tcp_timer_freesack(struct tcpcb *tp)
  175 {
  176         struct sackhole *p, *q;
  177         /*
  178          * Free SACK holes for 2MSL and REXMT timers.
  179          */
  180         q = tp->snd_holes;
  181         while (q != NULL) {
  182                 p = q;
  183                 q = q->next;
  184                 pool_put(&sackhl_pool, p);
  185         }
  186         tp->snd_holes = 0;
  187 #ifdef TCP_FACK
  188         tp->snd_fack = tp->snd_una;
  189         tp->retran_data = 0;
  190         tp->snd_awnd = 0;
  191 #endif /* TCP_FACK */
  192 }
  193 #endif /* TCP_SACK */
  194 
  195 void
  196 tcp_timer_rexmt(void *arg)
  197 {
  198         struct tcpcb *tp = arg;
  199         uint32_t rto;
  200         int s;
  201 
  202         s = splsoftnet();
  203         if (tp->t_flags & TF_DEAD) {
  204                 splx(s);
  205                 return;
  206         }
  207 
  208         if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
  209             SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
  210             SEQ_LT(tp->t_pmtud_th_seq, (int)(tp->snd_una + tp->t_maxseg))) {
  211                 extern struct sockaddr_in icmpsrc;
  212                 struct icmp icmp;
  213 
  214                 tp->t_flags &= ~TF_PMTUD_PEND;
  215 
  216                 /* XXX create fake icmp message with relevant entries */
  217                 icmp.icmp_nextmtu = tp->t_pmtud_nextmtu;
  218                 icmp.icmp_ip.ip_len = tp->t_pmtud_ip_len;
  219                 icmp.icmp_ip.ip_hl = tp->t_pmtud_ip_hl;
  220                 icmpsrc.sin_addr = tp->t_inpcb->inp_faddr;
  221                 icmp_mtudisc(&icmp);
  222 
  223                 /*
  224                  * Notify all connections to the same peer about
  225                  * new mss and trigger retransmit.
  226                  */
  227                 in_pcbnotifyall(&tcbtable, sintosa(&icmpsrc), EMSGSIZE,
  228                     tcp_mtudisc);
  229                 splx(s);
  230                 return;
  231         }
  232 
  233 #ifdef TCP_SACK
  234         tcp_timer_freesack(tp);
  235 #endif
  236         if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
  237                 tp->t_rxtshift = TCP_MAXRXTSHIFT;
  238                 tcpstat.tcps_timeoutdrop++;
  239                 (void)tcp_drop(tp, tp->t_softerror ?
  240                     tp->t_softerror : ETIMEDOUT);
  241                 goto out;
  242         }
  243         tcpstat.tcps_rexmttimeo++;
  244         rto = TCP_REXMTVAL(tp);
  245         if (rto < tp->t_rttmin)
  246                 rto = tp->t_rttmin;
  247         TCPT_RANGESET(tp->t_rxtcur,
  248             rto * tcp_backoff[tp->t_rxtshift],
  249             tp->t_rttmin, TCPTV_REXMTMAX);
  250         TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
  251 
  252         /*
  253          * If we are losing and we are trying path MTU discovery,
  254          * try turning it off.  This will avoid black holes in
  255          * the network which suppress or fail to send "packet
  256          * too big" ICMP messages.  We should ideally do
  257          * lots more sophisticated searching to find the right
  258          * value here...
  259          */
  260         if (ip_mtudisc && tp->t_inpcb &&
  261             TCPS_HAVEESTABLISHED(tp->t_state) &&
  262             tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) {
  263                 struct inpcb *inp = tp->t_inpcb;
  264                 struct rtentry *rt = NULL;
  265                 struct sockaddr_in sin;
  266 
  267                 /* No data to send means path mtu is not a problem */
  268                 if (!inp->inp_socket->so_snd.sb_cc)
  269                         goto leave;
  270 
  271                 rt = in_pcbrtentry(inp);
  272                 /* Check if path MTU discovery is disabled already */
  273                 if (rt && (rt->rt_flags & RTF_HOST) &&
  274                     (rt->rt_rmx.rmx_locks & RTV_MTU))
  275                         goto leave;
  276 
  277                 rt = NULL;
  278                 switch(tp->pf) {
  279 #ifdef INET6
  280                 case PF_INET6:
  281                         /*
  282                          * We can not turn off path MTU for IPv6.
  283                          * Do nothing for now, maybe lower to
  284                          * minimum MTU.
  285                          */
  286                         break;
  287 #endif
  288                 case PF_INET:
  289                         bzero(&sin, sizeof(struct sockaddr_in));
  290                         sin.sin_family = AF_INET;
  291                         sin.sin_len = sizeof(struct sockaddr_in);
  292                         sin.sin_addr = inp->inp_faddr;
  293                         rt = icmp_mtudisc_clone(sintosa(&sin));
  294                         break;
  295                 }
  296                 if (rt != NULL) {
  297                         /* Disable path MTU discovery */
  298                         if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
  299                                 rt->rt_rmx.rmx_locks |= RTV_MTU;
  300                                 in_rtchange(inp, 0);
  301                         }
  302 
  303                         rtfree(rt);
  304                 }
  305         leave:
  306                 ;
  307         }
  308 
  309         /*
  310          * If losing, let the lower level know and try for
  311          * a better route.  Also, if we backed off this far,
  312          * our srtt estimate is probably bogus.  Clobber it
  313          * so we'll take the next rtt measurement as our srtt;
  314          * move the current srtt into rttvar to keep the current
  315          * retransmit times until then.
  316          */
  317         if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
  318                 in_losing(tp->t_inpcb);
  319                 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
  320                 tp->t_srtt = 0;
  321         }
  322         tp->snd_nxt = tp->snd_una;
  323 #if defined(TCP_SACK)
  324         /*
  325          * Note:  We overload snd_last to function also as the
  326          * snd_last variable described in RFC 2582
  327          */
  328         tp->snd_last = tp->snd_max;
  329 #endif /* TCP_SACK */
  330         /*
  331          * If timing a segment in this window, stop the timer.
  332          */
  333         tp->t_rtttime = 0;
  334 #ifdef TCP_ECN
  335         /*
  336          * if ECN is enabled, there might be a broken firewall which
  337          * blocks ecn packets.  fall back to non-ecn.
  338          */
  339         if ((tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED)
  340             && tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
  341                 tp->t_flags |= TF_DISABLE_ECN;
  342 #endif
  343         /*
  344          * Close the congestion window down to one segment
  345          * (we'll open it by one segment for each ack we get).
  346          * Since we probably have a window's worth of unacked
  347          * data accumulated, this "slow start" keeps us from
  348          * dumping all that data as back-to-back packets (which
  349          * might overwhelm an intermediate gateway).
  350          *
  351          * There are two phases to the opening: Initially we
  352          * open by one mss on each ack.  This makes the window
  353          * size increase exponentially with time.  If the
  354          * window is larger than the path can handle, this
  355          * exponential growth results in dropped packet(s)
  356          * almost immediately.  To get more time between
  357          * drops but still "push" the network to take advantage
  358          * of improving conditions, we switch from exponential
  359          * to linear window opening at some threshold size.
  360          * For a threshold, we use half the current window
  361          * size, truncated to a multiple of the mss.
  362          *
  363          * (the minimum cwnd that will give us exponential
  364          * growth is 2 mss.  We don't allow the threshold
  365          * to go below this.)
  366          */
  367         {
  368                 u_long win = ulmin(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
  369                 if (win < 2)
  370                         win = 2;
  371                 tp->snd_cwnd = tp->t_maxseg;
  372                 tp->snd_ssthresh = win * tp->t_maxseg;
  373                 tp->t_dupacks = 0;
  374 #ifdef TCP_ECN
  375                 tp->snd_last = tp->snd_max;
  376                 tp->t_flags |= TF_SEND_CWR;
  377 #endif
  378 #if 1 /* TCP_ECN */
  379                 tcpstat.tcps_cwr_timeout++;
  380 #endif
  381         }
  382         (void) tcp_output(tp);
  383 
  384  out:
  385         splx(s);
  386 }
  387 
  388 void
  389 tcp_timer_persist(void *arg)
  390 {
  391         struct tcpcb *tp = arg;
  392         uint32_t rto;
  393         int s;
  394 
  395         s = splsoftnet();
  396         if ((tp->t_flags & TF_DEAD) ||
  397             TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
  398                 splx(s);
  399                 return;
  400         }
  401         tcpstat.tcps_persisttimeo++;
  402         /*
  403          * Hack: if the peer is dead/unreachable, we do not
  404          * time out if the window is closed.  After a full
  405          * backoff, drop the connection if the idle time
  406          * (no responses to probes) reaches the maximum
  407          * backoff that we would use if retransmitting.
  408          */
  409         rto = TCP_REXMTVAL(tp);
  410         if (rto < tp->t_rttmin)
  411                 rto = tp->t_rttmin;
  412         if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
  413             ((tcp_now - tp->t_rcvtime) >= tcp_maxpersistidle ||
  414             (tcp_now - tp->t_rcvtime) >= rto * tcp_totbackoff)) {
  415                 tcpstat.tcps_persistdrop++;
  416                 tp = tcp_drop(tp, ETIMEDOUT);
  417                 goto out;
  418         }
  419         tcp_setpersist(tp);
  420         tp->t_force = 1;
  421         (void) tcp_output(tp);
  422         tp->t_force = 0;
  423  out:
  424         splx(s);
  425 }
  426 
  427 void
  428 tcp_timer_keep(void *arg)
  429 {
  430         struct tcpcb *tp = arg;
  431         int s;
  432 
  433         s = splsoftnet();
  434         if (tp->t_flags & TF_DEAD) {
  435                 splx(s);
  436                 return;
  437         }
  438 
  439         tcpstat.tcps_keeptimeo++;
  440         if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
  441                 goto dropit;
  442         if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE &&
  443             tp->t_state <= TCPS_CLOSING) {
  444                 if ((tcp_maxidle > 0) &&
  445                     ((tcp_now - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle))
  446                         goto dropit;
  447                 /*
  448                  * Send a packet designed to force a response
  449                  * if the peer is up and reachable:
  450                  * either an ACK if the connection is still alive,
  451                  * or an RST if the peer has closed the connection
  452                  * due to timeout or reboot.
  453                  * Using sequence number tp->snd_una-1
  454                  * causes the transmitted zero-length segment
  455                  * to lie outside the receive window;
  456                  * by the protocol spec, this requires the
  457                  * correspondent TCP to respond.
  458                  */
  459                 tcpstat.tcps_keepprobe++;
  460 #ifdef TCP_COMPAT_42
  461                 /*
  462                  * The keepalive packet must have nonzero length
  463                  * to get a 4.2 host to respond.
  464                  */
  465                 tcp_respond(tp, mtod(tp->t_template, caddr_t),
  466                     (struct mbuf *)NULL, tp->rcv_nxt - 1, tp->snd_una - 1, 0);
  467 #else
  468                 tcp_respond(tp, mtod(tp->t_template, caddr_t),
  469                     (struct mbuf *)NULL, tp->rcv_nxt, tp->snd_una - 1, 0);
  470 #endif
  471                 TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
  472         } else
  473                 TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
  474 
  475         splx(s);
  476         return;
  477 
  478  dropit:
  479         tcpstat.tcps_keepdrops++;
  480         tp = tcp_drop(tp, ETIMEDOUT);
  481 
  482         splx(s);
  483 }
  484 
  485 void
  486 tcp_timer_2msl(void *arg)
  487 {
  488         struct tcpcb *tp = arg;
  489         int s;
  490 
  491         s = splsoftnet();
  492         if (tp->t_flags & TF_DEAD) {
  493                 splx(s);
  494                 return;
  495         }
  496 
  497 #ifdef TCP_SACK
  498         tcp_timer_freesack(tp);
  499 #endif
  500 
  501         if (tp->t_state != TCPS_TIME_WAIT &&
  502             ((tcp_maxidle == 0) || ((tcp_now - tp->t_rcvtime) <= tcp_maxidle)))
  503                 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl);
  504         else
  505                 tp = tcp_close(tp);
  506 
  507         splx(s);
  508 }

/* [<][>][^][v][top][bottom][index][help] */